diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-11 05:01:30 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-11 05:01:30 +0100 |
commit | c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch) | |
tree | 9830baf38832769e1cf621708889111bbe3c93df /net | |
parent | Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jik... (diff) | |
parent | crypto: fix af_alg_make_sg() conversion to iov_iter (diff) | |
download | linux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.tar.xz linux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) More iov_iter conversion work from Al Viro.
[ The "crypto: switch af_alg_make_sg() to iov_iter" commit was
wrong, and this pull actually adds an extra commit on top of the
branch I'm pulling to fix that up, so that the pre-merge state is
ok. - Linus ]
2) Various optimizations to the ipv4 forwarding information base trie
lookup implementation. From Alexander Duyck.
3) Remove sock_iocb altogether, from Christoph Hellwig.
4) Allow congestion control algorithm selection via routing metrics.
From Daniel Borkmann.
5) Make ipv4 uncached route list per-cpu, from Eric Dumazet.
6) Handle rfs hash collisions more gracefully, also from Eric Dumazet.
7) Add xmit_more support to r8169, e1000, and e1000e drivers. From
Florian Westphal.
8) Transparent Ethernet Bridging support for GRO, from Jesse Gross.
9) Add BPF packet actions to packet scheduler, from Jiri Pirko.
10) Add support for unique flow IDs to openvswitch, from Joe Stringer.
11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman
Kwok.
12) More sanely handle out-of-window dupacks, which can result in
serious ACK storms. From Neal Cardwell.
13) Various rhashtable bug fixes and enhancements, from Herbert Xu,
Patrick McHardy, and Thomas Graf.
14) Support xmit_more in be2net, from Sathya Perla.
15) Group Policy extensions for vxlan, from Thomas Graf.
16) Remove Checksum Offload support for vxlan, from Tom Herbert.
17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From
Vlad Yasevich.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits)
crypto: fix af_alg_make_sg() conversion to iov_iter
ipv4: Namespecify TCP PMTU mechanism
i40e: Fix for stats init function call in Rx setup
tcp: don't include Fast Open option in SYN-ACK on pure SYN-data
openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set
ipv6: Make __ipv6_select_ident static
ipv6: Fix fragment id assignment on LE arches.
bridge: Fix inability to add non-vlan fdb entry
net: Mellanox: Delete unnecessary checks before the function call "vunmap"
cxgb4: Add support in cxgb4 to get expansion rom version via ethtool
ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version
net: dsa: Remove redundant phy_attach()
IB/mlx4: Reset flow support for IB kernel ULPs
IB/mlx4: Always use the correct port for mirrored multicast attachments
net/bonding: Fix potential bad memory access during bonding events
tipc: remove tipc_snprintf
tipc: nl compat add noop and remove legacy nl framework
tipc: convert legacy nl stats show to nl compat
tipc: convert legacy nl net id get to nl compat
tipc: convert legacy nl net id set to nl compat
...
Diffstat (limited to 'net')
329 files changed, 18467 insertions, 11560 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 90cc2bdd4064..61bf2a06e85d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = vlan_tx_tag_get_id(skb); + u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 8ac8a5cc2143..c92b52f37d38 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -238,6 +238,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vlan_get_link_net(const struct net_device *dev) +{ + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + + return dev_net(real_dev); +} + struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, @@ -250,6 +257,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .dellink = unregister_vlan_dev, .get_size = vlan_get_size, .fill_info = vlan_fill_info, + .get_link_net = vlan_get_link_net, }; int __init vlan_netlink_init(void) diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 11660a3aab5a..c6fc8f756c9a 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -62,6 +62,7 @@ config BATMAN_ADV_MCAST config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV + depends on DEBUG_FS help This is an option for use by developers; most people should say N here. 
This enables compilation of support for diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1e8053976e83..00e00e09b000 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -26,9 +26,8 @@ #include "bat_algo.h" #include "network-coding.h" - /** - * batadv_dup_status - duplicate status + * enum batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the * neighbor) @@ -517,7 +516,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent - * @direktlink: true if this is a direct link packet + * @directlink: true if this is a direct link packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @forw_packet: the forwarded packet which should be checked @@ -879,7 +878,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bat_iv.bcast_own[word_index]); + word = &orig_node->bat_iv.bcast_own[word_index]; batadv_bit_get_packet(bat_priv, word, 1, 0); if_num = hard_iface->if_num; @@ -1362,10 +1361,10 @@ out: return ret; } - /** * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if * @skb: the skb containing the OGM + * @ogm_offset: offset from skb->data to start of ogm header * @orig_node: the (cached) orig node for the originator of this OGM * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered @@ -1664,7 +1663,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, offset = if_num * BATADV_NUM_WORDS; 
spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - word = &(orig_neigh_node->bat_iv.bcast_own[offset]); + word = &orig_neigh_node->bat_iv.bcast_own[offset]; bit_pos = if_incoming_seqno - 2; bit_pos -= ntohl(ogm_packet->seqno); batadv_set_bit(word, bit_pos); @@ -1902,10 +1901,10 @@ out: * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than * neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison - * @if_outgoing: outgoing interface for the first neighbor + * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor - + * * Returns true if the metric via neigh1 is equally good or better than * the metric via neigh2, false otherwise. */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 9586750022f5..e3da07a64026 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -29,7 +29,6 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n) bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); } - /* receive and process one packet within the sequence number window. 
* * returns: diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cc2407351d36..2acaafe60188 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -29,8 +29,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, diff = last_seqno - curr_seqno; if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; - else - return test_bit(diff, seq_bits) != 0; + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index a957c8140721..ac4b96eccade 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -69,7 +69,6 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, return hash % size; } - /* compares address and vid of two backbone gws */ static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) @@ -245,14 +244,14 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) spin_unlock_bh(list_lock); } - /* all claims gone, intialize CRC */ + /* all claims gone, initialize CRC */ backbone_gw->crc = BATADV_BLA_CRC_INIT; } /** * batadv_bla_send_claim - sends a claim frame according to the provided info * @bat_priv: the bat priv with all the soft interface information - * @orig: the mac address to be announced within the claim + * @mac: the mac address to be announced within the claim * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ @@ -364,6 +363,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator * @vid: the VLAN ID + * @own_backbone: set if the requested backbone is local * * searches for the backbone gw or creates a new one if it could not * be found. 
@@ -454,6 +454,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, /** * batadv_bla_answer_request - answer a bla request by sending own claims * @bat_priv: the bat priv with all the soft interface information + * @primary_if: interface where the request came on * @vid: the vid where the request came on * * Repeat all of our own claims, and finally send an ANNOUNCE frame @@ -660,7 +661,6 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, if (unlikely(!backbone_gw)) return 1; - /* handle as ANNOUNCE frame */ backbone_gw->lasttime = jiffies; crc = ntohs(*((__be16 *)(&an_addr[4]))); @@ -775,6 +775,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, /** * batadv_check_claim_group * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header * @hw_dst: the Hardware destination in the ARP Header * @ethhdr: pointer to the Ethernet header of the claim frame @@ -846,10 +847,10 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, return 2; } - /** * batadv_bla_process_claim * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked * * Check if this is a claim frame, and process it accordingly. @@ -1327,7 +1328,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, goto out; } /* not found, add a new entry (overwrite the oldest entry) - * and allow it, its the first occurence. + * and allow it, its the first occurrence. 
*/ curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); curr %= BATADV_DUPLIST_SIZE; @@ -1343,8 +1344,6 @@ out: return ret; } - - /** * batadv_bla_is_backbone_gw_orig * @bat_priv: the bat priv with all the soft interface information @@ -1386,7 +1385,6 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, return false; } - /** * batadv_bla_is_backbone_gw * @skb: the frame to be checked @@ -1476,7 +1474,6 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto allow; - if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index a12e25efaf6f..a4972874c056 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -233,7 +233,6 @@ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { - return; } #endif @@ -405,6 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ .release = single_release, \ }, \ } + static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, batadv_originators_hardif_open); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b5981113c9a7..aad022dd15df 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1100,6 +1100,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); } + /** * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local * DAT storage only diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d76e1d06c5b5..2fe0764c64be 100644 --- 
a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -25,9 +25,7 @@ #include <linux/if_arp.h> -/** - * BATADV_DAT_ADDR_MAX - maximum address value in the DHT space - */ +/* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */ #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) void batadv_dat_status_update(struct net_device *net_dev); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 00f9e144cc97..3d1dcaa3e8b5 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -23,7 +23,6 @@ #include "hard-interface.h" #include "soft-interface.h" - /** * batadv_frag_clear_chain - delete entries in the fragment buffer chain * @head: head of chain with entries. diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 5d7a0e66a22b..d848cf6676a2 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -41,8 +41,7 @@ batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) if (!hlist_empty(&frags_entry->head) && batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) return true; - else - return false; + return false; } #endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index e0bcf9e84273..27649e85f3f6 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -775,6 +775,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, return ret; } + /** * batadv_gw_out_of_range - check if the dhcp request destination is the best gw * @bat_priv: the bat priv with all the soft interface information diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d1183e882167..12fc77bef23f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -41,7 +41,6 @@ #include "network-coding.h" #include "fragmentation.h" - /* List manipulations on hardif_list have to 
be rtnl_lock()'ed, * list traversals just rcu-locked */ @@ -403,6 +402,9 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_free; } + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ @@ -651,7 +653,7 @@ static struct batadv_tvlv_handler /** * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and * possibly free it - * @tvlv_handler: the tvlv container to free + * @tvlv: the tvlv container to free */ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) { @@ -796,11 +798,11 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accomodate + * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate * requested packet size * @packet_buff: packet buffer * @packet_buff_len: packet buffer size - * @packet_min_len: requested packet minimum size + * @min_packet_len: requested packet minimum size * @additional_packet_len: requested additional packet size on top of minimum * size * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a1fcd884f0b1..4d2318829a34 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.4.0" +#define BATADV_SOURCE_VERSION "2015.0" #endif /* B.A.T.M.A.N. 
parameters */ @@ -92,9 +92,8 @@ /* numbers of originator to contact for any PUT/GET DHT operation */ #define BATADV_DAT_CANDIDATES_NUM 3 -/** - * BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ - * at most from the primary one in order to be still considered acceptable +/* BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ + * at most from the primary one in order to be still considered acceptable */ #define BATADV_TQ_SIMILARITY_THRESHOLD 50 @@ -313,10 +312,10 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * - when adding 128 - it is neither a predecessor nor a successor, * - after adding more than 127 to the starting value - it is a successor */ -#define batadv_seq_before(x, y) ({typeof(x) _d1 = (x); \ - typeof(y) _d2 = (y); \ - typeof(x) _dummy = (_d1 - _d2); \ - (void) (&_d1 == &_d2); \ +#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \ + typeof(y)_d2 = (y); \ + typeof(x)_dummy = (_d1 - _d2); \ + (void)(&_d1 == &_d2); \ _dummy > batadv_smallest_signed_int(_dummy); }) #define batadv_seq_after(x, y) batadv_seq_before(y, x) diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 73b5d45819c1..3a44ebdb43cb 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -50,7 +50,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) { - return; } static inline enum batadv_forw_mode @@ -67,12 +66,10 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv) static inline void batadv_mcast_free(struct batadv_priv *bat_priv) { - return; } static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { - return; } #endif /* CONFIG_BATMAN_ADV_MCAST */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index fab47f1f3ef9..127cc4d7380a 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c 
@@ -1212,8 +1212,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, { if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) return false; - else - return true; + return true; } /** diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index bea8198d0198..90e805aba379 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -797,7 +797,6 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, return ifinfo_purged; } - /** * batadv_purge_orig_neighbors - purges neighbors from originator * @bat_priv: the bat priv with all the soft interface information diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index db3a9ed734cb..aa4a43696295 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -70,7 +70,6 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); - /* hashfunction to choose an entry in a hash table of given size * hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 34e096d2dce1..b81fbbf21a63 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -198,6 +198,7 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; + #pragma pack() /** @@ -376,7 +377,7 @@ struct batadv_frag_packet { uint8_t reserved:4; uint8_t no:4; #else -#error "unknown bitfield endianess" +#error "unknown bitfield endianness" #endif uint8_t dest[ETH_ALEN]; uint8_t orig[ETH_ALEN]; @@ -452,7 +453,7 @@ struct batadv_coded_packet { * @src: address of the source * @dst: address of the destination * @tvlv_len: length of tvlv data following the unicast tvlv header - * @align: 2 bytes to align the header to a 4 byte boundry + * @align: 2 bytes to align the header to a 4 byte boundary */ struct batadv_unicast_tvlv_packet { uint8_t packet_type; 
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 6648f321864d..da83982bf974 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -292,7 +292,6 @@ out: return ret; } - int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -457,7 +456,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * the last chosen bonding candidate (next_candidate). If no such * router is found, use the first candidate found (the previously * chosen bonding candidate might have been the last one in the list). - * If this can't be found either, return the previously choosen + * If this can't be found either, return the previously chosen * router - obviously there are no other candidates. */ rcu_read_lock(); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5467955eb27c..5ec31d7de24f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -36,7 +36,6 @@ #include "bridge_loop_avoidance.h" #include "network-coding.h" - static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index f40cb0436eba..a75dc12f96f8 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -151,7 +151,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ batadv_store_##_name) - #define BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func) \ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff, \ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5f59e7f899a0..07b263a437d1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1780,7 +1780,6 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, 
batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); - out: if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); @@ -2769,9 +2768,8 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, { if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); - else - return batadv_send_other_tt_response(bat_priv, tt_data, - req_src, req_dst); + return batadv_send_other_tt_response(bat_priv, tt_data, req_src, + req_dst); } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, @@ -2854,7 +2852,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, /** * batadv_is_my_client - check if a client is served by the local node * @bat_priv: the bat priv with all the soft interface information - * @addr: the mac adress of the client to check + * @addr: the mac address of the client to check * @vid: VLAN identifier * * Returns true if the client is served by this node, false otherwise. 
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8854c05622a9..9398c3fb4174 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -199,7 +199,6 @@ struct batadv_orig_bat_iv { /** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address - * @primary_addr: hosts primary interface address * @ifinfo_list: list for routers per outgoing interface * @last_bonding_candidate: pointer to last ifinfo of last used router * @batadv_dat_addr_t: address of the orig node in the distributed hash @@ -244,7 +243,6 @@ struct batadv_orig_bat_iv { */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; - uint8_t primary_addr[ETH_ALEN]; struct hlist_head ifinfo_list; struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT @@ -970,7 +968,7 @@ struct batadv_tt_orig_list_entry { }; /** - * struct batadv_tt_change_node - structure for tt changes occured + * struct batadv_tt_change_node - structure for tt changes occurred * @list: list node for batadv_priv_tt::changes_list * @change: holds the actual translation table diff data */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index c989253737f0..1742b849fcff 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -31,7 +31,7 @@ #define VERSION "0.1" -static struct dentry *lowpan_psm_debugfs; +static struct dentry *lowpan_enable_debugfs; static struct dentry *lowpan_control_debugfs; #define IFACE_NAME_TEMPLATE "bt%d" @@ -55,11 +55,7 @@ struct skb_cb { static LIST_HEAD(bt_6lowpan_devices); static DEFINE_SPINLOCK(devices_lock); -/* If psm is set to 0 (default value), then 6lowpan is disabled. - * Other values are used to indicate a Protocol Service Multiplexer - * value for 6lowpan. 
- */ -static u16 psm_6lowpan; +static bool enable_6lowpan; /* We are listening incoming connections via this channel */ @@ -761,7 +757,7 @@ static bool is_bt_6lowpan(struct hci_conn *hcon) if (hcon->type != LE_LINK) return false; - if (!psm_6lowpan) + if (!enable_6lowpan) return false; return true; @@ -1085,7 +1081,7 @@ static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) if (!pchan) return -EINVAL; - err = l2cap_chan_connect(pchan, cpu_to_le16(psm_6lowpan), 0, + err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0, addr, dst_type); BT_DBG("chan %p err %d", pchan, err); @@ -1118,7 +1114,7 @@ static struct l2cap_chan *bt_6lowpan_listen(void) struct l2cap_chan *pchan; int err; - if (psm_6lowpan == 0) + if (!enable_6lowpan) return NULL; pchan = chan_get(); @@ -1130,10 +1126,9 @@ static struct l2cap_chan *bt_6lowpan_listen(void) atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT); - BT_DBG("psm 0x%04x chan %p src type %d", psm_6lowpan, pchan, - pchan->src_type); + BT_DBG("chan %p src type %d", pchan, pchan->src_type); - err = l2cap_add_psm(pchan, addr, cpu_to_le16(psm_6lowpan)); + err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); if (err) { l2cap_chan_put(pchan); BT_ERR("psm cannot be added err %d", err); @@ -1219,22 +1214,23 @@ static void disconnect_all_peers(void) spin_unlock(&devices_lock); } -struct set_psm { +struct set_enable { struct work_struct work; - u16 psm; + bool flag; }; -static void do_psm_set(struct work_struct *work) +static void do_enable_set(struct work_struct *work) { - struct set_psm *set_psm = container_of(work, struct set_psm, work); + struct set_enable *set_enable = container_of(work, + struct set_enable, work); - if (set_psm->psm == 0 || psm_6lowpan != set_psm->psm) + if (!set_enable->flag || enable_6lowpan != set_enable->flag) /* Disconnect existing connections if 6lowpan is - * disabled (psm = 0), or if psm changes. 
+ * disabled */ disconnect_all_peers(); - psm_6lowpan = set_psm->psm; + enable_6lowpan = set_enable->flag; if (listen_chan) { l2cap_chan_close(listen_chan, 0); @@ -1243,33 +1239,33 @@ static void do_psm_set(struct work_struct *work) listen_chan = bt_6lowpan_listen(); - kfree(set_psm); + kfree(set_enable); } -static int lowpan_psm_set(void *data, u64 val) +static int lowpan_enable_set(void *data, u64 val) { - struct set_psm *set_psm; + struct set_enable *set_enable; - set_psm = kzalloc(sizeof(*set_psm), GFP_KERNEL); - if (!set_psm) + set_enable = kzalloc(sizeof(*set_enable), GFP_KERNEL); + if (!set_enable) return -ENOMEM; - set_psm->psm = val; - INIT_WORK(&set_psm->work, do_psm_set); + set_enable->flag = !!val; + INIT_WORK(&set_enable->work, do_enable_set); - schedule_work(&set_psm->work); + schedule_work(&set_enable->work); return 0; } -static int lowpan_psm_get(void *data, u64 *val) +static int lowpan_enable_get(void *data, u64 *val) { - *val = psm_6lowpan; + *val = enable_6lowpan; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_psm_fops, lowpan_psm_get, - lowpan_psm_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, + lowpan_enable_set, "%llu\n"); static ssize_t lowpan_control_write(struct file *fp, const char __user *user_buffer, @@ -1439,9 +1435,9 @@ static struct notifier_block bt_6lowpan_dev_notifier = { static int __init bt_6lowpan_init(void) { - lowpan_psm_debugfs = debugfs_create_file("6lowpan_psm", 0644, - bt_debugfs, NULL, - &lowpan_psm_fops); + lowpan_enable_debugfs = debugfs_create_file("6lowpan_enable", 0644, + bt_debugfs, NULL, + &lowpan_enable_fops); lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, bt_debugfs, NULL, &lowpan_control_fops); @@ -1451,7 +1447,7 @@ static int __init bt_6lowpan_init(void) static void __exit bt_6lowpan_exit(void) { - debugfs_remove(lowpan_psm_debugfs); + debugfs_remove(lowpan_enable_debugfs); debugfs_remove(lowpan_control_debugfs); if (listen_chan) { diff --git 
a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 29bcafc41adf..7de74635a110 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -64,4 +64,31 @@ config BT_6LOWPAN help IPv6 compression over Bluetooth Low Energy. +config BT_SELFTEST + bool "Bluetooth self testing support" + depends on BT && DEBUG_KERNEL + help + Run self tests when initializing the Bluetooth subsystem. This + is a developer option and can cause significant delay when booting + the system. + + When the Bluetooth subsystem is built as module, then the test + cases are run first thing at module load time. When the Bluetooth + subsystem is compiled into the kernel image, then the test cases + are run late in the initcall hierarchy. + +config BT_SELFTEST_ECDH + bool "ECDH test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for ECDH cryptographic functionality used by the + Bluetooth Low Energy Secure Connections feature. + +config BT_SELFTEST_SMP + bool "SMP test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for SMP cryptographic functionality, including both + legacy SMP as well as the Secure Connections features. 
+ source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a5432a6a0ae6..8e96e3072266 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,6 +13,8 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o + a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o + +bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 012e3b03589d..ce22e0cfa923 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,8 @@ #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> +#include "selftest.h" + #define VERSION "2.20" /* Bluetooth sockets */ @@ -716,6 +718,10 @@ static int __init bt_init(void) BT_INFO("Core ver %s", VERSION); + err = bt_selftest(); + if (err < 0) + return err; + bt_debugfs = debugfs_create_dir("bluetooth", NULL); err = bt_sysfs_init(); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ce82722d049b..05f57e491ccb 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -511,13 +511,12 @@ static int bnep_session(void *arg) static struct device *bnep_get_device(struct bnep_session *session) { - struct hci_conn *conn; + struct l2cap_conn *conn = l2cap_pi(session->sock->sk)->chan->conn; - conn = l2cap_pi(session->sock->sk)->chan->conn->hcon; - if (!conn) + if (!conn || !conn->hcon) return NULL; - return &conn->dev; + return &conn->hcon->dev; } static struct device_type bnep_type = { diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 1ca8a87a0787..75bd2c42e3e7 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -253,8 +253,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 
15) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 10); - if (!info && ctrl) { int len = min_t(uint, CAPI_MANUFACTURER_LEN, skb->data[CAPI_MSG_BASELEN + 14]); @@ -270,8 +268,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 32) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16); ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20); @@ -285,8 +281,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 17) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { int len = min_t(uint, CAPI_SERIAL_LEN, skb->data[CAPI_MSG_BASELEN + 16]); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe18825cc8a4..c9b8fa544785 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -25,11 +25,13 @@ /* Bluetooth HCI connection handling. 
*/ #include <linux/export.h> +#include <linux/debugfs.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> +#include "hci_request.h" #include "smp.h" #include "a2mp.h" @@ -546,6 +548,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + debugfs_remove_recursive(conn->debugfs); + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); @@ -629,7 +633,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_reenable_advertising(hdev); } -static void create_le_conn_complete(struct hci_dev *hdev, u8 status) +static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_conn *conn; @@ -1080,21 +1084,6 @@ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) } EXPORT_SYMBOL(hci_conn_check_secure); -/* Change link key */ -int hci_conn_change_link_key(struct hci_conn *conn) -{ - BT_DBG("hcon %p", conn); - - if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { - struct hci_cp_change_conn_link_key cp; - cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, - sizeof(cp), &cp); - } - - return 0; -} - /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5dcacf9607e4..3322d3f4c85a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -37,6 +37,8 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "hci_debugfs.h" #include "smp.h" static void hci_rx_work(struct work_struct *work); @@ -137,941 +139,9 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; -static int features_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - u8 p; - - hci_dev_lock(hdev); - for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { - 
seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, - hdev->features[p][0], hdev->features[p][1], - hdev->features[p][2], hdev->features[p][3], - hdev->features[p][4], hdev->features[p][5], - hdev->features[p][6], hdev->features[p][7]); - } - if (lmp_le_capable(hdev)) - seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", - hdev->le_features[0], hdev->le_features[1], - hdev->le_features[2], hdev->le_features[3], - hdev->le_features[4], hdev->le_features[5], - hdev->le_features[6], hdev->le_features[7]); - hci_dev_unlock(hdev); - - return 0; -} - -static int features_open(struct inode *inode, struct file *file) -{ - return single_open(file, features_show, inode->i_private); -} - -static const struct file_operations features_fops = { - .open = features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int blacklist_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->blacklist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int blacklist_open(struct inode *inode, struct file *file) -{ - return single_open(file, blacklist_show, inode->i_private); -} - -static const struct file_operations blacklist_fops = { - .open = blacklist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int uuids_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bt_uuid *uuid; - - hci_dev_lock(hdev); - list_for_each_entry(uuid, &hdev->uuids, list) { - u8 i, val[16]; - - /* The Bluetooth UUID values are stored in big endian, - * but with reversed byte order. So convert them into - * the right order for the %pUb modifier. 
- */ - for (i = 0; i < 16; i++) - val[i] = uuid->uuid[15 - i]; - - seq_printf(f, "%pUb\n", val); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int uuids_open(struct inode *inode, struct file *file) -{ - return single_open(file, uuids_show, inode->i_private); -} - -static const struct file_operations uuids_fops = { - .open = uuids_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int inquiry_cache_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct discovery_state *cache = &hdev->discovery; - struct inquiry_entry *e; - - hci_dev_lock(hdev); - - list_for_each_entry(e, &cache->all, all) { - struct inquiry_data *data = &e->data; - seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - &data->bdaddr, - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); - } - - hci_dev_unlock(hdev); - - return 0; -} - -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - return single_open(file, inquiry_cache_show, inode->i_private); -} - -static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int link_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct link_key *key; - - rcu_read_lock(); - list_for_each_entry_rcu(key, &hdev->link_keys, list) - seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, - HCI_LINK_KEY_SIZE, key->val, key->pin_len); - rcu_read_unlock(); - - return 0; -} - -static int link_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, link_keys_show, inode->i_private); -} - -static const struct file_operations link_keys_fops = { - .open = link_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = 
single_release, -}; - -static int dev_class_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], - hdev->dev_class[1], hdev->dev_class[0]); - hci_dev_unlock(hdev); - - return 0; -} - -static int dev_class_open(struct inode *inode, struct file *file) -{ - return single_open(file, dev_class_show, inode->i_private); -} - -static const struct file_operations dev_class_fops = { - .open = dev_class_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int voice_setting_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->voice_setting; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, - NULL, "0x%4.4llx\n"); - -static int auto_accept_delay_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - hdev->auto_accept_delay = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int auto_accept_delay_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->auto_accept_delay; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, - auto_accept_delay_set, "%llu\n"); - -static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_sc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_SC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_sc_support_fops = { - .open = simple_open, - .read = force_sc_support_read, - .write = force_sc_support_write, - .llseek = default_llseek, -}; - -static ssize_t force_lesc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_lesc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_lesc_support_fops = { - .open = simple_open, - .read = force_lesc_support_read, - .write = force_lesc_support_write, - .llseek = default_llseek, -}; - -static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static const struct file_operations sc_only_mode_fops = { - .open = simple_open, - .read = sc_only_mode_read, - .llseek = default_llseek, -}; - -static int idle_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val != 0 && (val < 500 || val > 3600000)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->idle_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int idle_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->idle_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, - idle_timeout_set, "%llu\n"); - -static int rpa_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - /* Require the RPA timeout to be at least 30 seconds and at most - * 24 hours. - */ - if (val < 30 || val > (60 * 60 * 24)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->rpa_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int rpa_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->rpa_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, - rpa_timeout_set, "%llu\n"); - -static int sniff_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, - sniff_min_interval_set, "%llu\n"); - -static int sniff_max_interval_set(void 
*data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, - sniff_max_interval_set, "%llu\n"); - -static int conn_info_min_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val > hdev->conn_info_max_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_min_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_min_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_min_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, - conn_info_min_age_set, "%llu\n"); - -static int conn_info_max_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val < hdev->conn_info_min_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_max_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_max_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_max_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, - conn_info_max_age_set, "%llu\n"); - -static int identity_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - bdaddr_t addr; - u8 addr_type; - - hci_dev_lock(hdev); - - hci_copy_identity_address(hdev, &addr, &addr_type); - - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, - 16, hdev->irk, &hdev->rpa); - - hci_dev_unlock(hdev); - - return 
0; -} - -static int identity_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_show, inode->i_private); -} - -static const struct file_operations identity_fops = { - .open = identity_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int random_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->random_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int random_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, random_address_show, inode->i_private); -} - -static const struct file_operations random_address_fops = { - .open = random_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int static_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->static_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int static_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, static_address_show, inode->i_private); -} - -static const struct file_operations static_address_fops = { - .open = static_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static ssize_t force_static_address_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_static_address_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_static_address_fops = { - .open = simple_open, - .read = force_static_address_read, - .write = force_static_address_write, - .llseek = default_llseek, -}; - -static int white_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->le_white_list, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int white_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, white_list_show, inode->i_private); -} - -static const struct file_operations white_list_fops = { - .open = white_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int identity_resolving_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_irk *irk; - - rcu_read_lock(); - list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", - &irk->bdaddr, irk->addr_type, - 16, irk->val, &irk->rpa); - } - rcu_read_unlock(); - - return 0; -} - -static int identity_resolving_keys_open(struct inode *inode, struct file 
*file) -{ - return single_open(file, identity_resolving_keys_show, - inode->i_private); -} - -static const struct file_operations identity_resolving_keys_fops = { - .open = identity_resolving_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int long_term_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_ltk *ltk; - - rcu_read_lock(); - list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) - seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", - &ltk->bdaddr, ltk->bdaddr_type, ltk->authenticated, - ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), - __le64_to_cpu(ltk->rand), 16, ltk->val); - rcu_read_unlock(); - - return 0; -} - -static int long_term_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, long_term_keys_show, inode->i_private); -} - -static const struct file_operations long_term_keys_fops = { - .open = long_term_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int conn_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, - conn_min_interval_set, "%llu\n"); - -static int conn_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_max_interval_get(void *data, u64 *val) -{ - 
struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, - conn_max_interval_set, "%llu\n"); - -static int conn_latency_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val > 0x01f3) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_latency = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_latency_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_latency; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, - conn_latency_set, "%llu\n"); - -static int supervision_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x000a || val > 0x0c80) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_supv_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int supervision_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_supv_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, - supervision_timeout_set, "%llu\n"); - -static int adv_channel_map_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x01 || val > 0x07) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_channel_map = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_channel_map_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_channel_map; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, - adv_channel_map_set, "%llu\n"); - -static int adv_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) - return 
-EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, - adv_min_interval_set, "%llu\n"); - -static int adv_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, - adv_max_interval_set, "%llu\n"); - -static int device_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct hci_conn_params *p; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->whitelist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - list_for_each_entry(p, &hdev->le_conn_params, list) { - seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, - p->auto_connect); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int device_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_list_show, inode->i_private); -} - -static const struct file_operations device_list_fops = { - .open = device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* ---- HCI requests ---- */ -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode) { BT_DBG("%s result 0x%2.2x", hdev->name, 
result); @@ -1427,43 +497,6 @@ static void le_setup(struct hci_request *req) set_bit(HCI_LE_ENABLED, &hdev->dev_flags); } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ - if (lmp_ext_inq_capable(hdev)) - return 0x02; - - if (lmp_inq_rssi_capable(hdev)) - return 0x01; - - if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && - hdev->lmp_subver == 0x0757) - return 0x01; - - if (hdev->manufacturer == 15) { - if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) - return 0x01; - } - - if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && - hdev->lmp_subver == 0x1805) - return 0x01; - - return 0x00; -} - -static void hci_setup_inquiry_mode(struct hci_request *req) -{ - u8 mode; - - mode = hci_get_inquiry_mode(req->hdev); - - hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - static void hci_setup_event_mask(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -1553,10 +586,16 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (lmp_le_capable(hdev)) le_setup(req); - /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read - * local supported commands HCI command. + /* All Bluetooth 1.2 and later controllers should support the + * HCI command for reading the local supported commands. + * + * Unfortunately some controllers indicate Bluetooth 1.2 support, + * but do not have support for this command. If that is the case, + * the driver can quirk the behavior and skip reading the local + * supported commands. 
*/ - if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) + if (hdev->hci_ver > BLUETOOTH_VER_1_1 && + !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { @@ -1570,6 +609,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { u8 mode = 0x01; + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); } else { @@ -1582,8 +622,18 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) } } - if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(req); + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) { + u8 mode; + + /* If Extended Inquiry Result events are supported, then + * they are clearly preferred over Inquiry Result with RSSI + * events. + */ + mode = lmp_ext_inq_capable(hdev) ? 0x02 : 0x01; + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + } if (lmp_inq_tx_pwr_capable(hdev)) hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); @@ -1682,27 +732,12 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - /* Some Broadcom based Bluetooth controllers do not support the - * Delete Stored Link Key command. They are clearly indicating its - * absence in the bit mask of supported commands. - * - * Check the supported commands and only if the the command is marked - * as supported send it. If not supported assume that the controller - * does not have actual support for stored link keys which makes this - * command redundant anyway. - * - * Some controllers indicate that they support handling deleting - * stored link keys, but they don't. The quirk lets a driver - * just disable this command. 
- */ - if (hdev->commands[6] & 0x80 && - !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { - struct hci_cp_delete_stored_link_key cp; + if (hdev->commands[6] & 0x20) { + struct hci_cp_read_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); - cp.delete_all = 0x01; - hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, - sizeof(cp), &cp); + cp.read_all = 0x01; + hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp); } if (hdev->commands[5] & 0x10) @@ -1735,6 +770,12 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * Parameter Request */ + /* If the controller supports the Data Length Extension + * feature, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) + events[0] |= 0x40; /* LE Data Length Change */ + /* If the controller supports Extended Scanner Filter * Policies, enable the correspondig event. */ @@ -1765,6 +806,14 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + /* Read LE Maximum Data Length */ + hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); + + /* Read LE Suggested Default Data Length */ + hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL); + } + hci_set_le_support(req); } @@ -1782,6 +831,29 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. 
The quirk lets a driver + * just disable this command. + */ + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { + struct hci_cp_delete_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, + sizeof(cp), &cp); + } + /* Set event mask page 2 if the HCI command for it is supported */ if (hdev->commands[22] & 0x04) hci_set_event_mask_page_2(req); @@ -1799,8 +871,10 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ - if (bredr_sc_enabled(hdev)) { + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + bredr_sc_enabled(hdev)) { u8 support = 0x01; + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support); } @@ -1841,110 +915,29 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; - /* Only create debugfs entries during the initial setup - * phase and not every time the controller gets powered on. + /* This function is only called when the controller is actually in + * configured state. When the controller is marked as unconfigured, + * this initialization procedure is not run. + * + * It means that it is possible that a controller runs through its + * setup phase and then discovers missing settings. If that is the + * case, then this function will not be called. It then will only + * be called during the config phase. + * + * So only when in setup phase or config phase, create the debugfs + * entries and register the SMP channels. 
*/ - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) return 0; - debugfs_create_file("features", 0444, hdev->debugfs, hdev, - &features_fops); - debugfs_create_u16("manufacturer", 0444, hdev->debugfs, - &hdev->manufacturer); - debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); - debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); - debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, - &device_list_fops); - debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, - &blacklist_fops); - debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); - - debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, - &conn_info_min_age_fops); - debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, - &conn_info_max_age_fops); - - if (lmp_bredr_capable(hdev)) { - debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, - hdev, &inquiry_cache_fops); - debugfs_create_file("link_keys", 0400, hdev->debugfs, - hdev, &link_keys_fops); - debugfs_create_file("dev_class", 0444, hdev->debugfs, - hdev, &dev_class_fops); - debugfs_create_file("voice_setting", 0444, hdev->debugfs, - hdev, &voice_setting_fops); - } + hci_debugfs_create_common(hdev); - if (lmp_ssp_capable(hdev)) { - debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, - hdev, &auto_accept_delay_fops); - debugfs_create_file("force_sc_support", 0644, hdev->debugfs, - hdev, &force_sc_support_fops); - debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, - hdev, &sc_only_mode_fops); - if (lmp_le_capable(hdev)) - debugfs_create_file("force_lesc_support", 0644, - hdev->debugfs, hdev, - &force_lesc_support_fops); - } - - if (lmp_sniff_capable(hdev)) { - debugfs_create_file("idle_timeout", 0644, hdev->debugfs, - hdev, &idle_timeout_fops); - debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, - hdev, &sniff_min_interval_fops); - 
debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, - hdev, &sniff_max_interval_fops); - } + if (lmp_bredr_capable(hdev)) + hci_debugfs_create_bredr(hdev); - if (lmp_le_capable(hdev)) { - debugfs_create_file("identity", 0400, hdev->debugfs, - hdev, &identity_fops); - debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, - hdev, &rpa_timeout_fops); - debugfs_create_file("random_address", 0444, hdev->debugfs, - hdev, &random_address_fops); - debugfs_create_file("static_address", 0444, hdev->debugfs, - hdev, &static_address_fops); - - /* For controllers with a public address, provide a debug - * option to force the usage of the configured static - * address. By default the public address is used. - */ - if (bacmp(&hdev->bdaddr, BDADDR_ANY)) - debugfs_create_file("force_static_address", 0644, - hdev->debugfs, hdev, - &force_static_address_fops); - - debugfs_create_u8("white_list_size", 0444, hdev->debugfs, - &hdev->le_white_list_size); - debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, - &white_list_fops); - debugfs_create_file("identity_resolving_keys", 0400, - hdev->debugfs, hdev, - &identity_resolving_keys_fops); - debugfs_create_file("long_term_keys", 0400, hdev->debugfs, - hdev, &long_term_keys_fops); - debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, - hdev, &conn_min_interval_fops); - debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, - hdev, &conn_max_interval_fops); - debugfs_create_file("conn_latency", 0644, hdev->debugfs, - hdev, &conn_latency_fops); - debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, - hdev, &supervision_timeout_fops); - debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, - hdev, &adv_channel_map_fops); - debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, - hdev, &adv_min_interval_fops); - debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, - hdev, &adv_max_interval_fops); - debugfs_create_u16("discov_interleaved_timeout", 0644, - hdev->debugfs, - 
&hdev->discov_interleaved_timeout); - - smp_register(hdev); - } + if (lmp_le_capable(hdev)) + hci_debugfs_create_le(hdev); return 0; } @@ -2624,6 +1617,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); if (test_bit(HCI_MGMT, &hdev->dev_flags)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2635,6 +1629,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { if (hdev->dev_type == HCI_BREDR) mgmt_powered(hdev, 0); @@ -2645,6 +1641,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); + smp_unregister(hdev); + hci_notify(hdev, HCI_DEV_DOWN); if (hdev->flush) @@ -2724,32 +1722,14 @@ done: return err; } -int hci_dev_reset(__u16 dev) +static int hci_dev_do_reset(struct hci_dev *hdev) { - struct hci_dev *hdev; - int ret = 0; + int ret; - hdev = hci_dev_get(dev); - if (!hdev) - return -ENODEV; + BT_DBG("%s %p", hdev->name, hdev); hci_req_lock(hdev); - if (!test_bit(HCI_UP, &hdev->flags)) { - ret = -ENETDOWN; - goto done; - } - - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { - ret = -EBUSY; - goto done; - } - - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { - ret = -EOPNOTSUPP; - goto done; - } - /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); @@ -2772,12 +1752,41 @@ int hci_dev_reset(__u16 dev) ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); -done: hci_req_unlock(hdev); - hci_dev_put(hdev); return ret; } +int hci_dev_reset(__u16 dev) +{ + struct hci_dev *hdev; + int err; + + hdev = hci_dev_get(dev); + if (!hdev) + return -ENODEV; + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = -ENETDOWN; + goto done; + } + + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto 
done; + } + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + + err = hci_dev_do_reset(hdev); + +done: + hci_dev_put(hdev); + return err; +} + int hci_dev_reset_stat(__u16 dev) { struct hci_dev *hdev; @@ -3143,6 +2152,24 @@ static void hci_power_off(struct work_struct *work) hci_dev_do_close(hdev); } +static void hci_error_reset(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) + hdev->hw_error(hdev, hdev->hw_error_code); + else + BT_ERR("%s hardware error 0x%2.2x", hdev->name, + hdev->hw_error_code); + + if (hci_dev_do_close(hdev)) + return; + + hci_dev_do_open(hdev); +} + static void hci_discov_off(struct work_struct *work) { struct hci_dev *hdev; @@ -3555,9 +2582,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x03; } else { memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x02; + else + data->present = 0x00; } if (hash256 && rand256) { @@ -3566,6 +2599,8 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); + if (hash192 && rand192) + data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); @@ -3659,23 +2694,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, return NULL; } -static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) -{ - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); - if (!conn) - return false; - - if (conn->dst_type != type) - return false; - - if (conn->state != BT_CONNECTED) - return false; - - return 
true; -} - /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type) @@ -3731,47 +2749,6 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, return params; } -/* This function requires the caller holds hdev->lock */ -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect) -{ - struct hci_conn_params *params; - - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; - - if (params->auto_connect == auto_connect) - return 0; - - list_del_init(&params->action); - - switch (auto_connect) { - case HCI_AUTO_CONN_DISABLED: - case HCI_AUTO_CONN_LINK_LOSS: - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_REPORT: - list_add(&params->action, &hdev->pend_le_reports); - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { - list_add(&params->action, &hdev->pend_le_conns); - hci_update_background_scan(hdev); - } - break; - } - - params->auto_connect = auto_connect; - - BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, - auto_connect); - - return 0; -} - static void hci_conn_params_free(struct hci_conn_params *params) { if (params->conn) { @@ -3828,7 +2805,7 @@ void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status) +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) { if (status) { BT_ERR("Failed to start inquiry: status %d", status); @@ -3840,7 +2817,8 @@ static void inquiry_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33,
0x8b, 0x9e }; @@ -3853,6 +2831,8 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) return; } + hdev->discovery.scan_start = 0; + switch (hdev->discovery.type) { case DISCOV_TYPE_LE: hci_dev_lock(hdev); @@ -3892,6 +2872,8 @@ static void le_scan_disable_work(struct work_struct *work) BT_DBG("%s", hdev->name); + cancel_delayed_work_sync(&hdev->le_scan_restart); + hci_req_init(&req, hdev); hci_req_add_le_scan_disable(&req); @@ -3901,110 +2883,72 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct hci_dev *hdev = req->hdev; + unsigned long timeout, duration, scan_start, now; - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || - hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { - BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); return; } - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + return; - /* If privacy is enabled use a resolvable private address. 
If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { - int to; - - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && - !bacmp(&hdev->random_addr, &hdev->rpa)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); - return err; - } - - set_random_addr(req, &hdev->rpa); - - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; - return 0; + timeout = duration - elapsed; + } else { + timeout = 0; } + queue_delayed_work(hdev->workqueue, + &hdev->le_scan_disable, timeout); +} - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + struct hci_request req; + struct hci_cp_le_set_scan_enable cp; + int err; - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. 
- */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; + BT_DBG("%s", hdev->name); - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } + hci_req_init(&req, hdev); - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } + hci_req_add_le_scan_disable(&req); - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - return 0; + err = hci_req_run(&req, le_scan_restart_work_complete); + if (err) + BT_ERR("Restart LE scan request failed: err %d", err); } /* Copy the Identity Address of the controller. @@ -4015,12 +2959,18 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * * For debugging purposes it is possible to force controllers with a * public address to use the static random address instead. + * + * In case BR/EDR has been disabled on a dual-mode controller and + * userspace has configured a static address, then that address + * becomes the identity address instead of the public BR/EDR address. 
*/ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; } else { @@ -4059,6 +3009,12 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; + hdev->le_max_tx_time = 0x0148; + hdev->le_max_rx_len = 0x001b; + hdev->le_max_rx_time = 0x0148; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; @@ -4086,10 +3042,12 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); @@ -4259,8 +3217,6 @@ void hci_unregister_dev(struct hci_dev *hdev) rfkill_destroy(hdev->rfkill); } - smp_unregister(hdev); - device_del(&hdev->dev); debugfs_remove_recursive(hdev->debugfs); @@ -4539,76 +3495,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) } } -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; 
- - BT_DBG("length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. - */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - bool hci_req_pending(struct hci_dev *hdev) { return (hdev->req_status == HCI_REQ_PEND); } -static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - memcpy(skb_put(skb, plen), param, plen); - - BT_DBG("skb len %d", skb->len); - - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; - - return skb; -} - /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -4634,43 +3525,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, return 0; } -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. 
- */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; - - bt_cb(skb)->req.event = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - hci_req_add_ev(req, opcode, plen, param, 0); -} - /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -5429,7 +4283,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) call_complete: if (req_complete) - req_complete(hdev, status); + req_complete(hdev, status, status ? opcode : HCI_OP_NOP); } static void hci_rx_work(struct work_struct *work) @@ -5518,302 +4372,3 @@ static void hci_cmd_work(struct work_struct *work) } } } - -void hci_req_add_le_scan_disable(struct hci_request *req) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) -{ - struct hci_cp_le_add_to_white_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, &params->addr); - - hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); -} - -static u8 update_white_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - uint8_t white_list_entries = 0; - - /* Go through the current white list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller.
- */ - list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; - - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; - continue; - } - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); - } - - /* Since all no longer valid white list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available white list entries in the controller, then - * just abort and return filer policy value to not use the - * white list. - */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - &params->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, &params->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * white list if there is still space.
- */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - &params->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, &params->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* Select filter policy to use white list */ - return 0x01; -} - -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. Not using an non-resolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. - */ - if (hci_update_random_address(req, false, &own_addr_type)) - return; - - /* Adding or removing entries from the white list must - * happen before enabling scanning. The controller does - * not allow white list modification while scanning. - */ - filter_policy = update_white_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no whitelist) - * and 0x01 (whitelist enabled) use the new filter policies - * 0x02 (no whitelist) and 0x03 (whitelist enabled).
- */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - memset(&param_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_PASSIVE; - param_cp.interval = cpu_to_le16(hdev->le_scan_interval); - param_cp.window = cpu_to_le16(hdev->le_scan_window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - &param_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); -} - -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. - */ -void hci_update_background_scan(struct hci_dev *hdev) -{ - struct hci_request req; - struct hci_conn *conn; - int err; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only.
- * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. - */ - hci_discovery_filter_clear(hdev); - - hci_req_init(&req, hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return; - - hci_req_add_le_scan_disable(&req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). 
- */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(&req); - - hci_req_add_le_passive_scan(&req); - - BT_DBG("%s starting background scanning", hdev->name); - } - - err = hci_req_run(&req, update_background_scan_complete); - if (err) - BT_ERR("Failed to run HCI request: err %d", err); -} - -static bool disconnected_whitelist_entries(struct hci_dev *hdev) -{ - struct bdaddr_list *b; - - list_for_each_entry(b, &hdev->whitelist, list) { - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); - if (!conn) - return true; - - if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) - return true; - } - - return false; -} - -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) -{ - u8 scan; - - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return; - - if (!hdev_is_powered(hdev)) - return; - - if (mgmt_powering_down(hdev)) - return; - - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || - disconnected_whitelist_entries(hdev)) - scan = SCAN_PAGE; - else - scan = SCAN_DISABLED; - - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) - scan |= SCAN_INQUIRY; - - if (req) - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - else - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); -} diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c new file mode 100644 index 000000000000..65261e5d4b84 --- /dev/null +++ b/net/bluetooth/hci_debugfs.c @@ -0,0 +1,1056 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + 
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <linux/debugfs.h> + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "hci_debugfs.h" + +static int features_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + u8 p; + + hci_dev_lock(hdev); + for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, + hdev->features[p][0], hdev->features[p][1], + hdev->features[p][2], hdev->features[p][3], + hdev->features[p][4], hdev->features[p][5], + hdev->features[p][6], hdev->features[p][7]); + } + if (lmp_le_capable(hdev)) + seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", + hdev->le_features[0], hdev->le_features[1], + hdev->le_features[2], hdev->le_features[3], + hdev->le_features[4], hdev->le_features[5], + hdev->le_features[6], hdev->le_features[7]); + hci_dev_unlock(hdev); + + return 0; +} + +static int features_open(struct inode *inode, struct file *file) +{ + return single_open(file, features_show, inode->i_private); +} + +static const struct file_operations features_fops = { + .open = features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int device_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct hci_conn_params *p; + 
struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->whitelist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + list_for_each_entry(p, &hdev->le_conn_params, list) { + seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, + p->auto_connect); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int device_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, device_list_show, inode->i_private); +} + +static const struct file_operations device_list_fops = { + .open = device_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int blacklist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->blacklist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int blacklist_open(struct inode *inode, struct file *file) +{ + return single_open(file, blacklist_show, inode->i_private); +} + +static const struct file_operations blacklist_fops = { + .open = blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uuids_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bt_uuid *uuid; + + hci_dev_lock(hdev); + list_for_each_entry(uuid, &hdev->uuids, list) { + u8 i, val[16]; + + /* The Bluetooth UUID values are stored in big endian, + * but with reversed byte order. So convert them into + * the right order for the %pUb modifier. 
+ */ + for (i = 0; i < 16; i++) + val[i] = uuid->uuid[15 - i]; + + seq_printf(f, "%pUb\n", val); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int uuids_open(struct inode *inode, struct file *file) +{ + return single_open(file, uuids_show, inode->i_private); +} + +static const struct file_operations uuids_fops = { + .open = uuids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int remote_oob_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct oob_data *data; + + hci_dev_lock(hdev); + list_for_each_entry(data, &hdev->remote_oob_data, list) { + seq_printf(f, "%pMR (type %u) %u %*phN %*phN %*phN %*phN\n", + &data->bdaddr, data->bdaddr_type, data->present, + 16, data->hash192, 16, data->rand192, + 16, data->hash256, 16, data->rand256); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int remote_oob_open(struct inode *inode, struct file *file) +{ + return single_open(file, remote_oob_show, inode->i_private); +} + +static const struct file_operations remote_oob_fops = { + .open = remote_oob_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + 
+static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + +static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations use_debug_keys_fops = { + .open = simple_open, + .read = use_debug_keys_read, + .llseek = default_llseek, +}; + +static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations sc_only_mode_fops = { + .open = simple_open, + .read = sc_only_mode_read, + .llseek = default_llseek, +}; + +void hci_debugfs_create_common(struct hci_dev *hdev) +{ + debugfs_create_file("features", 0444, hdev->debugfs, hdev, + &features_fops); + debugfs_create_u16("manufacturer", 0444, hdev->debugfs, + &hdev->manufacturer); + debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); + debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); + debugfs_create_u8("hardware_error", 0444, hdev->debugfs, + &hdev->hw_error_code); + + debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, + &device_list_fops); + debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, + &blacklist_fops); + debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("remote_oob", 0400, hdev->debugfs, hdev, + &remote_oob_fops); + + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); + + if (lmp_ssp_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("use_debug_keys", 0444, hdev->debugfs, + hdev, &use_debug_keys_fops); + + if (lmp_sc_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, + hdev, &sc_only_mode_fops); +} + +static int inquiry_cache_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct discovery_state *cache = &hdev->discovery; + struct inquiry_entry *e; + + hci_dev_lock(hdev); + + list_for_each_entry(e, &cache->all, all) { + struct inquiry_data *data = &e->data; + seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + &data->bdaddr, + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + 
data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); + } + + hci_dev_unlock(hdev); + + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); +} + +static const struct file_operations inquiry_cache_fops = { + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int link_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct link_key *key; + + rcu_read_lock(); + list_for_each_entry_rcu(key, &hdev->link_keys, list) + seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, + HCI_LINK_KEY_SIZE, key->val, key->pin_len); + rcu_read_unlock(); + + return 0; +} + +static int link_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, link_keys_show, inode->i_private); +} + +static const struct file_operations link_keys_fops = { + .open = link_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int dev_class_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], + hdev->dev_class[1], hdev->dev_class[0]); + hci_dev_unlock(hdev); + + return 0; +} + +static int dev_class_open(struct inode *inode, struct file *file) +{ + return single_open(file, dev_class_show, inode->i_private); +} + +static const struct file_operations dev_class_fops = { + .open = dev_class_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int voice_setting_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->voice_setting; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, + NULL, "0x%4.4llx\n"); + +static ssize_t 
ssp_debug_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hdev->ssp_debug_mode ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations ssp_debug_mode_fops = { + .open = simple_open, + .read = ssp_debug_mode_read, + .llseek = default_llseek, +}; + +static int auto_accept_delay_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + hdev->auto_accept_delay = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int auto_accept_delay_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->auto_accept_delay; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, + auto_accept_delay_set, "%llu\n"); + +static int idle_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val != 0 && (val < 500 || val > 3600000)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->idle_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int idle_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->idle_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, + idle_timeout_set, "%llu\n"); + +static int sniff_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, 
sniff_min_interval_get, + sniff_min_interval_set, "%llu\n"); + +static int sniff_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, + sniff_max_interval_set, "%llu\n"); + +void hci_debugfs_create_bredr(struct hci_dev *hdev) +{ + debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, + &inquiry_cache_fops); + debugfs_create_file("link_keys", 0400, hdev->debugfs, hdev, + &link_keys_fops); + debugfs_create_file("dev_class", 0444, hdev->debugfs, hdev, + &dev_class_fops); + debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, + &voice_setting_fops); + + if (lmp_ssp_capable(hdev)) { + debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, + hdev, &ssp_debug_mode_fops); + debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, + hdev, &auto_accept_delay_fops); + } + + if (lmp_sniff_capable(hdev)) { + debugfs_create_file("idle_timeout", 0644, hdev->debugfs, + hdev, &idle_timeout_fops); + debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, + hdev, &sniff_min_interval_fops); + debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, + hdev, &sniff_max_interval_fops); + } +} + +static int identity_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + bdaddr_t addr; + u8 addr_type; + + hci_dev_lock(hdev); + + hci_copy_identity_address(hdev, &addr, &addr_type); + + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, + 16, hdev->irk, &hdev->rpa); + + hci_dev_unlock(hdev); + + return 0; +} + +static int identity_open(struct inode *inode, 
struct file *file) +{ + return single_open(file, identity_show, inode->i_private); +} + +static const struct file_operations identity_fops = { + .open = identity_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int rpa_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + /* Require the RPA timeout to be at least 30 seconds and at most + * 24 hours. + */ + if (val < 30 || val > (60 * 60 * 24)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->rpa_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int rpa_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->rpa_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, + rpa_timeout_set, "%llu\n"); + +static int random_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->random_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int random_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, random_address_show, inode->i_private); +} + +static const struct file_operations random_address_fops = { + .open = random_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int static_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->static_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int static_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, static_address_show, inode->i_private); +} + +static const struct file_operations static_address_fops = { + .open = static_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t force_static_address_read(struct file *file, + char __user *user_buf, + size_t 
count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_static_address_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (test_bit(HCI_UP, &hdev->flags)) + return -EBUSY; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) + return -EALREADY; + + change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_static_address_fops = { + .open = simple_open, + .read = force_static_address_read, + .write = force_static_address_write, + .llseek = default_llseek, +}; + +static int white_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->le_white_list, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int white_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, white_list_show, inode->i_private); +} + +static const struct file_operations white_list_fops = { + .open = white_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int identity_resolving_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_irk *irk; + + rcu_read_lock(); + list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", + &irk->bdaddr, irk->addr_type, + 
16, irk->val, &irk->rpa); + } + rcu_read_unlock(); + + return 0; +} + +static int identity_resolving_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_resolving_keys_show, + inode->i_private); +} + +static const struct file_operations identity_resolving_keys_fops = { + .open = identity_resolving_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int long_term_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_ltk *ltk; + + rcu_read_lock(); + list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) + seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", + &ltk->bdaddr, ltk->bdaddr_type, ltk->authenticated, + ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), + __le64_to_cpu(ltk->rand), 16, ltk->val); + rcu_read_unlock(); + + return 0; +} + +static int long_term_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, long_term_keys_show, inode->i_private); +} + +static const struct file_operations long_term_keys_fops = { + .open = long_term_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, + conn_min_interval_set, "%llu\n"); + +static int conn_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); 
+ hdev->le_conn_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, + conn_max_interval_set, "%llu\n"); + +static int conn_latency_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val > 0x01f3) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_latency = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_latency_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_latency; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, + conn_latency_set, "%llu\n"); + +static int supervision_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x000a || val > 0x0c80) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_supv_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int supervision_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_supv_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, + supervision_timeout_set, "%llu\n"); + +static int adv_channel_map_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x01 || val > 0x07) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_channel_map = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_channel_map_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_channel_map; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, + adv_channel_map_set, "%llu\n"); + +static int 
adv_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, + adv_min_interval_set, "%llu\n"); + +static int adv_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, + adv_max_interval_set, "%llu\n"); + +void hci_debugfs_create_le(struct hci_dev *hdev) +{ + debugfs_create_file("identity", 0400, hdev->debugfs, hdev, + &identity_fops); + debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, hdev, + &rpa_timeout_fops); + debugfs_create_file("random_address", 0444, hdev->debugfs, hdev, + &random_address_fops); + debugfs_create_file("static_address", 0444, hdev->debugfs, hdev, + &static_address_fops); + + /* For controllers with a public address, provide a debug + * option to force the usage of the configured static + * address. By default the public address is used. 
+ */ + if (bacmp(&hdev->bdaddr, BDADDR_ANY)) + debugfs_create_file("force_static_address", 0644, + hdev->debugfs, hdev, + &force_static_address_fops); + + debugfs_create_u8("white_list_size", 0444, hdev->debugfs, + &hdev->le_white_list_size); + debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, + &white_list_fops); + debugfs_create_file("identity_resolving_keys", 0400, hdev->debugfs, + hdev, &identity_resolving_keys_fops); + debugfs_create_file("long_term_keys", 0400, hdev->debugfs, hdev, + &long_term_keys_fops); + debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, hdev, + &conn_min_interval_fops); + debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, + &conn_max_interval_fops); + debugfs_create_file("conn_latency", 0644, hdev->debugfs, hdev, + &conn_latency_fops); + debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, hdev, + &supervision_timeout_fops); + debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, hdev, + &adv_channel_map_fops); + debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, hdev, + &adv_min_interval_fops); + debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, hdev, + &adv_max_interval_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, + &hdev->discov_interleaved_timeout); +} + +void hci_debugfs_create_conn(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + char name[6]; + + if (IS_ERR_OR_NULL(hdev->debugfs)) + return; + + snprintf(name, sizeof(name), "%u", conn->handle); + conn->debugfs = debugfs_create_dir(name, hdev->debugfs); +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h new file mode 100644 index 000000000000..fb68efe083c5 --- /dev/null +++ b/net/bluetooth/hci_debugfs.h @@ -0,0 +1,26 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License 
version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +void hci_debugfs_create_common(struct hci_dev *hdev); +void hci_debugfs_create_bredr(struct hci_dev *hdev); +void hci_debugfs_create_le(struct hci_dev *hdev); +void hci_debugfs_create_conn(struct hci_conn *conn); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3f2e8b830cbd..a3fb094822b6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -30,10 +30,15 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "hci_debugfs.h" #include "a2mp.h" #include "amp.h" #include "smp.h" +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) @@ -195,7 +200,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) /* Reset all non-persistent flags */ hdev->dev_flags &= ~HCI_PERSISTENT_MASK; - hdev->discovery.state = DISCOVERY_STOPPED; + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; @@ -212,6 +218,40 @@ static void 
hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) hci_bdaddr_list_clear(&hdev->le_white_list); } +static void hci_cc_read_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_stored_link_key *rp = (void *)skb->data; + struct hci_cp_read_stored_link_key *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_STORED_LINK_KEY); + if (!sent) + return; + + if (!rp->status && sent->read_all == 0x01) { + hdev->stored_max_keys = rp->max_keys; + hdev->stored_num_keys = rp->num_keys; + } +} + +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_delete_stored_link_key *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + if (rp->num_keys <= hdev->stored_num_keys) + hdev->stored_num_keys -= rp->num_keys; + else + hdev->stored_num_keys = 0; +} + static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -489,9 +529,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_sc_enable_complete(hdev, sent->support, status); - else if (!status) { + if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { if (sent->support) set_bit(HCI_SC_ENABLED, &hdev->dev_flags); else @@ -1282,6 +1320,55 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, memcpy(hdev->le_states, rp->le_states, 8); } +static void hci_cc_le_read_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_def_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_def_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_def_tx_time = le16_to_cpu(rp->tx_time); +} + +static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, 
+ struct sk_buff *skb) +{ + struct hci_cp_le_write_def_data_len *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN); + if (!sent) + return; + + hdev->le_def_tx_len = le16_to_cpu(sent->tx_len); + hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); +} + +static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_max_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_max_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_max_tx_time = le16_to_cpu(rp->tx_time); + hdev->le_max_rx_len = le16_to_cpu(rp->rx_len); + hdev->le_max_rx_time = le16_to_cpu(rp->rx_time); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1402,6 +1489,21 @@ unlock: hci_dev_unlock(hdev); } +static void hci_cc_write_ssp_debug_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + u8 *mode; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + mode = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE); + if (mode) + hdev->ssp_debug_mode = *mode; +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -2115,6 +2217,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2130,7 +2233,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } /* Set packet type for incoming connection */ @@ -2316,7 +2419,7 @@ static void hci_disconn_complete_evt(struct 
hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); @@ -2583,7 +2686,8 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (conn->state != BT_CONFIG) goto unlock; - if (!ev->status && lmp_ssp_capable(hdev) && lmp_ssp_capable(conn)) { + if (!ev->status && lmp_ext_feat_capable(hdev) && + lmp_ext_feat_capable(conn)) { struct hci_cp_read_remote_ext_features cp; cp.handle = ev->handle; cp.page = 0x01; @@ -2662,6 +2766,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_reset(hdev, skb); break; + case HCI_OP_READ_STORED_LINK_KEY: + hci_cc_read_stored_link_key(hdev, skb); + break; + + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + case HCI_OP_WRITE_LOCAL_NAME: hci_cc_write_local_name(hdev, skb); break; @@ -2854,6 +2966,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_supported_states(hdev, skb); break; + case HCI_OP_LE_READ_DEF_DATA_LEN: + hci_cc_le_read_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_WRITE_DEF_DATA_LEN: + hci_cc_le_write_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_READ_MAX_DATA_LEN: + hci_cc_le_read_max_data_len(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: hci_cc_write_le_host_supported(hdev, skb); break; @@ -2874,6 +2998,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_tx_power(hdev, skb); break; + case HCI_OP_WRITE_SSP_DEBUG_MODE: + hci_cc_write_ssp_debug_mode(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; @@ -2992,7 +3120,9 @@ static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_hardware_error *ev = (void *) skb->data; - BT_ERR("%s hardware error 0x%2.2x", 
hdev->name, ev->code); + hdev->hw_error_code = ev->code; + + queue_work(hdev->req_workqueue, &hdev->error_reset); } static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3584,6 +3714,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); break; @@ -3750,6 +3881,52 @@ static u8 hci_get_auth_req(struct hci_conn *conn) return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01); } +static u8 bredr_oob_data_present(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct oob_data *data; + + data = hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR); + if (!data) + return 0x00; + + if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { + if (bredr_sc_enabled(hdev)) { + /* When Secure Connections is enabled, then just + * return the present value stored with the OOB + * data. The stored value contains the right present + * information. However it can only be trusted when + * not in Secure Connection Only mode. + */ + if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags)) + return data->present; + + /* When Secure Connections Only mode is enabled, then + * the P-256 values are required. If they are not + * available, then do not declare that OOB data is + * present. + */ + if (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16)) + return 0x00; + + return 0x02; + } + + /* When Secure Connections is not enabled or actually + * not supported by the hardware, then check that if + * P-192 data values are present. 
+ */ + if (!memcmp(data->rand192, ZERO_KEY, 16) || + !memcmp(data->hash192, ZERO_KEY, 16)) + return 0x00; + + return 0x01; + } + + return 0x00; +} + static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_io_capa_request *ev = (void *) skb->data; @@ -3801,12 +3978,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->auth_type &= HCI_AT_NO_BONDING_MITM; cp.authentication = conn->auth_type; - - if (hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR) && - (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags))) - cp.oob_data = 0x01; - else - cp.oob_data = 0x00; + cp.oob_data = bredr_oob_data_present(conn); hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY, sizeof(cp), &cp); @@ -4058,33 +4230,39 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, goto unlock; data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR); - if (data) { - if (bredr_sc_enabled(hdev)) { - struct hci_cp_remote_oob_ext_data_reply cp; - - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); - memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); - memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); - memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + if (!data) { + struct hci_cp_remote_oob_data_neg_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, - sizeof(cp), &cp); - } else { - struct hci_cp_remote_oob_data_reply cp; + bacpy(&cp.bdaddr, &ev->bdaddr); + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + sizeof(cp), &cp); + goto unlock; + } - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash, data->hash192, sizeof(cp.hash)); - memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + if (bredr_sc_enabled(hdev)) { + struct hci_cp_remote_oob_ext_data_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, - sizeof(cp), &cp); + bacpy(&cp.bdaddr, &ev->bdaddr); + if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + memset(cp.hash192, 0, 
sizeof(cp.hash192)); + memset(cp.rand192, 0, sizeof(cp.rand192)); + } else { + memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); + memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); } + memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); + memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, + sizeof(cp), &cp); } else { - struct hci_cp_remote_oob_data_neg_reply cp; + struct hci_cp_remote_oob_data_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + memcpy(cp.hash, data->hash192, sizeof(cp.hash)); + memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, sizeof(cp), &cp); } @@ -4124,6 +4302,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(hcon); + hci_debugfs_create_conn(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -4330,6 +4509,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->le_conn_latency = le16_to_cpu(ev->latency); conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c new file mode 100644 index 000000000000..b59f92c6df0c --- /dev/null +++ b/net/bluetooth/hci_request.c @@ -0,0 +1,556 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "smp.h" +#include "hci_request.h" + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occurred during request building, remove all HCI + * commands queued on the HCI request queue. 
+ */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param) +{ + int len = HCI_COMMAND_HDR_SIZE + plen; + struct hci_command_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) + return NULL; + + hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); + hdr->opcode = cpu_to_le16(opcode); + hdr->plen = plen; + + if (plen) + memcpy(skb_put(skb, plen), param, plen); + + BT_DBG("skb len %d", skb->len); + + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->opcode = opcode; + + return skb; +} + +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occurred during request building, there is no point in + * queueing the HCI command. We can simply return. 
+ */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + bt_cb(skb)->req.event = event; + + skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + +void hci_req_add_le_scan_disable(struct hci_request *req) +{ + struct hci_cp_le_set_scan_enable cp; + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void add_to_white_list(struct hci_request *req, + struct hci_conn_params *params) +{ + struct hci_cp_le_add_to_white_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, &params->addr); + + hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); +} + +static u8 update_white_list(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + struct bdaddr_list *b; + uint8_t white_list_entries = 0; + + /* Go through the current white list programmed into the + * controller one by one and check if that address is still + * in the list of pending connections or list of devices to + * report. If not present in either list, then queue the + * command to remove it from the controller.
+ */ + list_for_each_entry(b, &hdev->le_white_list, list) { + struct hci_cp_le_del_from_white_list cp; + + if (hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) || + hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + white_list_entries++; + continue; + } + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); + } + + /* Since all no longer valid white list entries have been + * removed, walk through the list of pending connections + * and ensure that any new device gets programmed into + * the controller. + * + * If the list of the devices is larger than the list of + * available white list entries in the controller, then + * just abort and return filter policy value to not use the + * white list. + */ + list_for_each_entry(params, &hdev->pend_le_conns, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + &params->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, &params->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * white list if there is still space.
+ */ + list_for_each_entry(params, &hdev->pend_le_reports, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + &params->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, &params->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* Select filter policy to use white list */ + return 0x01; +} + +void hci_req_add_le_passive_scan(struct hci_request *req) +{ + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_dev *hdev = req->hdev; + u8 own_addr_type; + u8 filter_policy; + + /* Set require_privacy to false since no SCAN_REQ are sent + * during passive scanning. Not using a non-resolvable address + * here is important so that peer devices using direct + * advertising with our address will be correctly reported + * by the controller. + */ + if (hci_update_random_address(req, false, &own_addr_type)) + return; + + /* Adding or removing entries from the white list must + * happen before enabling scanning. The controller does + * not allow white list modification while scanning. + */ + filter_policy = update_white_list(req); + + /* When the controller is using random resolvable addresses and + * with that having LE privacy enabled, then controllers with + * Extended Scanner Filter Policies support can now enable support + * for handling directed advertising. + * + * So instead of using filter policies 0x00 (no whitelist) + * and 0x01 (whitelist enabled) use the new filter policies + * 0x02 (no whitelist) and 0x03 (whitelist enabled).
+ */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) + filter_policy |= 0x02; + + memset(&param_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_PASSIVE; + param_cp.interval = cpu_to_le16(hdev->le_scan_interval); + param_cp.window = cpu_to_le16(hdev->le_scan_window); + param_cp.own_address_type = own_addr_type; + param_cp.filter_policy = filter_policy; + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + &param_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); +} + +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + BT_DBG("Deferring random address update"); + set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one.
+ */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + int to; + + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + !bacmp(&hdev->random_addr, &hdev->rpa)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + BT_ERR("%s failed to generate new RPA", hdev->name); + return err; + } + + set_random_addr(req, &hdev->rpa); + + to = msecs_to_jiffies(hdev->rpa_timeout * 1000); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use a non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one). + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configured, then use that + * address instead of the public BR/EDR address.
+ */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} + +static bool disconnected_whitelist_entries(struct hci_dev *hdev) +{ + struct bdaddr_list *b; + + list_for_each_entry(b, &hdev->whitelist, list) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); + if (!conn) + return true; + + if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) + return true; + } + + return false; +} + +void __hci_update_page_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 scan; + + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return; + + if (!hdev_is_powered(hdev)) + return; + + if (mgmt_powering_down(hdev)) + return; + + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + disconnected_whitelist_entries(hdev)) + scan = SCAN_PAGE; + else + scan = SCAN_DISABLED; + + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) + return; + + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + scan |= SCAN_INQUIRY; + + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +void hci_update_page_scan(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_update_page_scan(&req); + hci_req_run(&req, NULL); +} + +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. 
+ */ +void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn *conn; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || + test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || + test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). 
+ */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + +static void update_background_scan_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + if (status) + BT_DBG("HCI request failed to update background scanning: " + "status 0x%2.2x", status); +} + +void hci_update_background_scan(struct hci_dev *hdev) +{ + int err; + struct hci_request req; + + hci_req_init(&req, hdev); + + __hci_update_background_scan(&req); + + err = hci_req_run(&req, update_background_scan_complete); + if (err && err != -ENODATA) + BT_ERR("Failed to run HCI request: err %d", err); +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h new file mode 100644 index 000000000000..adf074d33544 --- /dev/null +++ b/net/bluetooth/hci_request.h @@ -0,0 +1,54 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param); + +void hci_req_add_le_scan_disable(struct hci_request *req); +void hci_req_add_le_passive_scan(struct hci_request *req); + +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type); + +void hci_update_background_scan(struct hci_dev *hdev); +void __hci_update_background_scan(struct hci_request *req); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2c245fdf319a..1d65c5be7c82 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -216,11 +216,39 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) read_unlock(&hci_sk_list.lock); } +static void queue_monitor_skb(struct sk_buff *skb) +{ + struct sock *sk; + + BT_DBG("len %d", skb->len); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *nskb; + + if (sk->sk_state != BT_BOUND) + continue; + + if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) + continue; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Send frame to monitor socket */ void hci_send_to_monitor(struct 
hci_dev *hdev, struct sk_buff *skb) { - struct sock *sk; struct sk_buff *skb_copy = NULL; + struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) @@ -251,74 +279,21 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; } - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - if (!skb_copy) { - struct hci_mon_hdr *hdr; - - /* Create a private copy with headroom */ - skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC, true); - if (!skb_copy) - continue; - - /* Put header before the data */ - hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); - hdr->opcode = opcode; - hdr->index = cpu_to_le16(hdev->id); - hdr->len = cpu_to_le16(skb->len); - } - - nskb = skb_clone(skb_copy, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } + /* Create a private copy with headroom */ + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); + if (!skb_copy) + return; - read_unlock(&hci_sk_list.lock); + /* Put header before the data */ + hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr->opcode = opcode; + hdr->index = cpu_to_le16(hdev->id); + hdr->len = cpu_to_le16(skb->len); + queue_monitor_skb(skb_copy); kfree_skb(skb_copy); } -static void send_monitor_event(struct sk_buff *skb) -{ - struct sock *sk; - - BT_DBG("len %d", skb->len); - - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); -} - static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr 
*hdr; @@ -422,7 +397,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) skb = create_monitor_event(hdev, event); if (skb) { - send_monitor_event(skb); + queue_monitor_skb(skb); kfree_skb(skb); } } @@ -1230,6 +1205,8 @@ int __init hci_sock_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); + err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d04dc0095736..6ba33f9631e8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -63,10 +63,10 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); -static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) +static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { - if (hcon->type == LE_LINK) { - if (type == ADDR_LE_DEV_PUBLIC) + if (link_type == LE_LINK) { + if (bdaddr_type == ADDR_LE_DEV_PUBLIC) return BDADDR_LE_PUBLIC; else return BDADDR_LE_RANDOM; @@ -75,6 +75,16 @@ static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) return BDADDR_BREDR; } +static inline u8 bdaddr_src_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->src_type); +} + +static inline u8 bdaddr_dst_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->dst_type); +} + /* ---- L2CAP channels ---- */ static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, @@ -646,7 +656,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); bacpy(&chan->dst, &hcon->dst); - chan->dst_type = bdaddr_type(hcon, hcon->dst_type); + chan->dst_type = bdaddr_dst_type(hcon); l2cap_chan_unlock(chan); } @@ -3790,8 +3800,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); 
bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->local_amp_id = amp_id; @@ -5441,8 +5451,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->omtu = mtu; @@ -6881,7 +6891,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) */ if (hcon->type == LE_LINK && hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst, - bdaddr_type(hcon, hcon->dst_type))) { + bdaddr_dst_type(hcon))) { kfree_skb(skb); return; } @@ -6968,7 +6978,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && (bredr_sc_enabled(hcon->hdev) || - test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags))) + test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); @@ -7123,7 +7133,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, /* Update source addr of the socket */ bacpy(&chan->src, &hcon->src); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); __l2cap_chan_add(conn, chan); @@ -7197,8 +7207,10 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) * global list (by passing NULL as first parameter). 
*/ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, - bdaddr_t *src, u8 link_type) + struct hci_conn *hcon) { + u8 src_type = bdaddr_src_type(hcon); + read_lock(&chan_list_lock); if (c) @@ -7211,11 +7223,9 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, continue; if (c->state != BT_LISTEN) continue; - if (bacmp(&c->src, src) && bacmp(&c->src, BDADDR_ANY)) + if (bacmp(&c->src, &hcon->src) && bacmp(&c->src, BDADDR_ANY)) continue; - if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) - continue; - if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + if (src_type != c->src_type) continue; l2cap_chan_hold(c); @@ -7246,7 +7256,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (!conn) return; - dst_type = bdaddr_type(hcon, hcon->dst_type); + dst_type = bdaddr_dst_type(hcon); /* If device is blocked, do not create channels for it */ if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type)) @@ -7257,7 +7267,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) * we left off, because the list lock would prevent calling the * potentially sleeping l2cap_chan_lock() function. 
*/ - pchan = l2cap_global_fixed_chan(NULL, &hdev->bdaddr, hcon->type); + pchan = l2cap_global_fixed_chan(NULL, hcon); while (pchan) { struct l2cap_chan *chan, *next; @@ -7270,7 +7280,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (chan) { bacpy(&chan->src, &hcon->src); bacpy(&chan->dst, &hcon->dst); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); chan->dst_type = dst_type; __l2cap_chan_add(conn, chan); @@ -7278,8 +7288,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) l2cap_chan_unlock(pchan); next: - next = l2cap_global_fixed_chan(pchan, &hdev->bdaddr, - hcon->type); + next = l2cap_global_fixed_chan(pchan, hcon); l2cap_chan_put(pchan); pchan = next; } @@ -7527,8 +7536,8 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { - seq_printf(f, "%pMR %pMR %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", - &c->src, &c->dst, + seq_printf(f, "%pMR (%u) %pMR (%u) %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", + &c->src, c->src_type, &c->dst, c->dst_type, c->state, __le16_to_cpu(c->psm), c->scid, c->dcid, c->imtu, c->omtu, c->sec_level, c->mode); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f65caf41953f..60694f0f4c73 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -302,7 +302,7 @@ done: static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -316,8 +316,6 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, /* Wait for an incoming connection. (wake-one). 
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -338,10 +336,11 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, L2CAP_NESTING_PARENT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1614,6 +1613,8 @@ int __init l2cap_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_l2) > sizeof(struct sockaddr)); + err = proto_register(&l2cap_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 693ce8bcd06e..9ec5390c85eb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,7 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" #include "smp.h" #define MGMT_VERSION 1 @@ -130,6 +131,9 @@ static const u16 mgmt_events[] = { #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + struct pending_cmd { struct list_head list; u16 opcode; @@ -138,7 +142,7 @@ struct pending_cmd { size_t param_len; struct sock *sk; void *user_data; - void (*cmd_complete)(struct pending_cmd *cmd, u8 status); + int (*cmd_complete)(struct pending_cmd *cmd, u8 status); }; /* HCI to MGMT error code conversion table */ @@ -569,8 +573,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_HS; } - if (lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; } @@ -1251,7 +1254,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) sizeof(settings)); } -static void clean_up_hci_complete(struct hci_dev *hdev, u8 status) +static void 
clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status 0x%02x", hdev->name, status); @@ -1486,16 +1489,16 @@ static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) cmd_status_rsp(cmd, data); } -static void generic_cmd_complete(struct pending_cmd *cmd, u8 status) +static int generic_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - cmd->param_len); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, cmd->param_len); } -static void addr_cmd_complete(struct pending_cmd *cmd, u8 status) +static int addr_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - sizeof(struct mgmt_addr_info)); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, + sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) @@ -1518,7 +1521,8 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status) +static void set_discoverable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1566,7 +1570,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status) * entries. 
*/ hci_req_init(&req, hdev); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -1777,7 +1781,8 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status) +static void set_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1813,7 +1818,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -1847,7 +1852,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -2195,7 +2200,7 @@ unlock: return err; } -static void le_enable_complete(struct hci_dev *hdev, u8 status) +static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -2227,9 +2232,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); + __hci_update_background_scan(&req); hci_req_run(&req, NULL); - - hci_update_background_scan(hdev); } unlock: @@ -2386,7 +2390,7 @@ unlock: hci_dev_unlock(hdev); } -static void add_uuid_complete(struct hci_dev *hdev, u8 status) +static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2465,7 +2469,7 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } -static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +static void remove_uuid_complete(struct hci_dev 
*hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2550,7 +2554,7 @@ unlock: return err; } -static void set_class_complete(struct hci_dev *hdev, u8 status) +static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -3098,16 +3102,17 @@ static struct pending_cmd *find_pairing(struct hci_conn *conn) return NULL; } -static void pairing_complete(struct pending_cmd *cmd, u8 status) +static int pairing_complete(struct pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; + int err; bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, + &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -3122,6 +3127,8 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); hci_conn_put(conn); + + return err; } void mgmt_smp_complete(struct hci_conn *conn, bool complete) @@ -3481,7 +3488,7 @@ static void update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } -static void set_name_complete(struct hci_dev *hdev, u8 status) +static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; struct pending_cmd *cmd; @@ -3629,10 +3636,16 @@ unlock: static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { + struct mgmt_addr_info *addr = data; int err; BT_DBG("%s ", hdev->name); + if (!bdaddr_type_is_valid(addr->type)) + return cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, addr, + sizeof(*addr)); + hci_dev_lock(hdev); if (len == MGMT_ADD_REMOTE_OOB_DATA_SIZE) { @@ -3659,28 +3672,53 @@ static int 
add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, status, &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; - u8 *rand192, *hash192; + u8 *rand192, *hash192, *rand256, *hash256; u8 status; - if (cp->addr.type != BDADDR_BREDR) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); - goto unlock; - } - if (bdaddr_type_is_le(cp->addr.type)) { + /* Enforce zero-valued 192-bit parameters as + * long as legacy SMP OOB isn't implemented. + */ + if (memcmp(cp->rand192, ZERO_KEY, 16) || + memcmp(cp->hash192, ZERO_KEY, 16)) { + err = cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); + goto unlock; + } + rand192 = NULL; hash192 = NULL; } else { - rand192 = cp->rand192; - hash192 = cp->hash192; + /* In case one of the P-192 values is set to zero, + * then just disable OOB data for P-192. + */ + if (!memcmp(cp->rand192, ZERO_KEY, 16) || + !memcmp(cp->hash192, ZERO_KEY, 16)) { + rand192 = NULL; + hash192 = NULL; + } else { + rand192 = cp->rand192; + hash192 = cp->hash192; + } + } + + /* In case one of the P-256 values is set to zero, then just + * disable OOB data for P-256. 
+ */ + if (!memcmp(cp->rand256, ZERO_KEY, 16) || + !memcmp(cp->hash256, ZERO_KEY, 16)) { + rand256 = NULL; + hash256 = NULL; + } else { + rand256 = cp->rand256; + hash256 = cp->hash256; } err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type, hash192, rand192, - cp->hash256, cp->rand256); + hash256, rand256); if (err < 0) status = MGMT_STATUS_FAILED; else @@ -3832,7 +3870,8 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) return true; } -static void start_discovery_complete(struct hci_dev *hdev, u8 status) +static void start_discovery_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; unsigned long timeout; @@ -3857,6 +3896,9 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) hci_discovery_set_state(hdev, DISCOVERY_FINDING); + /* If the scan involves LE scan, pick proper timeout to schedule + * hdev->le_scan_disable that will stop it. + */ switch (hdev->discovery.type) { case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); @@ -3873,9 +3915,23 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) break; } - if (timeout) + if (timeout) { + /* When service discovery is used and the controller has + * a strict duplicate filter, it is important to remember + * the start and duration of the scan. This is required + * for restarting scanning during the discovery phase. 
+ */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks) && + (hdev->discovery.uuid_count > 0 || + hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); + } unlock: hci_dev_unlock(hdev); @@ -3947,9 +4003,10 @@ failed: return err; } -static void service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) +static int service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, 1); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, @@ -4060,7 +4117,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4286,7 +4343,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void set_advertising_complete(struct hci_dev *hdev, u8 status) +static void set_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -4493,7 +4551,8 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return err; } -static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +static void fast_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; @@ -4591,7 +4650,7 @@ unlock: return err; } -static void set_bredr_complete(struct hci_dev *hdev, u8 status) +static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4675,6 +4734,28 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, 
MGMT_STATUS_REJECTED); goto unlock; + } else { + /* When configuring a dual-mode controller to operate + * with LE only and using a static address, then switching + * BR/EDR back on is not allowed. + * + * Dual-mode controllers shall operate with the public + * address as its identity address for BR/EDR and LE. So + * reject the attempt to create an invalid configuration. + * + * The same restrictions applies when secure connections + * has been enabled. For BR/EDR this is a controller feature + * while for LE it is a host stack feature. This means that + * switching BR/EDR back on when secure connections has been + * enabled is not a supported transaction. + */ + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (bacmp(&hdev->static_addr, BDADDR_ANY) || + test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); + goto unlock; + } } if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { @@ -4697,7 +4778,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. 
@@ -4713,30 +4794,80 @@ unlock: return err; } +static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_mode *cp; + + BT_DBG("%s status %u", hdev->name, status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + goto remove; + } + + cp = cmd->param; + + switch (cp->val) { + case 0x00: + clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x01: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x02: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + set_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + } + + send_settings_rsp(cmd->sk, MGMT_OP_SET_SECURE_CONN, hdev); + new_settings(hdev, cmd->sk); + +remove: + mgmt_pending_remove(cmd); +unlock: + hci_dev_unlock(hdev); +} + static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 val; int err; BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !lmp_sc_capable(hdev) && !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (!lmp_sc_capable(hdev) && + !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + lmp_sc_capable(hdev) && + !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_REJECTED); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (!hdev_is_powered(hdev) || - (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) || + if 
(!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { bool changed; @@ -4783,17 +4914,14 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + hci_req_init(&req, hdev); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + err = hci_req_run(&req, sc_enable_complete); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } - if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); - else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - failed: hci_dev_unlock(hdev); return err; @@ -5091,10 +5219,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_conn_info rp; + int err; memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); @@ -5108,14 +5237,17 @@ static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) rp.max_tx_power = HCI_TX_POWER_INVALID; } - cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + &rp, sizeof(rp)); hci_conn_drop(conn); hci_conn_put(conn); + + return err; } -static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status) +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, + u16 opcode) { struct hci_cp_read_rssi *cp; struct pending_cmd *cmd; @@ -5286,11 +5418,12 @@ unlock: return err; } -static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_clock_info rp; struct hci_dev *hdev; + int err; memset(&rp, 0, sizeof(rp)); memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); @@ 
-5310,15 +5443,18 @@ static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) } complete: - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { hci_conn_drop(conn); hci_conn_put(conn); } + + return err; } -static void get_clock_info_complete(struct hci_dev *hdev, u8 status) +static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; struct pending_cmd *cmd; @@ -5425,6 +5561,65 @@ unlock: return err; } +static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) +{ + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + if (!conn) + return false; + + if (conn->dst_type != type) + return false; + + if (conn->state != BT_CONNECTED) + return false; + + return true; +} + +/* This function requires the caller holds hdev->lock */ +static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, + u8 addr_type, u8 auto_connect) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + if (params->auto_connect == auto_connect) + return 0; + + list_del_init(¶ms->action); + + switch (auto_connect) { + case HCI_AUTO_CONN_DISABLED: + case HCI_AUTO_CONN_LINK_LOSS: + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &hdev->pend_le_reports); + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + if (!is_connected(hdev, addr, addr_type)) { + list_add(¶ms->action, &hdev->pend_le_conns); + __hci_update_background_scan(req); + } + break; + } + + params->auto_connect = auto_connect; + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + auto_connect); + + return 0; +} + static void device_added(struct sock *sk, struct hci_dev *hdev, 
bdaddr_t *bdaddr, u8 type, u8 action) { @@ -5437,10 +5632,31 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } +static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5457,14 +5673,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5473,7 +5699,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); goto added; } @@ -5493,19 +5719,25 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. 
*/ - if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_FAILED, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); goto unlock; } added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, add_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -5523,24 +5755,55 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } +static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - 
MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5549,14 +5812,13 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -5571,23 +5833,23 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } list_del(¶ms->action); list_del(¶ms->list); kfree(params); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -5595,9 +5857,9 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5607,7 +5869,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - 
hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -5620,12 +5882,19 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, BT_DBG("All LE connection parameters were removed"); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); } complete: - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, remove_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -6037,8 +6306,9 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_dev *hdev) +static void restart_le_actions(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -6060,18 +6330,25 @@ static void restart_le_actions(struct hci_dev *hdev) } } - hci_update_background_scan(hdev); + __hci_update_background_scan(req); } -static void powered_complete(struct hci_dev *hdev, u8 status) +static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; BT_DBG("status 0x%02x", status); - hci_dev_lock(hdev); + if (!status) { + /* Register the available SMP channels (BR/EDR and LE) only + * when successfully powering on the controller. This late + * registration is required so that LE SMP can clearly + * decide if the public address or static address is used. 
+ */ + smp_register(hdev); + } - restart_le_actions(hdev); + hci_dev_lock(hdev); mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -6092,14 +6369,16 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { - u8 ssp = 1; + u8 mode = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); - } + hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 sc = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } } if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && @@ -6130,6 +6409,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) enable_advertising(&req); + + restart_le_actions(&req); } link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); @@ -6139,7 +6420,7 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); update_name(&req); update_eir(&req); @@ -6817,43 +7098,6 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) -{ - struct cmd_lookup match = { NULL, hdev }; - bool changed = false; - - if (status) { - u8 mgmt_err = mgmt_status(status); - - if (enable) { - if (test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags)) - new_settings(hdev, NULL); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - cmd_status_rsp, &mgmt_err); - return; - } - - if (enable) { - changed = !test_and_set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - } else 
{ - changed = test_and_clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - settings_rsp, &match); - - if (changed) - new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); -} - static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -6924,28 +7168,21 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, mgmt_status(status)); } else { - if (bredr_sc_enabled(hdev) && hash256 && rand256) { - struct mgmt_rp_read_local_oob_ext_data rp; + struct mgmt_rp_read_local_oob_data rp; + size_t rp_size = sizeof(rp); - memcpy(rp.hash192, hash192, sizeof(rp.hash192)); - memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + memcpy(rp.hash192, hash192, sizeof(rp.hash192)); + memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + if (bredr_sc_enabled(hdev) && hash256 && rand256) { memcpy(rp.hash256, hash256, sizeof(rp.hash256)); memcpy(rp.rand256, rand256, sizeof(rp.rand256)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); } else { - struct mgmt_rp_read_local_oob_data rp; - - memcpy(rp.hash, hash192, sizeof(rp.hash)); - memcpy(rp.rand, rand192, sizeof(rp.rand)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); + rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256); } + + cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, + &rp, rp_size); } mgmt_pending_remove(cmd); @@ -7018,6 +7255,21 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } +static void restart_le_scan(struct hci_dev *hdev) +{ + /* If controller is not scanning we are done. 
*/ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, + hdev->discovery.scan_start + + hdev->discovery.scan_duration)) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + DISCOV_LE_RESTART_DELAY); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) @@ -7040,14 +7292,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* When using service discovery with a RSSI threshold, then check * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, then all results with a RSSI smaller than the - * RSSI threshold will be dropped. + * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, + * then all results with a RSSI smaller than the RSSI threshold will be + * dropped. If the quirk is set, let it through for further processing, + * as we might need to restart the scan. * * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, * the results are also dropped. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi < hdev->discovery.rssi || rssi == HCI_RSSI_INVALID)) + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) return; /* Make sure that the buffer is big enough. The 5 extra bytes @@ -7066,7 +7322,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * However when using service discovery, the value 127 will be * returned when the RSSI is not available. 
*/ - if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi) + if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi && + link_type == ACL_LINK) rssi = 0; bacpy(&ev->addr.bdaddr, bdaddr); @@ -7081,12 +7338,20 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * kept and checking possible scan response data * will be skipped. */ - if (hdev->discovery.uuid_count > 0) + if (hdev->discovery.uuid_count > 0) { match = eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, hdev->discovery.uuids); - else + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); + } else { match = true; + } if (!match && !scan_rsp_len) return; @@ -7119,6 +7384,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, hdev->discovery.uuid_count, hdev->discovery.uuids)) return; + + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); } /* Append scan response data to event */ @@ -7132,6 +7405,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + /* Validate the reported RSSI value against the RSSI threshold once more + * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE + * scanning. 
+ */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return; + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; @@ -7174,7 +7455,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status) +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status %u", hdev->name, status); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 73f8c75abe6e..4fea24275b17 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -771,7 +771,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); @@ -2038,7 +2038,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Bind socket */ bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2348176401a0..3c6d2c8ac1a4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -468,7 +468,7 @@ done: static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -487,8 +487,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f /* Wait for an incoming 
connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -509,10 +507,11 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1058,6 +1057,8 @@ int __init rfcomm_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_rc) > sizeof(struct sockaddr)); + err = proto_register(&rfcomm_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 30e5ea3f1ad3..76321b546e84 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -618,7 +618,7 @@ done: static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; long timeo; int err = 0; @@ -632,8 +632,6 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag /* Wait for an incoming connection. (wake-one). 
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -654,10 +652,10 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1184,6 +1182,8 @@ int __init sco_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_sco) > sizeof(struct sockaddr)); + err = proto_register(&sco_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c new file mode 100644 index 000000000000..378f4064952c --- /dev/null +++ b/net/bluetooth/selftest.c @@ -0,0 +1,244 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "ecc.h" +#include "smp.h" +#include "selftest.h" + +#if IS_ENABLED(CONFIG_BT_SELFTEST_ECDH) + +static const u8 priv_a_1[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 priv_b_1[32] __initconst = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55, +}; +static const u8 pub_a_1[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 pub_b_1[64] __initconst = { + 0x90, 0xa1, 0xaa, 0x2f, 0xb2, 0x77, 0x90, 0x55, + 0x9f, 0xa6, 0x15, 0x86, 0xfd, 0x8a, 0xb5, 0x47, + 0x00, 0x4c, 0x9e, 0xf1, 0x84, 0x22, 0x59, 0x09, + 0x96, 0x1d, 0xaf, 0x1f, 0xf0, 0xf0, 0xa1, 0x1e, + + 0x4a, 0x21, 0xb1, 0x15, 0xf9, 0xaf, 0x89, 0x5f, + 0x76, 0x36, 0x8e, 0xe2, 0x30, 0x11, 0x2d, 0x47, + 0x60, 0x51, 0xb8, 0x9a, 0x3a, 0x70, 0x56, 0x73, + 0x37, 0xad, 0x9d, 0x42, 0x3e, 0xf3, 0x55, 0x4c, +}; +static const u8 dhkey_1[32] __initconst = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec, +}; + +static const u8 priv_a_2[32] __initconst = { + 0x63, 0x76, 0x45, 0xd0, 0xf7, 0x73, 0xac, 0xb7, + 0xff, 0xdd, 0x03, 0x72, 0xb9, 0x72, 0x85, 0xb4, + 0x41, 0xb6, 
0x5d, 0x0c, 0x5d, 0x54, 0x84, 0x60, + 0x1a, 0xa3, 0x9a, 0x3c, 0x69, 0x16, 0xa5, 0x06, +}; +static const u8 priv_b_2[32] __initconst = { + 0xba, 0x30, 0x55, 0x50, 0x19, 0xa2, 0xca, 0xa3, + 0xa5, 0x29, 0x08, 0xc6, 0xb5, 0x03, 0x88, 0x7e, + 0x03, 0x2b, 0x50, 0x73, 0xd4, 0x2e, 0x50, 0x97, + 0x64, 0xcd, 0x72, 0x0d, 0x67, 0xa0, 0x9a, 0x52, +}; +static const u8 pub_a_2[64] __initconst = { + 0xdd, 0x78, 0x5c, 0x74, 0x03, 0x9b, 0x7e, 0x98, + 0xcb, 0x94, 0x87, 0x4a, 0xad, 0xfa, 0xf8, 0xd5, + 0x43, 0x3e, 0x5c, 0xaf, 0xea, 0xb5, 0x4c, 0xf4, + 0x9e, 0x80, 0x79, 0x57, 0x7b, 0xa4, 0x31, 0x2c, + + 0x4f, 0x5d, 0x71, 0x43, 0x77, 0x43, 0xf8, 0xea, + 0xd4, 0x3e, 0xbd, 0x17, 0x91, 0x10, 0x21, 0xd0, + 0x1f, 0x87, 0x43, 0x8e, 0x40, 0xe2, 0x52, 0xcd, + 0xbe, 0xdf, 0x98, 0x38, 0x18, 0x12, 0x95, 0x91, +}; +static const u8 pub_b_2[64] __initconst = { + 0xcc, 0x00, 0x65, 0xe1, 0xf5, 0x6c, 0x0d, 0xcf, + 0xec, 0x96, 0x47, 0x20, 0x66, 0xc9, 0xdb, 0x84, + 0x81, 0x75, 0xa8, 0x4d, 0xc0, 0xdf, 0xc7, 0x9d, + 0x1b, 0x3f, 0x3d, 0xf2, 0x3f, 0xe4, 0x65, 0xf4, + + 0x79, 0xb2, 0xec, 0xd8, 0xca, 0x55, 0xa1, 0xa8, + 0x43, 0x4d, 0x6b, 0xca, 0x10, 0xb0, 0xc2, 0x01, + 0xc2, 0x33, 0x4e, 0x16, 0x24, 0xc4, 0xef, 0xee, + 0x99, 0xd8, 0xbb, 0xbc, 0x48, 0xd0, 0x01, 0x02, +}; +static const u8 dhkey_2[32] __initconst = { + 0x69, 0xeb, 0x21, 0x32, 0xf2, 0xc6, 0x05, 0x41, + 0x60, 0x19, 0xcd, 0x5e, 0x94, 0xe1, 0xe6, 0x5f, + 0x33, 0x07, 0xe3, 0x38, 0x4b, 0x68, 0xe5, 0x62, + 0x3f, 0x88, 0x6d, 0x2f, 0x3a, 0x84, 0x85, 0xab, +}; + +static const u8 priv_a_3[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 pub_a_3[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 
0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 dhkey_3[32] __initconst = { + 0x2d, 0xab, 0x00, 0x48, 0xcb, 0xb3, 0x7b, 0xda, + 0x55, 0x7b, 0x8b, 0x72, 0xa8, 0x57, 0x87, 0xc3, + 0x87, 0x27, 0x99, 0x32, 0xfc, 0x79, 0x5f, 0xae, + 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, +}; + +static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], + const u8 pub_a[64], const u8 pub_b[64], + const u8 dhkey[32]) +{ + u8 dhkey_a[32], dhkey_b[32]; + + ecdh_shared_secret(pub_b, priv_a, dhkey_a); + ecdh_shared_secret(pub_a, priv_b, dhkey_b); + + if (memcmp(dhkey_a, dhkey, 32)) + return -EINVAL; + + if (memcmp(dhkey_b, dhkey, 32)) + return -EINVAL; + + return 0; +} + +static int __init test_ecdh(void) +{ + ktime_t calltime, delta, rettime; + unsigned long long duration; + int err; + + calltime = ktime_get(); + + err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); + if (err) { + BT_ERR("ECDH sample 1 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); + if (err) { + BT_ERR("ECDH sample 2 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); + if (err) { + BT_ERR("ECDH sample 3 failed"); + return err; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("ECDH test passed in %llu usecs", duration); + + return 0; +} + +#else + +static inline int test_ecdh(void) +{ + return 0; +} + +#endif + +static int __init run_selftest(void) +{ + int err; + + BT_INFO("Starting self testing"); + + err = test_ecdh(); + if (err) + goto done; + + err = bt_selftest_smp(); + +done: + BT_INFO("Finished self testing"); + + return err; +} + +#if IS_MODULE(CONFIG_BT) + +/* This is run when CONFIG_BT_SELFTEST=y and 
CONFIG_BT=m and is just a + * wrapper to allow running this at module init. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +int __init bt_selftest(void) +{ + return run_selftest(); +} + +#else + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=y and is run + * via late_initcall() as last item in the initialization sequence. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +static int __init bt_selftest_init(void) +{ + return run_selftest(); +} +late_initcall(bt_selftest_init); + +#endif diff --git a/net/bluetooth/selftest.h b/net/bluetooth/selftest.h new file mode 100644 index 000000000000..2aa0a346a913 --- /dev/null +++ b/net/bluetooth/selftest.h @@ -0,0 +1,45 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#if IS_ENABLED(CONFIG_BT_SELFTEST) && IS_MODULE(CONFIG_BT) + +/* When CONFIG_BT_SELFTEST=y and the CONFIG_BT=m, then the self testing + * is run at module loading time. 
+ */ +int bt_selftest(void); + +#else + +/* When CONFIG_BT_SELFTEST=y and CONFIG_BT=y, then the self testing + * is run via late_initcall() to make sure that subsys_initcall() of + * the Bluetooth subsystem and device_initcall() of the Crypto subsystem + * do not clash. + * + * When CONFIG_BT_SELFTEST=n, then this turns into an empty call that + * has no impact. + */ +static inline int bt_selftest(void) +{ + return 0; +} + +#endif diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b67749bb55bf..c09a821f381d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,6 +20,7 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/scatterlist.h> #include <crypto/b128ops.h> @@ -223,8 +224,9 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], return err; } -static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], - u8 a1[7], u8 a2[7], u8 mackey[16], u8 ltk[16]) +static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32], + const u8 n1[16], const u8 n2[16], const u8 a1[7], + const u8 a2[7], u8 mackey[16], u8 ltk[16]) { /* The btle, salt and length "magic" values are as defined in * the SMP section of the Bluetooth core specification. 
In ASCII @@ -276,7 +278,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], } static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], - const u8 n1[16], u8 n2[16], const u8 r[16], + const u8 n1[16], const u8 n2[16], const u8 r[16], const u8 io_cap[3], const u8 a1[7], const u8 a2[7], u8 res[16]) { @@ -298,7 +300,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], if (err) return err; - BT_DBG("res %16phN", res); + SMP_DBG("res %16phN", res); return err; } @@ -618,7 +620,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, oob_data = hci_find_remote_oob_data(hdev, &hcon->dst, bdaddr_type); - if (oob_data) { + if (oob_data && oob_data->present) { set_bit(SMP_FLAG_OOB, &smp->flags); oob_flag = SMP_OOB_PRESENT; memcpy(smp->rr, oob_data->rand256, 16); @@ -1674,7 +1676,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -2303,8 +2305,12 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, * implementations are not known of and in order to not over * complicate our implementation, simply pretend that we never * received an IRK for such a device. + * + * The Identity Address must also be a Static Random or Public + * Address, which hci_is_identity_address() checks for. 
*/ - if (!bacmp(&info->bdaddr, BDADDR_ANY)) { + if (!bacmp(&info->bdaddr, BDADDR_ANY) || + !hci_is_identity_address(&info->bdaddr, info->addr_type)) { BT_ERR("Ignoring IRK with no identity address"); goto distribute; } @@ -2737,7 +2743,7 @@ static void bredr_pairing(struct l2cap_chan *chan) /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return; /* If our LE support is not enabled don't do anything */ @@ -2944,11 +2950,30 @@ create_chan: l2cap_chan_set_defaults(chan); - bacpy(&chan->src, &hdev->bdaddr); - if (cid == L2CAP_CID_SMP) - chan->src_type = BDADDR_LE_PUBLIC; - else + if (cid == L2CAP_CID_SMP) { + /* If usage of static address is forced or if the devices + * does not have a public address, then listen on the static + * address. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configued, then listen on + * the static address instead. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + bacpy(&chan->src, &hdev->static_addr); + chan->src_type = BDADDR_LE_RANDOM; + } else { + bacpy(&chan->src, &hdev->bdaddr); + chan->src_type = BDADDR_LE_PUBLIC; + } + } else { + bacpy(&chan->src, &hdev->bdaddr); chan->src_type = BDADDR_BREDR; + } + chan->state = BT_LISTEN; chan->mode = L2CAP_MODE_BASIC; chan->imtu = L2CAP_DEFAULT_MTU; @@ -2975,21 +3000,108 @@ static void smp_del_chan(struct l2cap_chan *chan) l2cap_chan_put(chan); } +static ssize_t force_bredr_smp_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_bredr_smp_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + return -EALREADY; + + if (enable) { + struct l2cap_chan *chan; + + chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); + if (IS_ERR(chan)) + return PTR_ERR(chan); + + hdev->smp_bredr_data = chan; + } else { + struct l2cap_chan *chan; + + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } + + change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_bredr_smp_fops = { + .open = simple_open, + .read = force_bredr_smp_read, + .write = force_bredr_smp_write, + .llseek = default_llseek, +}; + int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; BT_DBG("%s", hdev->name); + /* If the controller does not support Low Energy operation, then + * there is also no need to register any SMP channel. + */ + if (!lmp_le_capable(hdev)) + return 0; + + if (WARN_ON(hdev->smp_data)) { + chan = hdev->smp_data; + hdev->smp_data = NULL; + smp_del_chan(chan); + } + chan = smp_add_cid(hdev, L2CAP_CID_SMP); if (IS_ERR(chan)) return PTR_ERR(chan); hdev->smp_data = chan; - if (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + /* If the controller does not support BR/EDR Secure Connections + * feature, then the BR/EDR SMP channel shall not be present. 
+ * + * To test this with Bluetooth 4.0 controllers, create a debugfs + * switch that allows forcing BR/EDR SMP support and accepting + * cross-transport pairing on non-AES encrypted connections. + */ + if (!lmp_sc_capable(hdev)) { + debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, + hdev, &force_bredr_smp_fops); return 0; + } + + if (WARN_ON(hdev->smp_bredr_data)) { + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); if (IS_ERR(chan)) { @@ -3021,3 +3133,331 @@ void smp_unregister(struct hci_dev *hdev) smp_del_chan(chan); } } + +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +static int __init test_ah(struct crypto_blkcipher *tfm_aes) +{ + const u8 irk[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 r[3] = { 0x94, 0x81, 0x70 }; + const u8 exp[3] = { 0xaa, 0xfb, 0x0d }; + u8 res[3]; + int err; + + err = smp_ah(tfm_aes, irk, r, res); + if (err) + return err; + + if (memcmp(res, exp, 3)) + return -EINVAL; + + return 0; +} + +static int __init test_c1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r[16] = { + 0xe0, 0x2e, 0x70, 0xc6, 0x4e, 0x27, 0x88, 0x63, + 0x0e, 0x6f, 0xad, 0x56, 0x21, 0xd5, 0x83, 0x57 }; + const u8 preq[7] = { 0x01, 0x01, 0x00, 0x00, 0x10, 0x07, 0x07 }; + const u8 pres[7] = { 0x02, 0x03, 0x00, 0x00, 0x08, 0x00, 0x05 }; + const u8 _iat = 0x01; + const u8 _rat = 0x00; + const bdaddr_t ra = { { 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1 } }; + const bdaddr_t ia = { { 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1 } }; + const u8 exp[16] = { + 0x86, 0x3b, 0xf1, 0xbe, 0xc5, 0x4d, 0xa7, 0xd2, + 0xea, 0x88, 0x89, 0x87, 0xef, 0x3f, 0x1e, 0x1e }; + u8 res[16]; + int err; + + err = smp_c1(tfm_aes, k, r, preq, pres, _iat, &ia, _rat, &ra, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + 
return -EINVAL; + + return 0; +} + +static int __init test_s1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r1[16] = { + 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 }; + const u8 r2[16] = { + 0x00, 0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99 }; + const u8 exp[16] = { + 0x62, 0xa0, 0x6d, 0x79, 0xae, 0x16, 0x42, 0x5b, + 0x9b, 0xf4, 0xb0, 0xe8, 0xf0, 0xe1, 0x1f, 0x9a }; + u8 res[16]; + int err; + + err = smp_s1(tfm_aes, k, r1, r2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f4(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 z = 0x00; + const u8 exp[16] = { + 0x2d, 0x87, 0x74, 0xa9, 0xbe, 0xa1, 0xed, 0xf1, + 0x1c, 0xbd, 0xa9, 0x07, 0xf1, 0x16, 0xc9, 0xf2 }; + u8 res[16]; + int err; + + err = smp_f4(tfm_cmac, u, v, x, z, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f5(struct crypto_hash *tfm_cmac) +{ + const u8 w[32] = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; 
+ const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp_ltk[16] = { + 0x38, 0x0a, 0x75, 0x94, 0xb5, 0x22, 0x05, 0x98, + 0x23, 0xcd, 0xd7, 0x69, 0x11, 0x79, 0x86, 0x69 }; + const u8 exp_mackey[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + u8 mackey[16], ltk[16]; + int err; + + err = smp_f5(tfm_cmac, w, n1, n2, a1, a2, mackey, ltk); + if (err) + return err; + + if (memcmp(mackey, exp_mackey, 16)) + return -EINVAL; + + if (memcmp(ltk, exp_ltk, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 r[16] = { + 0xc8, 0x0f, 0x2d, 0x0c, 0xd2, 0x42, 0xda, 0x08, + 0x54, 0xbb, 0x53, 0xb4, 0x3b, 0x34, 0xa3, 0x12 }; + const u8 io_cap[3] = { 0x02, 0x01, 0x01 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp[16] = { + 0x61, 0x8f, 0x95, 0xda, 0x09, 0x0b, 0x6c, 0xd2, + 0xc5, 0xe8, 0xd0, 0x9c, 0x98, 0x73, 0xc4, 0xe3 }; + u8 res[16]; + int err; + + err = smp_f6(tfm_cmac, w, n1, n2, r, io_cap, a1, a2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_g2(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 
0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 y[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u32 exp_val = 0x2f9ed5ba % 1000000; + u32 val; + int err; + + err = smp_g2(tfm_cmac, u, v, x, y, &val); + if (err) + return err; + + if (val != exp_val) + return -EINVAL; + + return 0; +} + +static int __init test_h6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 key_id[4] = { 0x72, 0x62, 0x65, 0x6c }; + const u8 exp[16] = { + 0x99, 0x63, 0xb1, 0x80, 0xe2, 0xa9, 0xd3, 0xe8, + 0x1c, 0xc9, 0x6d, 0xe7, 0x02, 0xe1, 0x9a, 0x2d }; + u8 res[16]; + int err; + + err = smp_h6(tfm_cmac, w, key_id, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init run_selftests(struct crypto_blkcipher *tfm_aes, + struct crypto_hash *tfm_cmac) +{ + ktime_t calltime, delta, rettime; + unsigned long long duration; + int err; + + calltime = ktime_get(); + + err = test_ah(tfm_aes); + if (err) { + BT_ERR("smp_ah test failed"); + return err; + } + + err = test_c1(tfm_aes); + if (err) { + BT_ERR("smp_c1 test failed"); + return err; + } + + err = test_s1(tfm_aes); + if (err) { + BT_ERR("smp_s1 test failed"); + return err; + } + + err = test_f4(tfm_cmac); + if (err) { + BT_ERR("smp_f4 test failed"); + return err; + } + + err = test_f5(tfm_cmac); + if (err) { + BT_ERR("smp_f5 test failed"); + return err; + } + + err = test_f6(tfm_cmac); + if (err) { + BT_ERR("smp_f6 
test failed"); + return err; + } + + err = test_g2(tfm_cmac); + if (err) { + BT_ERR("smp_g2 test failed"); + return err; + } + + err = test_h6(tfm_cmac); + if (err) { + BT_ERR("smp_h6 test failed"); + return err; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("SMP test passed in %llu usecs", duration); + + return 0; +} + +int __init bt_selftest_smp(void) +{ + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; + int err; + + tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_aes)) { + BT_ERR("Unable to create ECB crypto context"); + return PTR_ERR(tfm_aes); + } + + tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_cmac)) { + BT_ERR("Unable to create CMAC crypto context"); + crypto_free_blkcipher(tfm_aes); + return PTR_ERR(tfm_cmac); + } + + err = run_selftests(tfm_aes, tfm_cmac); + + crypto_free_hash(tfm_cmac); + crypto_free_blkcipher(tfm_aes); + + return err; +} + +#endif diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 3296bf42ae80..60c5b73fcb4b 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -192,4 +192,17 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +int bt_selftest_smp(void); + +#else + +static inline int bt_selftest_smp(void) +{ + return 0; +} + +#endif + #endif /* __SMP_H */ diff --git a/net/bridge/br.c b/net/bridge/br.c index 44425aff7cba..fb57ab6b24f9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -19,6 +19,7 @@ #include <linux/llc.h> #include <net/llc.h> #include <net/stp.h> +#include <net/switchdev.h> #include "br_private.h" @@ -120,6 +121,48 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +static int br_netdev_switch_event(struct notifier_block 
*unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + struct netdev_switch_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + + rtnl_lock(); + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + br = p->br; + + switch (event) { + case NETDEV_SWITCH_FDB_ADD: + fdb_info = ptr; + err = br_fdb_external_learn_add(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + case NETDEV_SWITCH_FDB_DEL: + fdb_info = ptr; + err = br_fdb_external_learn_del(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + } + +out: + rtnl_unlock(); + return err; +} + +static struct notifier_block br_netdev_switch_notifier = { + .notifier_call = br_netdev_switch_event, +}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; @@ -169,10 +212,14 @@ static int __init br_init(void) if (err) goto err_out3; - err = br_netlink_init(); + err = register_netdev_switch_notifier(&br_netdev_switch_notifier); if (err) goto err_out4; + err = br_netlink_init(); + if (err) + goto err_out5; + brioctl_set(br_ioctl_deviceless_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -185,6 +232,8 @@ static int __init br_init(void) return 0; +err_out5: + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -202,6 +251,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cc36e59db7d7..e0670d7054f9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -633,7 +633,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge 
*br, if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -686,6 +687,9 @@ int br_fdb_dump(struct sk_buff *skb, if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; + if (!filter_dev) + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; @@ -697,7 +701,7 @@ int br_fdb_dump(struct sk_buff *skb, (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; - /* !f->dst is a speacial case for bridge + /* !f->dst is a special case for bridge * It means the MAC belongs to the bridge * Therefore need a little more filtering * we only want to dump the !f->dst case @@ -705,6 +709,8 @@ int br_fdb_dump(struct sk_buff *skb, if (f->dst) goto skip; } + if (!filter_dev && f->dst) + goto skip; if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, @@ -840,10 +846,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { - err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + if (err || !pv) goto out; - } /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every @@ -911,16 +916,15 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { - err = __br_fdb_delete(p, addr, 0); + err = -ENOENT; + err &= __br_fdb_delete(p, addr, 0); + if (!pv) goto out; - } /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every * vlan on this port. 
*/ - err = -ENOENT; for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } @@ -985,26 +989,14 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } } -int br_fdb_external_learn_add(struct net_device *dev, +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1029,33 +1021,18 @@ int br_fdb_external_learn_add(struct net_device *dev, err_unlock: spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_add); -int br_fdb_external_learn_del(struct net_device *dev, +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1066,9 +1043,6 @@ int br_fdb_external_learn_del(struct net_device *dev, err = -ENOENT; spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_del); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed307db7a12b..b087d278c679 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -424,6 +424,7 @@ netdev_features_t br_features_recompute(struct net_bridge 
*br, features = netdev_increment_features(features, p->dev->features, mask); } + features = netdev_add_tso_features(features, mask); return features; } @@ -435,10 +436,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) int err = 0; bool changed_addr; - /* Don't allow bridging non-ethernet like devices */ + /* Don't allow bridging non-ethernet like devices, or DSA-enabled + * master network devices since the bridge layer rx_handler prevents + * the DSA fake ethertype handler to be invoked, so we do not strip off + * the DSA switch tag protocol header and the bridge layer just return + * RX_HANDLER_CONSUMED, stopping RX processing for these frames. + */ if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || - !is_valid_ether_addr(dev->dev_addr)) + !is_valid_ether_addr(dev->dev_addr) || + netdev_uses_dsa(dev)) return -EINVAL; /* No bridging of bridges */ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 5df05269d17a..409608960899 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -190,7 +190,8 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, nla_nest_end(skb, nest2); nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; end: nla_nest_end(skb, nest); @@ -276,7 +277,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL); if (err < 0) return err; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index c190d22b6b3d..65728e0dc4ff 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -66,17 +66,17 @@ static int brnf_pass_vlan_indev __read_mostly = 0; #endif #define IS_IP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ - 
(!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -436,11 +436,11 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct struct net_device *vlan, *br; br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, - vlan_tx_tag_get(skb) & VLAN_VID_MASK); + skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9f5eb55a4d3a..4fbcea0e7ecb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/switchdev.h> #include <uapi/linux/if_bridge.h> #include "br_private.h" @@ -67,6 +68,120 @@ static int br_port_fill_attrs(struct sk_buff *skb, return 0; } +static int br_fill_ifvlaninfo_range(struct sk_buff *skb, u16 vid_start, + u16 vid_end, u16 flags) +{ + struct bridge_vlan_info vinfo; + + if ((vid_end - vid_start) > 0) { + /* add range to skb */ + vinfo.vid = vid_start; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + vinfo.vid = vid_end; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto 
nla_put_failure; + } else { + vinfo.vid = vid_start; + vinfo.flags = flags; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + u16 vid_range_start = 0, vid_range_end = 0; + u16 vid_range_flags = 0; + u16 pvid, vid, flags; + int err = 0; + + /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan + * and mark vlan info with begin and end flags + * if vlaninfo represents a range + */ + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + flags = 0; + if (vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = vid; + continue; + } else { + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + +initvars: + vid_range_start = vid; + vid_range_end = vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + /* Call it once more to send any left over vlans */ + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + + return 0; +} + +static int br_fill_ifvlaninfo(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + struct bridge_vlan_info vinfo; + u16 pvid, vid; + + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + /* * Create one 
netlink message for one interface * Contains port and master info as well as carrier and bridge state. @@ -121,12 +236,11 @@ static int br_fill_ifinfo(struct sk_buff *skb, } /* Check if the VID information is requested */ - if (filter_mask & RTEXT_FILTER_BRVLAN) { - struct nlattr *af; + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { const struct net_port_vlans *pv; - struct bridge_vlan_info vinfo; - u16 vid; - u16 pvid; + struct nlattr *af; + int err; if (port) pv = nbp_get_vlan_info(port); @@ -140,26 +254,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - vinfo.vid = vid; - vinfo.flags = 0; - if (vid == pvid) - vinfo.flags |= BRIDGE_VLAN_INFO_PVID; - - if (test_bit(vid, pv->untagged_bitmap)) - vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; - - if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - goto nla_put_failure; - } - + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + err = br_fill_ifvlaninfo_compressed(skb, pv); + else + err = br_fill_ifvlaninfo(skb, pv); + if (err) + goto nla_put_failure; nla_nest_end(skb, af); } done: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -206,69 +312,99 @@ errout: int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask) { - int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); - if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) - goto out; + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && + !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + return 0; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, - filter_mask, dev); -out: - return err; + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); } -static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { - [IFLA_BRIDGE_FLAGS] = { 
.type = NLA_U16 }, - [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, - [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, - .len = sizeof(struct bridge_vlan_info), }, -}; +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, + int cmd, struct bridge_vlan_info *vinfo) +{ + int err = 0; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else { + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + } + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else { + br_vlan_delete(br, vinfo->vid); + } + break; + } + + return err; +} static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + struct bridge_vlan_info *vinfo_start = NULL; + struct bridge_vlan_info *vinfo = NULL; + struct nlattr *attr; int err = 0; + int rem; - err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); - if (err) - return err; + nla_for_each_nested(attr, af_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vinfo_start) + return -EINVAL; + vinfo_start = vinfo; + continue; + } - if (tb[IFLA_BRIDGE_VLAN_INFO]) { - struct bridge_vlan_info *vinfo; + if (vinfo_start) { + struct bridge_vlan_info tmp_vinfo; + int v; - vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; + if (vinfo->vid <= vinfo_start->vid) + return -EINVAL; - switch (cmd) { - case RTM_SETLINK: - if (p) { - err = nbp_vlan_add(p, vinfo->vid, 
vinfo->flags); + memcpy(&tmp_vinfo, vinfo_start, + sizeof(struct bridge_vlan_info)); + + for (v = vinfo_start->vid; v <= vinfo->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); - } else - err = br_vlan_add(br, vinfo->vid, vinfo->flags); - - break; - - case RTM_DELLINK: - if (p) { - nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else - br_vlan_delete(br, vinfo->vid); - break; + } + vinfo_start = NULL; + } else { + err = br_vlan_info(br, p, cmd, vinfo); } + if (err) + break; } return err; @@ -359,13 +495,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) } /* Change state and parameters on port. */ -int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *protinfo; struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err = 0; + int err = 0, ret_offload = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -407,19 +543,28 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) afspec, RTM_SETLINK); } + if (p && !(flags & BRIDGE_FLAGS_SELF)) { + /* set bridge attributes in hardware if supported + */ + ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error setting attrs on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + } + if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); - out: return err; } /* Delete port information */ -int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct 
net_bridge_port *p; - int err; + int err = 0, ret_offload = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) @@ -432,6 +577,21 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) err = br_afspec((struct net_bridge *)netdev_priv(dev), p, afspec, RTM_DELLINK); + if (err == 0) + /* Send RTM_NEWLINK because userspace + * expects RTM_NEWLINK for vlan dels + */ + br_ifinfo_notify(RTM_NEWLINK, p); + + if (p && !(flags & BRIDGE_FLAGS_SELF)) { + /* del bridge attributes in hardware + */ + ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error deleting attrs on port %u (%s)\n", + (unsigned int)p->port_no, p->dev->name); + } return err; } @@ -561,7 +721,7 @@ static size_t br_get_link_af_size(const struct net_device *dev) return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); } -static struct rtnl_af_ops br_af_ops = { +static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size, }; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aea3d1339b3f..de0919975a25 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -402,6 +402,10 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); @@ -628,8 +632,8 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if 
(vlan_tx_tag_present(skb)) - *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + if (skb_vlan_tag_present(skb)) + *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; else { *vid = 0; err = -EINVAL; @@ -815,8 +819,8 @@ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, struct net_bridge_port *port); -int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); -int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 97b8ddf57363..13013fe8db24 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -187,7 +187,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. 
*/ - if (unlikely(!vlan_tx_tag_present(skb) && + if (unlikely(!skb_vlan_tag_present(skb) && skb->protocol == proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) @@ -200,7 +200,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (unlikely(!skb)) return false; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 8d3f8c7651f0..618568888128 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -45,8 +45,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) /* VLAN encapsulated Type/Length field, given from orig frame */ __be16 encap; - if (vlan_tx_tag_present(skb)) { - TCI = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + TCI = skb_vlan_tag_get(skb); encap = skb->protocol; } else { const struct vlan_hdr *fp; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d9a8c05d995d..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -133,7 +133,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, __be16 ethproto; int verdict, i; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); else ethproto = h->h_proto; diff --git a/net/can/gw.c b/net/can/gw.c index 295f62e62eb3..a6f448e18ea8 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -575,7 +575,8 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; cancel: nlmsg_cancel(skb, nlh); diff --git a/net/core/Makefile b/net/core/Makefile index 235e6c50708d..fec0856dd6c0 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux networking core. 
# -obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ +obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/dev.c b/net/core/dev.c index 7fe82929f509..d030575532a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) - return &ptype_all; + return pt->dev ? &pt->dev->ptype_all : &ptype_all; else - return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; + return pt->dev ? &pt->dev->ptype_specific : + &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } /** @@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline void deliver_ptype_list_skb(struct sk_buff *skb, + struct packet_type **pt, + struct net_device *dev, __be16 type, + struct list_head *ptype_list) +{ + struct packet_type *ptype, *pt_prev = *pt; + + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->type != type) + continue; + if (pt_prev) + deliver_skb(skb, pt_prev, dev); + pt_prev = ptype; + } + *pt = pt_prev; +} + static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) { if (!ptype->af_packet_priv || !skb->sk) @@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) struct packet_type *ptype; struct sk_buff *skb2 = NULL; struct packet_type *pt_prev = NULL; + struct list_head *ptype_list = &ptype_all; rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { +again: + list_for_each_entry_rcu(ptype, ptype_list, list) { /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ - if ((ptype->dev == dev || !ptype->dev) && - 
(!skb_loop_sk(ptype, skb))) { - if (pt_prev) { - deliver_skb(skb2, pt_prev, skb->dev); - pt_prev = ptype; - continue; - } + if (skb_loop_sk(ptype, skb)) + continue; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } - net_timestamp_set(skb2); + /* need to clone skb, done only once */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + goto out_unlock; - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. - */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb_tail_pointer(skb2)) { - net_crit_ratelimited("protocol %04x is buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); - skb_reset_network_header(skb2); - } + net_timestamp_set(skb2); - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - pt_prev = ptype; + /* skb->nh should be correctly + * set by sender, so that the second statement is + * just protection against buggy protocols. 
+ */ + skb_reset_mac_header(skb2); + + if (skb_network_header(skb2) < skb2->data || + skb_network_header(skb2) > skb_tail_pointer(skb2)) { + net_crit_ratelimited("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); + skb_reset_network_header(skb2); } + + skb2->transport_header = skb2->network_header; + skb2->pkt_type = PACKET_OUTGOING; + pt_prev = ptype; } + + if (ptype_list == &ptype_all) { + ptype_list = &dev->ptype_all; + goto again; + } +out_unlock: if (pt_prev) pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); @@ -2549,7 +2576,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!vlan_tx_tag_present(skb)) { + if (!skb_vlan_tag_present(skb)) { if (unlikely(protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2588,7 +2615,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (!list_empty(&ptype_all)) + if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -2630,7 +2657,7 @@ out: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; @@ -3003,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* One global table that all flow-based protocols share. 
*/ struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +u32 rps_cpu_mask __read_mostly; +EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; @@ -3059,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { - struct netdev_rx_queue *rxqueue; - struct rps_map *map; + const struct rps_sock_flow_table *sock_flow_table; + struct netdev_rx_queue *rxqueue = dev->_rx; struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; + struct rps_map *map; int cpu = -1; - u16 tcpu; + u32 tcpu; u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->real_num_rx_queues)) { WARN_ONCE(dev->real_num_rx_queues > 1, "%s received packet on queue %u, but number " @@ -3076,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, dev->name, index, dev->real_num_rx_queues); goto done; } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; + rxqueue += index; + } + /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ + + flow_table = rcu_dereference(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); - if (map) { - if (map->len == 1 && - !rcu_access_pointer(rxqueue->rps_flow_table)) { - tcpu = map->cpus[0]; - if (cpu_online(tcpu)) - cpu = tcpu; - goto done; - } - } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { + if (!flow_table && !map) goto done; - } skb_reset_network_header(skb); hash = skb_get_hash(skb); if (!hash) goto done; - flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { - u16 next_cpu; struct rps_dev_flow *rflow; + u32 next_cpu; + u32 ident; + + /* First check into global flow table if there is a match */ + ident = sock_flow_table->ents[hash & 
sock_flow_table->mask]; + if ((ident ^ hash) & ~rps_cpu_mask) + goto try_rps; + + next_cpu = ident & rps_cpu_mask; + /* OK, now we know there is a match, + * we can look at the local (per receive queue) flow table + */ rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; - /* * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow @@ -3135,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } } +try_rps: + if (map) { tcpu = map->cpus[reciprocal_scale(hash, map->len)]; if (cpu_online(tcpu)) { @@ -3586,7 +3619,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_dev; bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3629,11 +3661,15 @@ another_round: goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; + } + + list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; } skip_taps: @@ -3647,7 +3683,7 @@ ncls: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; @@ -3679,8 +3715,8 @@ ncls: } } - if (unlikely(vlan_tx_tag_present(skb))) { - if (vlan_tx_tag_get_id(skb)) + if (unlikely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_get_id(skb)) skb->pkt_type = PACKET_OTHERHOST; /* Note: we might in the future use prio bits * and set skb->priority like in vlan_do_receive() @@ -3689,19 +3725,21 @@ 
ncls: skb->vlan_tci = 0; } + type = skb->protocol; + /* deliver only exact match when indicated */ - null_or_dev = deliver_exact ? skb->dev : NULL; + if (likely(!deliver_exact)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &ptype_base[ntohs(type) & + PTYPE_HASH_MASK]); + } - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &orig_dev->ptype_specific); + + if (unlikely(skb->dev != orig_dev)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &skb->dev->ptype_specific); } if (pt_prev) { @@ -5294,6 +5332,26 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +/** + * netdev_bonding_info_change - Dispatch event about slave change + * @dev: device + * @netdev_bonding_info: info to dispatch + * + * Send NETDEV_BONDING_INFO to netdev notifiers with info. + * The caller must hold the RTNL lock. 
+ */ +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info) +{ + struct netdev_notifier_bonding_info info; + + memcpy(&info.bonding_info, bonding_info, + sizeof(struct netdev_bonding_info)); + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + &info.info); +} +EXPORT_SYMBOL(netdev_bonding_info_change); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -6143,13 +6201,16 @@ static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; + size_t sz = count * sizeof(*rx); BUG_ON(count < 1); - rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) - return -ENOMEM; - + rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!rx) { + rx = vzalloc(sz); + if (!rx) + return -ENOMEM; + } dev->_rx = rx; for (i = 0; i < count; i++) @@ -6547,6 +6608,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(!list_empty(&dev->ptype_all)); + BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); @@ -6729,6 +6792,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->all_adj_list.upper); INIT_LIST_HEAD(&dev->all_adj_list.lower); + INIT_LIST_HEAD(&dev->ptype_all); + INIT_LIST_HEAD(&dev->ptype_specific); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -6779,7 +6844,7 @@ void free_netdev(struct net_device *dev) netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS - kfree(dev->_rx); + kvfree(dev->_rx); #endif kfree(rcu_dereference_protected(dev->ingress_queue, 1)); @@ -7064,11 +7129,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - 
netif_rx_internal(skb); + netif_rx_ni(skb); input_queue_head_incr(oldsd); } while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx_internal(skb); + netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 550892cd6b3f..91f74f3eb204 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return err; } +static int __ethtool_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_info) + return phydev->drv->module_info(phydev, modinfo); + + if (ops->get_module_info) + return ops->get_module_info(dev, modinfo); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_info(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info) - return -EOPNOTSUPP; if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; @@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } +static int __ethtool_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_eeprom) + return phydev->drv->module_eeprom(phydev, ee, data); + + if (ops->get_module_eeprom) + return ops->get_module_eeprom(dev, ee, data); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_eeprom(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - 
- if (!ops->get_module_info || !ops->get_module_eeprom) - return -EOPNOTSUPP; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; - return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, + return ethtool_get_any_eeprom(dev, useraddr, + __ethtool_get_module_eeprom, modinfo.eeprom_len); } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 185c341fafbd..44706e81b2e0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/flow.c b/net/core/flow.c index a0348fde1fdf..1033725be40b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -379,7 +379,7 @@ done: static void flow_cache_flush_task(struct work_struct *work) { struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); + flow_cache_flush_work); struct net *net = container_of(xfrm, struct net, xfrm); flow_cache_flush(net); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 45084938c403..2c35c02a931e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -178,6 +178,20 @@ ipv6: return false; } } + case htons(ETH_P_TIPC): { + struct { + __be32 pre[3]; + __be32 srcnode; + } *hdr, _hdr; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return false; + flow->src = hdr->srcnode; + flow->dst = 0; + flow->n_proto = proto; + flow->thoff = (u16)nhoff; + return true; + } case htons(ETH_P_FCOE): flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ @@ -408,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { map = rcu_dereference( - 
dev_maps->cpu_map[raw_smp_processor_id()]); + dev_maps->cpu_map[skb->sender_cpu - 1]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -454,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, { int queue_index = 0; +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) diff --git a/net/core/iovec.c b/net/core/iovec.c deleted file mode 100644 index dcbe98b3726a..000000000000 --- a/net/core/iovec.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * iovec manipulation routines. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Andrew Lunn : Errors in iovec copying. - * Pedro Roque : Added memcpy_fromiovecend and - * csum_..._fromiovecend. - * Andi Kleen : fixed error handling for 2.1 - * Alexey Kuznetsov: 2.1 optimisations - * Andi Kleen : Fix csum*fromiovecend for IPv6. - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/net.h> -#include <linux/in6.h> -#include <asm/uaccess.h> -#include <asm/byteorder.h> -#include <net/checksum.h> -#include <net/sock.h> - -/* - * And now for the all-in-one: copy and checksum from a user iovec - * directly to a datagram - * Calls to csum_partial but the last must be in 32 bit chunks - * - * ip_build_xmit must ensure that when fragmenting only the last - * call to this function will be unaligned also. 
- */ -int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, __wsum *csump) -{ - __wsum csum = *csump; - int partial_cnt = 0, err = 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - - /* There is a remnant from previous iov. */ - if (partial_cnt) { - int par_len = 4 - partial_cnt; - - /* iov component is too short ... */ - if (par_len > copy) { - if (copy_from_user(kdata, base, copy)) - goto out_fault; - kdata += copy; - base += copy; - partial_cnt += copy; - len -= copy; - iov++; - if (len) - continue; - *csump = csum_partial(kdata - partial_cnt, - partial_cnt, csum); - goto out; - } - if (copy_from_user(kdata, base, par_len)) - goto out_fault; - csum = csum_partial(kdata - partial_cnt, 4, csum); - kdata += par_len; - base += par_len; - copy -= par_len; - len -= par_len; - partial_cnt = 0; - } - - if (len > copy) { - partial_cnt = copy % 4; - if (partial_cnt) { - copy -= partial_cnt; - if (copy_from_user(kdata + copy, base + copy, - partial_cnt)) - goto out_fault; - } - } - - if (copy) { - csum = csum_and_copy_from_user(base, kdata, copy, - csum, &err); - if (err) - goto out; - } - len -= copy + partial_cnt; - kdata += copy + partial_cnt; - iov++; - } - *csump = csum; -out: - return err; - -out_fault: - err = -EFAULT; - goto out; -} -EXPORT_SYMBOL(csum_partial_copy_fromiovecend); - -unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs) -{ - unsigned long seg, base; - int pages = 0, len, size; - - while (nr_segs && (offset >= iov->iov_len)) { - offset -= iov->iov_len; - ++iov; - --nr_segs; - } - - for (seg = 0; seg < nr_segs; seg++) { - base = (unsigned long)iov[seg].iov_base + offset; - len = iov[seg].iov_len - offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> 
PAGE_SHIFT; - pages += size; - offset = 0; - } - - return pages; -} -EXPORT_SYMBOL(iov_pages); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8d614c93f86a..70fe9e10ac86 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: read_unlock_bh(&tbl->lock); @@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, goto errout; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); @@ -2126,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) break; nidx = 0; @@ -2142,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) goto out; next: nidx++; @@ -2202,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2232,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2270,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) <= 0) { + 
NLM_F_MULTI) < 0) { rc = -1; goto out; } @@ -2307,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI, tbl) <= 0) { + NLM_F_MULTI, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ce780c722e48..cb5290b8c428 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -15,6 +15,10 @@ #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> +#include <linux/net_namespace.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> +#include <net/netlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static int alloc_netid(struct net *net, struct net *peer, int reqid) +{ + int min = 0, max = 0; + + ASSERT_RTNL(); + + if (reqid >= 0) { + min = reqid; + max = reqid + 1; + } + + return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); +} + +/* This function is used by idr_for_each(). If net is equal to peer, the + * function returns the id so that idr_for_each() stops. Because we cannot + * returns the id 0 (idr_for_each() will not stop), we return the magic value + * NET_ID_ZERO (-1) for it. + */ +#define NET_ID_ZERO -1 +static int net_eq_idr(int id, void *net, void *peer) +{ + if (net_eq(net, peer)) + return id ? : NET_ID_ZERO; + return 0; +} + +static int __peernet2id(struct net *net, struct net *peer, bool alloc) +{ + int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + + ASSERT_RTNL(); + + /* Magic value for id 0. */ + if (id == NET_ID_ZERO) + return 0; + if (id > 0) + return id; + + if (alloc) + return alloc_netid(net, peer, -1); + + return -ENOENT; +} + +/* This function returns the id of a peer netns. If no id is assigned, one will + * be allocated and returned. 
+ */ +int peernet2id(struct net *net, struct net *peer) +{ + int id = __peernet2id(net, peer, true); + + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; +} +EXPORT_SYMBOL(peernet2id); + +struct net *get_net_ns_by_id(struct net *net, int id) +{ + struct net *peer; + + if (id < 0) + return NULL; + + rcu_read_lock(); + peer = idr_find(&net->netns_ids, id); + if (peer) + get_net(peer); + rcu_read_unlock(); + + return peer; +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; + idr_init(&net->netns_ids); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); + for_each_net(tmp) { + int id = __peernet2id(tmp, net, false); + + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + } + idr_destroy(&net->netns_ids); + } rtnl_unlock(); @@ -361,6 +446,7 @@ struct net *get_net_ns_by_fd(int fd) return ERR_PTR(-EINVAL); } #endif +EXPORT_SYMBOL_GPL(get_net_ns_by_fd); struct net *get_net_ns_by_pid(pid_t pid) { @@ -402,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; +static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { + [NETNSA_NONE] = { .type = NLA_UNSPEC }, + [NETNSA_NSID] = { .type = NLA_S32 }, + [NETNSA_PID] = { .type = NLA_U32 }, + [NETNSA_FD] = { .type = NLA_U32 }, +}; + +static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct net *peer; + int nsid, err; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (!tb[NETNSA_NSID]) + return -EINVAL; + nsid 
= nla_get_s32(tb[NETNSA_NSID]); + + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + if (IS_ERR(peer)) + return PTR_ERR(peer); + + if (__peernet2id(net, peer, false) >= 0) { + err = -EEXIST; + goto out; + } + + err = alloc_netid(net, peer, nsid); + if (err > 0) + err = 0; +out: + put_net(peer); + return err; +} + +static int rtnl_net_get_size(void) +{ + return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + ; +} + +static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, + int cmd, struct net *net, struct net *peer) +{ + struct nlmsghdr *nlh; + struct rtgenmsg *rth; + int id; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); + if (!nlh) + return -EMSGSIZE; + + rth = nlmsg_data(nlh); + rth->rtgen_family = AF_UNSPEC; + + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + if (nla_put_s32(skb, NETNSA_NSID, id)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct sk_buff *msg; + int err = -ENOBUFS; + struct net *peer; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + + if (IS_ERR(peer)) + return PTR_ERR(peer); + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + RTM_GETNSID, net, peer); + if (err < 0) + 
goto err_out; + + err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); + goto out; + +err_out: + nlmsg_free(msg); +out: + put_net(peer); + return err; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -435,6 +645,9 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e0ad5d16c9c5..c126a878c47c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { diff --git a/net/core/pktgen.c b/net/core/pktgen.c index da934fc3faa8..9fa25b0ea145 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2842,25 +2842,25 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; } else if (odev->features & NETIF_F_V4_CSUM) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - udp4_hwcsum(skb, udph->source, udph->dest); + udp4_hwcsum(skb, iph->saddr, iph->daddr); } else { - __wsum csum = udp_csum(skb); + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), datalen + 8, 0); /* add protocol-dependent pseudo-header */ - udph->check = csum_tcpudp_magic(udph->source, udph->dest, + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen + 8, IPPROTO_UDP, csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; } - pktgen_finalize_skb(pkt_dev, skb, datalen); - #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) return NULL; @@ -2976,6 
+2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; } else if (odev->features & NETIF_F_V6_CSUM) { @@ -2984,7 +2986,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0); } else { - __wsum csum = udp_csum(skb); + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), udplen, 0); /* add protocol-dependent pseudo-header */ udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum); @@ -2993,8 +2995,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph->check = CSUM_MANGLED_0; } - pktgen_finalize_skb(pkt_dev, skb, datalen); - return skb; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 446cbaf81185..5be499b6a2d2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -50,6 +50,7 @@ #include <net/arp.h> #include <net/route.h> #include <net/udp.h> +#include <net/tcp.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/fib_rules.h> @@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { + if (i == RTAX_CC_ALGO - 1) { + char tmp[TCP_CA_NAME_MAX], *name; + + name = tcp_ca_get_name_by_key(metrics[i], tmp); + if (!name) + continue; + if (nla_put_string(skb, i + 1, name)) + goto nla_put_failure; + } else { + if (nla_put_u32(skb, i + 1, metrics[i])) + goto nla_put_failure; + } valid++; - if (nla_put_u32(skb, i+1, metrics[i])) - goto nla_put_failure; } } @@ -864,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + + nla_total_size(4) /* 
IFLA_LINK_NETNSID */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1158,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (dev->rtnl_link_ops && + dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + goto nla_put_failure; + } + } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; @@ -1188,7 +1212,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, af_spec); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1223,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, + [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1237,18 +1263,12 @@ static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { }; static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, - [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_spoofchk) }, - [IFLA_VF_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_rate) }, - [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_link_state) }, + [IFLA_VF_MAC] = { .len 
= sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, + [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1315,7 +1335,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) */ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err <= 0) + if (err < 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1996,7 +2016,7 @@ replay: struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; - struct net *dest_net; + struct net *dest_net, *link_net = NULL; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -2102,7 +2122,18 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(dest_net, id); + if (!link_net) { + err = -EINVAL; + goto out; + } + } + + dev = rtnl_create_link(link_net ? : dest_net, ifname, + name_assign_type, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -2111,7 +2142,7 @@ replay: dev->ifindex = ifm->ifi_index; if (ops->newlink) { - err = ops->newlink(net, dev, tb, data); + err = ops->newlink(link_net ? : net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. 
@@ -2130,9 +2161,19 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) + if (err < 0) { unregister_netdevice(dev); + goto out; + } + + if (link_net) { + err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + unregister_netdevice(dev); + } out: + if (link_net) + put_net(link_net); put_net(dest_net); return err; } @@ -2315,7 +2356,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2698,10 +2740,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) idx); } - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); cops = NULL; } @@ -2797,7 +2840,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_nest_end(skb, protinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2868,32 +2912,24 @@ static inline size_t bridge_nlmsg_size(void) + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ } -static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +static int rtnl_bridge_notify(struct net_device *dev) { struct net *net = dev_net(dev); - struct net_device *br_dev = netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; + if (!dev->netdev_ops->ndo_bridge_getlink) + return 0; + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); if (!skb) { err = -ENOMEM; goto errout; } - if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } - - if ((flags & 
BRIDGE_FLAGS_SELF) && - dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + if (err < 0) + goto errout; if (!skb->len) goto errout; @@ -2915,7 +2951,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -2945,8 +2981,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -2955,7 +2989,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags); if (err) goto out; @@ -2966,17 +3000,20 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); - - if (!err) + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, + flags); + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } @@ -2988,7 +3025,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -3018,8 
+3055,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -3028,7 +3063,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; @@ -3039,17 +3074,21 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, + flags); - if (!err) + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } @@ -3139,6 +3178,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: + case NETDEV_BONDING_INFO: break; default: rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 395c15b82087..88c613eab142 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -74,6 +74,8 @@ #include <asm/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> +#include <linux/capability.h> +#include <linux/user_namespace.h> struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; @@ -677,13 +679,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); #endif -/* XXX: IS this still necessary? 
- JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif -#endif } /* Free everything but the sk_buff shell. */ @@ -830,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif +#ifdef CONFIG_XPS + CHECK_SKB_FIELD(sender_cpu); +#endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -3697,11 +3695,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, kfree_skb(skb); } +static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) +{ + bool ret; + + if (likely(sysctl_tstamp_allow_data || tsonly)) + return true; + + read_lock_bh(&sk->sk_callback_lock); + ret = sk->sk_socket && sk->sk_socket->file && + file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); + read_unlock_bh(&sk->sk_callback_lock); + return ret; +} + void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; + if (!skb_may_tx_timestamp(sk, false)) + return; + /* take a reference to prevent skb_orphan() from freeing the socket */ sock_hold(sk); @@ -3717,19 +3732,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; + bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; - if (!sk) + if (!sk || !skb_may_tx_timestamp(sk, tsonly)) return; - if (hwtstamps) - *skb_hwtstamps(orig_skb) = *hwtstamps; + if (tsonly) + skb = alloc_skb(0, GFP_ATOMIC); else - orig_skb->tstamp = ktime_get_real(); - - skb = skb_clone(orig_skb, GFP_ATOMIC); + skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) return; + if (tsonly) { + skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; + } + + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + else + skb->tstamp = ktime_get_real(); + __skb_complete_tx_timestamp(skb, sk, tstype); } 
EXPORT_SYMBOL_GPL(__skb_tstamp_tx); @@ -4148,6 +4172,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb->sender_cpu = 0; skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); @@ -4204,7 +4229,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) struct vlan_hdr *vhdr; u16 vlan_tci; - if (unlikely(vlan_tx_tag_present(skb))) { + if (unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } @@ -4290,7 +4315,7 @@ int skb_vlan_pop(struct sk_buff *skb) __be16 vlan_proto; int err; - if (likely(vlan_tx_tag_present(skb))) { + if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { if (unlikely((skb->protocol != htons(ETH_P_8021Q) && @@ -4320,7 +4345,7 @@ EXPORT_SYMBOL(skb_vlan_pop); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { unsigned int offset = skb->data - skb_mac_header(skb); int err; @@ -4330,7 +4355,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) */ __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; diff --git a/net/core/sock.c b/net/core/sock.c index 1c7a33db1314..93c8b20c91e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +int sysctl_tstamp_allow_data __read_mostly = 1; + struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; EXPORT_SYMBOL_GPL(memalloc_socks); @@ -840,6 +842,7 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP) { diff --git a/net/core/sysctl_net_core.c 
b/net/core/sysctl_net_core.c index 31baba2a71ce..eaa51ddf2368 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -52,7 +52,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (write) { if (size) { - if (size > 1<<30) { + if (size > 1<<29) { /* Enforce limit to prevent overflow */ mutex_unlock(&sock_flow_mutex); return -EINVAL; @@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, mutex_unlock(&sock_flow_mutex); return -ENOMEM; } - + rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; sock_table->mask = size - 1; } else sock_table = orig_sock_table; @@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tstamp_allow_data", + .data = &sysctl_tstamp_allow_data, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4400da7739da..b2c26b081134 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -702,7 +702,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d332aefb0846..df4803437888 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att int type = nla_type(attr); if (type) { - if (type > RTAX_MAX || nla_len(attr) < 4) + if (type > RTAX_MAX || type == RTAX_CC_ALGO || + nla_len(attr) < 4) goto err_inval; fi->fib_metrics[type-1] = nla_get_u32(attr); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 
daccc4a36d80..1d7c1256e845 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) goto errout; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -1709,9 +1710,6 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt->rt_flags |= RTCF_NOTIFY; err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - - if (err == 0) - goto out_free; if (err < 0) { err = -EMSGSIZE; goto out_free; @@ -1762,7 +1760,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { + 1, NLM_F_MULTI) < 0) { skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 86e3807052e9..1540b506e3e0 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -29,6 +29,7 @@ #include <linux/route.h> /* RTF_xxx */ #include <net/neighbour.h> #include <net/netlink.h> +#include <net/tcp.h> #include <net/dst.h> #include <net/flow.h> #include <net/fib_rules.h> @@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4); /* RTA_PRIORITY */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -365,7 +367,8 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp_head); } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git 
a/net/dsa/dsa.c b/net/dsa/dsa.c index 37317149f918..2173402d87e0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -603,7 +603,7 @@ static int dsa_of_probe(struct platform_device *pdev) pdev->dev.platform_data = pd; pd->netdev = &ethernet_dev->dev; - pd->nr_chips = of_get_child_count(np); + pd->nr_chips = of_get_available_child_count(np); if (pd->nr_chips > DSA_MAX_SWITCHES) pd->nr_chips = DSA_MAX_SWITCHES; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 589aafd01fc5..d104ae15836f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -676,18 +676,5 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); - if (p->phy != NULL) { - if (ds->drv->get_phy_flags) - p->phy->dev_flags |= ds->drv->get_phy_flags(ds, port); - - phy_attach(slave_dev, dev_name(&p->phy->dev), - PHY_INTERFACE_MODE_GMII); - - p->phy->autoneg = AUTONEG_ENABLE; - p->phy->speed = 0; - p->phy->duplex = 0; - p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; - } - return slave_dev; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 33a140e15834..238f38d21641 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); + +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct ethhdr *eh, *eh2; + unsigned int hlen, off_eth; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_eth = skb_gro_offset(skb); + hlen = off_eth + sizeof(*eh); + eh = skb_gro_header_fast(skb, off_eth); + if (skb_gro_header_hard(skb, hlen)) { + eh = skb_gro_header_slow(skb, hlen, off_eth); + if (unlikely(!eh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + eh2 = (struct ethhdr *)(p->data + off_eth); + if (compare_ether_header(eh, 
eh2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = eh->h_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, sizeof(*eh)); + skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(eth_gro_receive); + +int eth_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); + __be16 type = eh->h_proto; + struct packet_offload *ptype; + int err = -ENOSYS; + + if (skb->encapsulation) + skb_set_inner_mac_header(skb, nhoff); + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + + sizeof(struct ethhdr)); + + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(eth_gro_complete); + +static struct packet_offload eth_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_TEB), + .callbacks = { + .gro_receive = eth_gro_receive, + .gro_complete = eth_gro_complete, + }, +}; + +static int __init eth_offload_init(void) +{ + dev_add_offload(&eth_packet_offload); + + return 0; +} + +fs_initcall(eth_offload_init); diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h new file mode 100644 index 000000000000..e50f69da78eb --- /dev/null +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -0,0 +1,72 @@ +#ifndef __IEEE802154_6LOWPAN_I_H__ +#define __IEEE802154_6LOWPAN_I_H__ + +#include <linux/list.h> + +#include <net/ieee802154_netdev.h> +#include <net/inet_frag.h> + +struct lowpan_create_arg { + u16 tag; + u16 d_size; + const struct ieee802154_addr *src; + const struct ieee802154_addr *dst; +}; + +/* Equivalent of ipv4 struct ip + */ +struct lowpan_frag_queue { + struct inet_frag_queue q; + + u16 tag; + u16 d_size; + struct ieee802154_addr saddr; + struct ieee802154_addr 
daddr; +}; + +static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +{ + switch (a->mode) { + case IEEE802154_ADDR_LONG: + return (((__force u64)a->extended_addr) >> 32) ^ + (((__force u64)a->extended_addr) & 0xffffffff); + case IEEE802154_ADDR_SHORT: + return (__force u32)(a->short_addr); + default: + return 0; + } +} + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ + u16 fragment_tag; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +extern struct list_head lowpan_devices; + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); +void lowpan_net_frag_exit(void); +int lowpan_net_frag_init(void); + +void lowpan_rx_init(void); +void lowpan_rx_exit(void); + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len); +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev); + +#endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig new file mode 100644 index 000000000000..d24f985b0bfd --- /dev/null +++ b/net/ieee802154/6lowpan/Kconfig @@ -0,0 +1,5 @@ +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on 6LOWPAN + ---help--- + IPv6 compression over IEEE 802.15.4. 
diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile new file mode 100644 index 000000000000..6bfb270a81a6 --- /dev/null +++ b/net/ieee802154/6lowpan/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o + +ieee802154_6lowpan-y := core.o rx.o reassembly.o tx.o diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c new file mode 100644 index 000000000000..055fbb71ba6f --- /dev/null +++ b/net/ieee802154/6lowpan/core.c @@ -0,0 +1,304 @@ +/* Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/ieee802154.h> + +#include <net/ipv6.h> + +#include "6lowpan_i.h" + +LIST_HEAD(lowpan_devices); +static int lowpan_open_count; + +static __le16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static __le16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static struct lock_class_key lowpan_tx_busylock; +static struct lock_class_key lowpan_netdev_xmit_lock_key; + +static void 
lowpan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &lowpan_netdev_xmit_lock_key); +} + +static int lowpan_dev_init(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, + .ndo_start_xmit = lowpan_xmit, +}; + +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, +}; + +static void lowpan_setup(struct net_device *dev) +{ + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_BROADCAST | IFF_MULTICAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; + dev->destructor = free_netdev; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + int ret; + + ASSERT_RTNL(); + + pr_debug("adding new link\n"); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) { + dev_put(real_dev); + return -EINVAL; + } + + 
lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + /* Set the lowpan hardware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + ret = register_netdevice(dev); + if (ret >= 0) { + if (!lowpan_open_count) + lowpan_rx_init(); + lowpan_open_count++; + } + + return ret; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry, *tmp; + + ASSERT_RTNL(); + + lowpan_open_count--; + if (!lowpan_open_count) + lowpan_rx_exit(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct 
net_device *dev = netdev_notifier_info_to_dev(ptr); + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + err = lowpan_net_frag_init(); + if (err < 0) + goto out; + + err = lowpan_netlink_init(); + if (err < 0) + goto out_frag; + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) + goto out_pack; + + return 0; + +out_pack: + lowpan_netlink_fini(); +out_frag: + lowpan_net_frag_exit(); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + lowpan_netlink_fini(); + + lowpan_net_frag_exit(); + + unregister_netdevice_notifier(&lowpan_dev_notifier); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 9d980ed3ffe2..f46e4d1306f2 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -28,7 +28,7 @@ #include <net/ipv6.h> #include <net/inet_frag.h> -#include "reassembly.h" +#include "6lowpan_i.h" static const char lowpan_frags_cache_name[] = "lowpan-frags"; diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c new file mode 100644 index 000000000000..4be1d289ab2d --- /dev/null +++ b/net/ieee802154/6lowpan/rx.c @@ -0,0 +1,171 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/if_arp.h> + +#include <net/6lowpan.h> +#include <net/ieee802154_netdev.h> + +#include "6lowpan_i.h" + +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + kfree_skb(skb); + rcu_read_unlock(); + return NET_RX_DROP; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + if (stat == NET_RX_DROP) + break; + } + rcu_read_unlock(); + + consume_skb(skb); + + return stat; +} + +static int +iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +{ + u8 iphc0, iphc1; + struct ieee802154_addr_sa sa, da; + void *sap, *dap; + + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + return -EINVAL; + + ieee802154_addr_to_sa(&sa, &hdr->source); + ieee802154_addr_to_sa(&da, &hdr->dest); + + if (sa.addr_type == IEEE802154_ADDR_SHORT) + sap = &sa.short_addr; + else + sap = &sa.hwaddr; + + if (da.addr_type == IEEE802154_ADDR_SHORT) + dap = &da.short_addr; + else + dap = &da.hwaddr; + + return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, + IEEE802154_ADDR_LEN, dap, da.addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1); +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device 
*dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ieee802154_hdr hdr; + int ret; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto drop; + + if (!netif_running(dev)) + goto drop_skb; + + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop_skb; + + if (dev->type != ARPHRD_IEEE802154) + goto drop_skb; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto drop_skb; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(skb, 1); + return lowpan_give_skb_to_devices(skb, NULL); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + default: + break; + } + } + +drop_skb: + kfree_skb(skb); +drop: + return NET_RX_DROP; +} + +static struct packet_type lowpan_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +void lowpan_rx_init(void) +{ + dev_add_pack(&lowpan_packet_type); +} + +void lowpan_rx_exit(void) +{ + dev_remove_pack(&lowpan_packet_type); +} diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c new file mode 100644 index 000000000000..2349070bd534 --- /dev/null +++ 
b/net/ieee802154/6lowpan/tx.c @@ -0,0 +1,271 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <net/6lowpan.h> +#include <net/ieee802154_netdev.h> + +#include "6lowpan_i.h" + +/* don't save pan id, it's intra pan */ +struct lowpan_addr { + u8 mode; + union { + /* IPv6 needs big endian here */ + __be64 extended_addr; + __be16 short_addr; + } u; +}; + +struct lowpan_addr_info { + struct lowpan_addr daddr; + struct lowpan_addr saddr; +}; + +static inline struct +lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) +{ + WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); + return (struct lowpan_addr_info *)(skb->data - + sizeof(struct lowpan_addr_info)); +} + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + struct lowpan_addr_info *info; + + /* TODO: + * if this package isn't ipv6 one, where should it be routed? 
+ */ + if (type != ETH_P_IPV6) + return 0; + + if (!saddr) + saddr = dev->dev_addr; + + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + info = lowpan_skb_priv(skb); + + /* TODO: Currently we only support extended_addr */ + info->daddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->daddr.u.extended_addr, daddr, + sizeof(info->daddr.u.extended_addr)); + info->saddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->saddr.u.extended_addr, saddr, + sizeof(info->daddr.u.extended_addr)); + + return 0; +} + +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) +{ + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct sk_buff *frag; + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } + } else { + frag = ERR_PTR(-ENOMEM); + } + + return frag; +} + +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; + + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); + + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return -PTR_ERR(frag); + + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); + + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); + + return dev_queue_xmit(frag); +} + +static int +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device 
*dev, + const struct ieee802154_hdr *wpan_hdr) +{ + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - + skb->mac_len; + frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); + lowpan_dev_info(dev)->fragment_tag++; + + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); + + payload_cap = ieee802154_max_payload(wpan_hdr); + + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); + + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, ntohs(frag_tag)); + goto err; + } + + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); + + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); + + frag_hdr[4] = dgram_offset >> 3; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { + pr_debug("%s unable to send a FRAGN packet. 
(tag: %d, offset: %d)\n", + __func__, ntohs(frag_tag), skb_offset); + goto err; + } + } while (skb_unprocessed > frag_cap); + + consume_skb(skb); + return NET_XMIT_SUCCESS; + +err: + kfree_skb(skb); + return rc; +} + +static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct lowpan_addr_info info; + void *daddr, *saddr; + + memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); + + /* TODO: Currently we only support extended_addr */ + daddr = &info.daddr.u.extended_addr; + saddr = &info.saddr.u.extended_addr; + + lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + + cb->type = IEEE802154_FC_TYPE_DATA; + + /* prepare wpan address data */ + sa.mode = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.extended_addr = ieee802154_devaddr_from_raw(saddr); + + /* intra-PAN communications */ + da.pan_id = sa.pan_id; + + /* if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + da.mode = IEEE802154_ADDR_SHORT; + da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + cb->ackreq = false; + } else { + da.mode = IEEE802154_ADDR_LONG; + da.extended_addr = ieee802154_devaddr_from_raw(daddr); + cb->ackreq = true; + } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + ETH_P_IPV6, (void *)&da, (void *)&sa, 0); +} + +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_hdr wpan_hdr; + int max_single, ret; + + pr_debug("package xmit\n"); + + /* We must take a copy of the skb before we modify/replace the ipv6 + * header as the header could be used elsewhere + */ + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + ret = lowpan_header(skb, dev); + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) 
{ + kfree_skb(skb); + return NET_XMIT_DROP; + } + + max_single = ieee802154_max_payload(&wpan_hdr); + + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; + + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + + return rc < 0 ? NET_XMIT_DROP : rc; + } +} diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c deleted file mode 100644 index 27eaa65e88e1..000000000000 --- a/net/ieee802154/6lowpan_rtnl.c +++ /dev/null @@ -1,729 +0,0 @@ -/* Copyright 2011, Siemens AG - * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> - */ - -/* Based on patches from Jon Smirl <jonsmirl@gmail.com> - * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* Jon's code is based on 6lowpan implementation for Contiki which is: - * Copyright (c) 2008, Swedish Institute of Computer Science. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the Institute nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <linux/bitops.h> -#include <linux/if_arp.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/ieee802154.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> -#include <net/6lowpan.h> -#include <net/ipv6.h> - -#include "reassembly.h" - -static LIST_HEAD(lowpan_devices); -static int lowpan_open_count; - -/* private device info */ -struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ - struct mutex dev_list_mtx; /* mutex for list ops */ - u16 fragment_tag; -}; - -struct lowpan_dev_record { - struct net_device *ldev; - struct list_head list; -}; - -/* don't save pan id, it's intra pan */ -struct lowpan_addr { - u8 mode; - union { - /* IPv6 needs big endian here */ - __be64 extended_addr; - __be16 short_addr; - } u; -}; - -struct lowpan_addr_info { - struct lowpan_addr daddr; - struct lowpan_addr saddr; -}; - -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device 
*dev) -{ - return netdev_priv(dev); -} - -static inline struct -lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) -{ - WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); - return (struct lowpan_addr_info *)(skb->data - - sizeof(struct lowpan_addr_info)); -} - -static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) -{ - const u8 *saddr = _saddr; - const u8 *daddr = _daddr; - struct lowpan_addr_info *info; - - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - if (type != ETH_P_IPV6) - return 0; - - if (!saddr) - saddr = dev->dev_addr; - - raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - info = lowpan_skb_priv(skb); - - /* TODO: Currently we only support extended_addr */ - info->daddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->daddr.u.extended_addr, daddr, - sizeof(info->daddr.u.extended_addr)); - info->saddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->saddr.u.extended_addr, saddr, - sizeof(info->daddr.u.extended_addr)); - - return 0; -} - -static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) -{ - struct lowpan_dev_record *entry; - struct sk_buff *skb_cp; - int stat = NET_RX_SUCCESS; - - skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { - skb_cp = skb_copy(skb, GFP_ATOMIC); - if (!skb_cp) { - kfree_skb(skb); - rcu_read_unlock(); - return NET_RX_DROP; - } - - skb_cp->dev = entry->ldev; - stat = netif_rx(skb_cp); - if (stat == NET_RX_DROP) - break; - } - rcu_read_unlock(); - - consume_skb(skb); - - return stat; -} - -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) -{ - u8 iphc0, iphc1; - struct 
ieee802154_addr_sa sa, da; - void *sap, *dap; - - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); - - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; - - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; - - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1); -} - -static struct sk_buff* -lowpan_alloc_frag(struct sk_buff *skb, int size, - const struct ieee802154_hdr *master_hdr) -{ - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; - struct sk_buff *frag; - int rc; - - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, - GFP_ATOMIC); - - if (likely(frag)) { - frag->dev = real_dev; - frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); - skb_reset_network_header(frag); - *mac_cb(frag) = *mac_cb(skb); - - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, - &master_hdr->source, size); - if (rc < 0) { - kfree_skb(frag); - return ERR_PTR(rc); - } - } else { - frag = ERR_PTR(-ENOMEM); - } - - return frag; -} - -static int -lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, - u8 *frag_hdr, int frag_hdrlen, - int offset, int len) -{ - struct sk_buff *frag; - - raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - - frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); - if (IS_ERR(frag)) - return -PTR_ERR(frag); - - memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); - memcpy(skb_put(frag, len), skb_network_header(skb) + offset, 
len); - - raw_dump_table(__func__, " fragment dump", frag->data, frag->len); - - return dev_queue_xmit(frag); -} - -static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, - const struct ieee802154_hdr *wpan_hdr) -{ - u16 dgram_size, dgram_offset; - __be16 frag_tag; - u8 frag_hdr[5]; - int frag_cap, frag_len, payload_cap, rc; - int skb_unprocessed, skb_offset; - - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; - - frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); - frag_hdr[1] = dgram_size & 0xff; - memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); - - payload_cap = ieee802154_max_payload(wpan_hdr); - - frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - - skb_network_header_len(skb), 8); - - skb_offset = skb_network_header_len(skb); - skb_unprocessed = skb->len - skb->mac_len - skb_offset; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAG1_HEAD_SIZE, 0, - frag_len + skb_network_header_len(skb)); - if (rc) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, ntohs(frag_tag)); - goto err; - } - - frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; - frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; - frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - - do { - dgram_offset += frag_len; - skb_offset += frag_len; - skb_unprocessed -= frag_len; - frag_len = min(frag_cap, skb_unprocessed); - - frag_hdr[4] = dgram_offset >> 3; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAGN_HEAD_SIZE, skb_offset, - frag_len); - if (rc) { - pr_debug("%s unable to send a FRAGN packet. 
(tag: %d, offset: %d)\n", - __func__, ntohs(frag_tag), skb_offset); - goto err; - } - } while (skb_unprocessed > frag_cap); - - consume_skb(skb); - return NET_XMIT_SUCCESS; - -err: - kfree_skb(skb); - return rc; -} - -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_addr sa, da; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); - struct lowpan_addr_info info; - void *daddr, *saddr; - - memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); - - /* TODO: Currently we only support extended_addr */ - daddr = &info.daddr.u.extended_addr; - saddr = &info.saddr.u.extended_addr; - - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); - - cb->type = IEEE802154_FC_TYPE_DATA; - - /* prepare wpan address data */ - sa.mode = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - sa.extended_addr = ieee802154_devaddr_from_raw(saddr); - - /* intra-PAN communications */ - da.pan_id = sa.pan_id; - - /* if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { - da.mode = IEEE802154_ADDR_SHORT; - da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); - cb->ackreq = false; - } else { - da.mode = IEEE802154_ADDR_LONG; - da.extended_addr = ieee802154_devaddr_from_raw(daddr); - cb->ackreq = true; - } - - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); -} - -static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_hdr wpan_hdr; - int max_single, ret; - - pr_debug("package xmit\n"); - - /* We must take a copy of the skb before we modify/replace the ipv6 - * header as the header could be used elsewhere - */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; - - ret = lowpan_header(skb, dev); - if (ret < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - if (ieee802154_hdr_peek(skb, 
&wpan_hdr) < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - max_single = ieee802154_max_payload(&wpan_hdr); - - if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; - return dev_queue_xmit(skb); - } else { - netdev_tx_t rc; - - pr_debug("frame is too big, fragmentation is needed\n"); - rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); - - return rc < 0 ? NET_XMIT_DROP : rc; - } -} - -static __le16 lowpan_get_pan_id(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); -} - -static __le16 lowpan_get_short_addr(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); -} - -static u8 lowpan_get_dsn(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); -} - -static struct header_ops lowpan_header_ops = { - .create = lowpan_header_create, -}; - -static struct lock_class_key lowpan_tx_busylock; -static struct lock_class_key lowpan_netdev_xmit_lock_key; - -static void lowpan_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &lowpan_netdev_xmit_lock_key); -} - -static int lowpan_dev_init(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; - return 0; -} - -static const struct net_device_ops lowpan_netdev_ops = { - .ndo_init = lowpan_dev_init, - .ndo_start_xmit = lowpan_xmit, -}; - -static struct ieee802154_mlme_ops lowpan_mlme = { - .get_pan_id = lowpan_get_pan_id, - .get_short_addr = lowpan_get_short_addr, - .get_dsn = lowpan_get_dsn, -}; - -static void lowpan_setup(struct net_device *dev) -{ - dev->addr_len = 
IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; - /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->ml_priv = &lowpan_mlme; - dev->destructor = free_netdev; -} - -static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) - return -EINVAL; - } - return 0; -} - -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ieee802154_hdr hdr; - int ret; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto drop; - - if (!netif_running(dev)) - goto drop_skb; - - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; - - if (dev->type != ARPHRD_IEEE802154) - goto drop_skb; - - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. 
*/ - skb_pull(skb, 1); - return lowpan_give_skb_to_devices(skb, NULL); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } - } - -drop_skb: - kfree_skb(skb); -drop: - return NET_RX_DROP; -} - -static struct packet_type lowpan_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = lowpan_rcv, -}; - -static int lowpan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_device *real_dev; - struct lowpan_dev_record *entry; - int ret; - - ASSERT_RTNL(); - - pr_debug("adding new link\n"); - - if (!tb[IFLA_LINK]) - return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - if (real_dev->type != ARPHRD_IEEE802154) { - dev_put(real_dev); - return -EINVAL; - } - - lowpan_dev_info(dev)->real_dev = real_dev; - mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - dev_put(real_dev); - lowpan_dev_info(dev)->real_dev = NULL; - return -ENOMEM; - } - - entry->ldev = dev; - - /* Set the lowpan hardware address to the 
wpan hardware address. */ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, &lowpan_devices); - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - ret = register_netdevice(dev); - if (ret >= 0) { - if (!lowpan_open_count) - dev_add_pack(&lowpan_packet_type); - lowpan_open_count++; - } - - return ret; -} - -static void lowpan_dellink(struct net_device *dev, struct list_head *head) -{ - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry, *tmp; - - ASSERT_RTNL(); - - lowpan_open_count--; - if (!lowpan_open_count) - dev_remove_pack(&lowpan_packet_type); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (entry->ldev == dev) { - list_del(&entry->list); - kfree(entry); - } - } - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); - - unregister_netdevice_queue(dev, head); - - dev_put(real_dev); -} - -static struct rtnl_link_ops lowpan_link_ops __read_mostly = { - .kind = "lowpan", - .priv_size = sizeof(struct lowpan_dev_info), - .setup = lowpan_setup, - .newlink = lowpan_newlink, - .dellink = lowpan_dellink, - .validate = lowpan_validate, -}; - -static inline int __init lowpan_netlink_init(void) -{ - return rtnl_link_register(&lowpan_link_ops); -} - -static inline void lowpan_netlink_fini(void) -{ - rtnl_link_unregister(&lowpan_link_ops); -} - -static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - LIST_HEAD(del_list); - struct lowpan_dev_record *entry, *tmp; - - if (dev->type != ARPHRD_IEEE802154) - goto out; - - if (event == NETDEV_UNREGISTER) { - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if 
(lowpan_dev_info(entry->ldev)->real_dev == dev) - lowpan_dellink(entry->ldev, &del_list); - } - - unregister_netdevice_many(&del_list); - } - -out: - return NOTIFY_DONE; -} - -static struct notifier_block lowpan_dev_notifier = { - .notifier_call = lowpan_device_event, -}; - -static int __init lowpan_init_module(void) -{ - int err = 0; - - err = lowpan_net_frag_init(); - if (err < 0) - goto out; - - err = lowpan_netlink_init(); - if (err < 0) - goto out_frag; - - err = register_netdevice_notifier(&lowpan_dev_notifier); - if (err < 0) - goto out_pack; - - return 0; - -out_pack: - lowpan_netlink_fini(); -out_frag: - lowpan_net_frag_exit(); -out: - return err; -} - -static void __exit lowpan_cleanup_module(void) -{ - lowpan_netlink_fini(); - - lowpan_net_frag_exit(); - - unregister_netdevice_notifier(&lowpan_dev_notifier); -} - -module_init(lowpan_init_module); -module_exit(lowpan_cleanup_module); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index c0d4154d144f..1370d5b0041b 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,4 +1,4 @@ -config IEEE802154 +menuconfig IEEE802154 tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" ---help--- IEEE Std 802.15.4 defines a low data rate, low power and low @@ -10,8 +10,16 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. -config IEEE802154_6LOWPAN - tristate "6lowpan support over IEEE 802.15.4" - depends on IEEE802154 && 6LOWPAN +if IEEE802154 + +config IEEE802154_SOCKET + tristate "IEEE 802.15.4 socket interface" + default y ---help--- - IPv6 compression over IEEE 802.15.4. + Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface + for 802.15.4 dataframes. Also RAW socket interface to build MAC + header from userspace. 
+ +source "net/ieee802154/6lowpan/Kconfig" + +endif diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 9f6970f2a28b..05dab2957cd4 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,9 +1,9 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o +obj-$(CONFIG_IEEE802154) += ieee802154.o +obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o +obj-y += 6lowpan/ -ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -af_802154-y := af_ieee802154.o raw.o dgram.o +ieee802154_socket-y := socket.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h deleted file mode 100644 index 343b63e6f953..000000000000 --- a/net/ieee802154/af802154.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Internal interfaces for ieee 802.15.4 address family. - * - * Copyright 2007, 2008, 2009 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#ifndef AF802154_H -#define AF802154_H - -struct sk_buff; -struct net_device; -struct ieee802154_addr; -extern struct proto ieee802154_raw_prot; -extern struct proto ieee802154_dgram_prot; -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb); -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb); -struct net_device *ieee802154_get_dev(struct net *net, - const struct ieee802154_addr *addr); - -#endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c deleted file mode 100644 index d0a1282cdf43..000000000000 --- a/net/ieee802154/af_ieee802154.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - * IEEE802154.4 socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Maxim Gorbachyov <maxim.gorbachev@siemens.com> - */ - -#include <linux/net.h> -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/if.h> -#include <linux/termios.h> /* For TIOCOUTQ/INQ */ -#include <linux/list.h> -#include <linux/slab.h> -#include <net/datalink.h> -#include <net/psnap.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/route.h> - -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include "af802154.h" - -/* Utility function for families */ -struct net_device* -ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) -{ - struct net_device *dev = NULL; - struct net_device *tmp; - __le16 pan_id, short_addr; - u8 hwaddr[IEEE802154_ADDR_LEN]; - - switch (addr->mode) { - case IEEE802154_ADDR_LONG: - ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); - rcu_read_lock(); - dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - break; - case IEEE802154_ADDR_SHORT: - if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) - break; - - rtnl_lock(); - - for_each_netdev(net, tmp) { - if (tmp->type != ARPHRD_IEEE802154) - continue; - - pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); - short_addr = - ieee802154_mlme_ops(tmp)->get_short_addr(tmp); - - if (pan_id == addr->pan_id && - short_addr == addr->short_addr) { - dev = tmp; - dev_hold(dev); - break; - } - } - - rtnl_unlock(); - break; - default: - pr_warn("Unsupported ieee802154 address type: %d\n", - addr->mode); - break; - } - - return dev; -} - -static int ieee802154_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - sock->sk = NULL; - sk->sk_prot->close(sk, 0); - } - return 0; -} - -static int ieee802154_sock_sendmsg(struct 
kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - - return sk->sk_prot->sendmsg(iocb, sk, msg, len); -} - -static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); - - return sock_no_bind(sock, uaddr, addr_len); -} - -static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - - if (addr_len < sizeof(uaddr->sa_family)) - return -EINVAL; - - if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); - - return sk->sk_prot->connect(sk, uaddr, addr_len); -} - -static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, - unsigned int cmd) -{ - struct ifreq ifr; - int ret = -ENOIOCTLCMD; - struct net_device *dev; - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - dev_load(sock_net(sk), ifr.ifr_name); - dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - - if (!dev) - return -ENODEV; - - if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) - ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); - - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - ret = -EFAULT; - dev_put(dev); - - return ret; -} - -static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - - switch (cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - case SIOCGIFADDR: - case SIOCSIFADDR: - return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, - cmd); - default: - if (!sk->sk_prot->ioctl) - return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); - } -} - -static const struct proto_ops 
ieee802154_raw_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -static const struct proto_ops ieee802154_dgram_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -/* Create a socket. Initialise the socket, blank the addresses - * set the state. 
- */ -static int ieee802154_create(struct net *net, struct socket *sock, - int protocol, int kern) -{ - struct sock *sk; - int rc; - struct proto *proto; - const struct proto_ops *ops; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_RAW: - proto = &ieee802154_raw_prot; - ops = &ieee802154_raw_ops; - break; - case SOCK_DGRAM: - proto = &ieee802154_dgram_prot; - ops = &ieee802154_dgram_ops; - break; - default: - rc = -ESOCKTNOSUPPORT; - goto out; - } - - rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); - if (!sk) - goto out; - rc = 0; - - sock->ops = ops; - - sock_init_data(sock, sk); - /* FIXME: sk->sk_destruct */ - sk->sk_family = PF_IEEE802154; - - /* Checksums on by default */ - sock_set_flag(sk, SOCK_ZAPPED); - - if (sk->sk_prot->hash) - sk->sk_prot->hash(sk); - - if (sk->sk_prot->init) { - rc = sk->sk_prot->init(sk); - if (rc) - sk_common_release(sk); - } -out: - return rc; -} - -static const struct net_proto_family ieee802154_family_ops = { - .family = PF_IEEE802154, - .create = ieee802154_create, - .owner = THIS_MODULE, -}; - -static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - if (!netif_running(dev)) - goto drop; - pr_debug("got frame, type %d, dev %p\n", dev->type, dev); -#ifdef DEBUG - print_hex_dump_bytes("ieee802154_rcv ", - DUMP_PREFIX_NONE, skb->data, skb->len); -#endif - - if (!net_eq(dev_net(dev), &init_net)) - goto drop; - - ieee802154_raw_deliver(dev, skb); - - if (dev->type != ARPHRD_IEEE802154) - goto drop; - - if (skb->pkt_type != PACKET_OTHERHOST) - return ieee802154_dgram_deliver(dev, skb); - -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static struct packet_type ieee802154_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = ieee802154_rcv, -}; - -static int __init af_ieee802154_init(void) -{ - int rc = -EINVAL; - - rc = proto_register(&ieee802154_raw_prot, 1); - if (rc) - goto out; 
- - rc = proto_register(&ieee802154_dgram_prot, 1); - if (rc) - goto err_dgram; - - /* Tell SOCKET that we are alive */ - rc = sock_register(&ieee802154_family_ops); - if (rc) - goto err_sock; - dev_add_pack(&ieee802154_packet_type); - - rc = 0; - goto out; - -err_sock: - proto_unregister(&ieee802154_dgram_prot); -err_dgram: - proto_unregister(&ieee802154_raw_prot); -out: - return rc; -} - -static void __exit af_ieee802154_remove(void) -{ - dev_remove_pack(&ieee802154_packet_type); - sock_unregister(PF_IEEE802154); - proto_unregister(&ieee802154_dgram_prot); - proto_unregister(&ieee802154_raw_prot); -} - -module_init(af_ieee802154_init); -module_exit(af_ieee802154_remove); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c deleted file mode 100644 index d1930b70c4aa..000000000000 --- a/net/ieee802154/dgram.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * IEEE 802.15.4 dgram socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#include <linux/capability.h> -#include <linux/net.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/ieee802154.h> -#include <net/sock.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include <asm/ioctls.h> - -#include "af802154.h" - -static HLIST_HEAD(dgram_head); -static DEFINE_RWLOCK(dgram_lock); - -struct dgram_sock { - struct sock sk; - - struct ieee802154_addr src_addr; - struct ieee802154_addr dst_addr; - - unsigned int bound:1; - unsigned int connected:1; - unsigned int want_ack:1; - unsigned int secen:1; - unsigned int secen_override:1; - unsigned int seclevel:3; - unsigned int seclevel_override:1; -}; - -static inline struct dgram_sock *dgram_sk(const struct sock *sk) -{ - return container_of(sk, struct dgram_sock, sk); -} - -static void dgram_hash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - sk_add_node(sk, &dgram_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&dgram_lock); -} - -static void dgram_unhash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&dgram_lock); -} - -static int dgram_init(struct sock *sk) -{ - struct dgram_sock *ro = dgram_sk(sk); - - ro->want_ack = 1; - return 0; -} - -static void dgram_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct ieee802154_addr haddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = -EINVAL; - struct net_device *dev; - - lock_sock(sk); - - ro->bound = 0; - - if (len < sizeof(*addr)) - goto out; - - if (addr->family != AF_IEEE802154) - goto out; - - ieee802154_addr_from_sa(&haddr, 
&addr->addr); - dev = ieee802154_get_dev(sock_net(sk), &haddr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - ro->src_addr = haddr; - - ro->bound = 1; - err = 0; -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ - switch (cmd) { - case SIOCOUTQ: - { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); - } - - case SIOCINQ: - { - struct sk_buff *skb; - unsigned long amount; - - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { - /* We will only return the amount - * of this packet since that is all - * that will be read. - */ - amount = skb->len - ieee802154_hdr_length(skb); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } - } - - return -ENOIOCTLCMD; -} - -/* FIXME: autobind */ -static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = 0; - - if (len < sizeof(*addr)) - return -EINVAL; - - if (addr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - if (!ro->bound) { - err = -ENETUNREACH; - goto out; - } - - ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); - ro->connected = 1; - -out: - release_sock(sk); - return err; -} - -static int dgram_disconnect(struct sock *sk, int flags) -{ - struct dgram_sock *ro = dgram_sk(sk); - - lock_sock(sk); - ro->connected = 0; - release_sock(sk); - - return 0; -} - -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - struct ieee802154_mac_cb *cb; - struct dgram_sock *ro = dgram_sk(sk); - struct ieee802154_addr dst_addr; - int 
hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - if (!ro->connected && !msg->msg_name) - return -EDESTADDRREQ; - else if (ro->connected && msg->msg_name) - return -EISCONN; - - if (!ro->bound) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EMSGSIZE; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_network_header(skb); - - cb = mac_cb_init(skb); - cb->type = IEEE802154_FC_TYPE_DATA; - cb->ackreq = ro->want_ack; - - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, - daddr, msg->msg_name); - - ieee802154_addr_from_sa(&dst_addr, &daddr->addr); - } else { - dst_addr = ro->dst_addr; - } - - cb->secen = ro->secen; - cb->secen_override = ro->secen_override; - cb->seclevel = ro->seclevel; - cb->seclevel_override = ro->seclevel_override; - - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, - ro->bound ? 
&ro->src_addr : NULL, size); - if (err < 0) - goto out_skb; - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - /* FIXME: skip headers if necessary ?! */ - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (saddr) { - saddr->family = AF_IEEE802154; - ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); - } - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -static inline bool -ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, - struct dgram_sock *ro) -{ - if (!ro->bound) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_LONG && - hw_addr == ro->src_addr.extended_addr) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && - pan_id == ro->src_addr.pan_id && - short_addr == ro->src_addr.short_addr) - return true; 
- - return false; -} - -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk, *prev = NULL; - int ret = NET_RX_SUCCESS; - __le16 pan_id, short_addr; - __le64 hw_addr; - - /* Data frame processing */ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); - - read_lock(&dgram_lock); - sk_for_each(sk, &dgram_head) { - if (ieee802154_match_sock(hw_addr, pan_id, short_addr, - dgram_sk(sk))) { - if (prev) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - dgram_rcv_skb(prev, clone); - } - - prev = sk; - } - } - - if (prev) { - dgram_rcv_skb(prev, skb); - } else { - kfree_skb(skb); - ret = NET_RX_DROP; - } - read_unlock(&dgram_lock); - - return ret; -} - -static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - - int val, len; - - if (level != SOL_IEEE802154) - return -EOPNOTSUPP; - - if (get_user(len, optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - switch (optname) { - case WPAN_WANTACK: - val = ro->want_ack; - break; - case WPAN_SECURITY: - if (!ro->secen_override) - val = WPAN_SECURITY_DEFAULT; - else if (ro->secen) - val = WPAN_SECURITY_ON; - else - val = WPAN_SECURITY_OFF; - break; - case WPAN_SECURITY_LEVEL: - if (!ro->seclevel_override) - val = WPAN_SECURITY_LEVEL_DEFAULT; - else - val = ro->seclevel; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - return 0; -} - -static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - struct net *net = sock_net(sk); - int val; - int err = 0; - - if (optlen < 
sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case WPAN_WANTACK: - ro->want_ack = !!val; - break; - case WPAN_SECURITY: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - switch (val) { - case WPAN_SECURITY_DEFAULT: - ro->secen_override = 0; - break; - case WPAN_SECURITY_ON: - ro->secen_override = 1; - ro->secen = 1; - break; - case WPAN_SECURITY_OFF: - ro->secen_override = 1; - ro->secen = 0; - break; - default: - err = -EINVAL; - break; - } - break; - case WPAN_SECURITY_LEVEL: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - if (val < WPAN_SECURITY_LEVEL_DEFAULT || - val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { - err = -EINVAL; - } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { - ro->seclevel_override = 0; - } else { - ro->seclevel_override = 1; - ro->seclevel = val; - } - break; - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -struct proto ieee802154_dgram_prot = { - .name = "IEEE-802.15.4-MAC", - .owner = THIS_MODULE, - .obj_size = sizeof(struct dgram_sock), - .init = dgram_init, - .close = dgram_close, - .bind = dgram_bind, - .sendmsg = dgram_sendmsg, - .recvmsg = dgram_recvmsg, - .hash = dgram_hash, - .unhash = dgram_unhash, - .connect = dgram_connect, - .disconnect = dgram_disconnect, - .ioctl = dgram_ioctl, - .getsockopt = dgram_getsockopt, - .setsockopt = dgram_setsockopt, -}; - diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index fa1464762d0d..c8133c07ceee 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -63,13 +63,9 @@ int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); 
return genlmsg_multicast(&nl802154_family, msg, 0, group, GFP_ATOMIC); -out: - nlmsg_free(msg); - return -ENOBUFS; } struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, @@ -96,13 +92,9 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); -out: - nlmsg_free(msg); - return -ENOBUFS; } static const struct genl_ops ieee8021154_ops[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index cd919493c976..9105265920fe 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -121,7 +121,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, params.transmit_power) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, - params.cca_mode) || + params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, params.cca_ed_level) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, @@ -136,7 +136,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, } wpan_phy_put(phy); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: wpan_phy_put(phy); @@ -516,7 +517,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); if (info->attrs[IEEE802154_ATTR_CCA_MODE]) - params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 7baf98b14611..1b9d25f6e898 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -65,7 +65,8 @@ static int ieee802154_nl_fill_phy(struct 
sk_buff *msg, u32 portid, goto nla_put_failure; mutex_unlock(&phy->pib_lock); kfree(buf); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: mutex_unlock(&phy->pib_lock); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 889647744697..a4daf91b8d0a 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -209,7 +209,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, }, - [NL802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, + [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, [NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, }, @@ -290,16 +291,23 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; /* cca mode */ - if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca_mode)) + if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, + rdev->wpan_phy.cca.mode)) goto nla_put_failure; + if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, + rdev->wpan_phy.cca.opt)) + goto nla_put_failure; + } + if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, rdev->wpan_phy.transmit_power)) goto nla_put_failure; finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -482,7 +490,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -622,6 +631,31 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) return rdev_set_channel(rdev, page, channel); } +static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + 
struct wpan_phy_cca cca; + + if (!info->attrs[NL802154_ATTR_CCA_MODE]) + return -EINVAL; + + cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); + /* checking 802.15.4 constraints */ + if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX) + return -EINVAL; + + if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (!info->attrs[NL802154_ATTR_CCA_OPT]) + return -EINVAL; + + cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); + if (cca.opt > NL802154_CCA_OPT_ATTR_MAX) + return -EINVAL; + } + + return rdev_set_cca_mode(rdev, &cca); +} + static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -895,6 +929,14 @@ static const struct genl_ops nl802154_ops[] = { NL802154_FLAG_NEED_RTNL, }, { + .cmd = NL802154_CMD_SET_CCA_MODE, + .doit = nl802154_set_cca_mode, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, + { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, .policy = nl802154_policy, diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c deleted file mode 100644 index 1674b115c891..000000000000 --- a/net/ieee802154/raw.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Raw IEEE 802.15.4 sockets - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#include <linux/net.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include "af802154.h" - -static HLIST_HEAD(raw_head); -static DEFINE_RWLOCK(raw_lock); - -static void raw_hash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - sk_add_node(sk, &raw_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&raw_lock); -} - -static void raw_unhash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&raw_lock); -} - -static void raw_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) -{ - struct ieee802154_addr addr; - struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; - int err = 0; - struct net_device *dev = NULL; - - if (len < sizeof(*uaddr)) - return -EINVAL; - - uaddr = (struct sockaddr_ieee802154 *)_uaddr; - if (uaddr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - ieee802154_addr_from_sa(&addr, &uaddr->addr); - dev = ieee802154_get_dev(sock_net(sk), &addr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - sk->sk_bound_dev_if = dev->ifindex; - sk_dst_reset(sk); - -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - return -ENOTSUPP; -} - -static int raw_disconnect(struct sock *sk, int flags) -{ - return 0; -} - -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff 
*skb; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - lock_sock(sk); - if (!sk->sk_bound_dev_if) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - release_sock(sk); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if 
(!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk; - - read_lock(&raw_lock); - sk_for_each(sk, &raw_head) { - bh_lock_sock(sk); - if (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - raw_rcv_skb(sk, clone); - } - bh_unlock_sock(sk); - } - read_unlock(&raw_lock); -} - -static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} - -static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} - -struct proto ieee802154_raw_prot = { - .name = "IEEE-802.15.4-RAW", - .owner = THIS_MODULE, - .obj_size = sizeof(struct sock), - .close = raw_close, - .bind = raw_bind, - .sendmsg = raw_sendmsg, - .recvmsg = raw_recvmsg, - .hash = raw_hash, - .unhash = raw_unhash, - .connect = raw_connect, - .disconnect = raw_disconnect, - .getsockopt = raw_getsockopt, - .setsockopt = raw_setsockopt, -}; diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index aff54fbd9264..7c46732fad2b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -42,6 +42,13 @@ rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) } static inline int +rdev_set_cca_mode(struct cfg802154_registered_device *rdev, + const struct wpan_phy_cca *cca) +{ + return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); +} + +static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) { diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h deleted file mode 100644 index 836b16fa001f..000000000000 --- a/net/ieee802154/reassembly.h +++ /dev/null @@ -1,41 +0,0 @@ 
-#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ -#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ - -#include <net/inet_frag.h> - -struct lowpan_create_arg { - u16 tag; - u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; -}; - -/* Equivalent of ipv4 struct ip - */ -struct lowpan_frag_queue { - struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; -}; - -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); - default: - return 0; - } -} - -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); -void lowpan_net_frag_exit(void); -int lowpan_net_frag_init(void); - -#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c new file mode 100644 index 000000000000..2878d8ca6d3b --- /dev/null +++ b/net/ieee802154/socket.c @@ -0,0 +1,1125 @@ +/* + * IEEE802154.4 socket interface + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Written by: + * Sergey Lapin <slapin@ossfans.org> + * Maxim Gorbachyov <maxim.gorbachev@siemens.com> + */ + +#include <linux/net.h> +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/if_arp.h> +#include <linux/if.h> +#include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <linux/list.h> +#include <linux/slab.h> +#include <net/datalink.h> +#include <net/psnap.h> +#include <net/sock.h> +#include <net/tcp_states.h> +#include <net/route.h> + +#include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> + +/* Utility function for families */ +static struct net_device* +ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) +{ + struct net_device *dev = NULL; + struct net_device *tmp; + __le16 pan_id, short_addr; + u8 hwaddr[IEEE802154_ADDR_LEN]; + + switch (addr->mode) { + case IEEE802154_ADDR_LONG: + ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + break; + case IEEE802154_ADDR_SHORT: + if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) + break; + + rtnl_lock(); + + for_each_netdev(net, tmp) { + if (tmp->type != ARPHRD_IEEE802154) + continue; + + pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); + short_addr = + ieee802154_mlme_ops(tmp)->get_short_addr(tmp); + + if (pan_id == addr->pan_id && + short_addr == addr->short_addr) { + dev = tmp; + dev_hold(dev); + break; + } + } + + rtnl_unlock(); + break; + default: + pr_warn("Unsupported ieee802154 address type: %d\n", + addr->mode); + break; + } + + return dev; +} + +static int ieee802154_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static int ieee802154_sock_sendmsg(struct kiocb *iocb, 
struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->sendmsg(iocb, sk, msg, len); +} + +static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sock *sk = sock->sk; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + return sock_no_bind(sock, uaddr, addr_len); +} + +static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return sk->sk_prot->disconnect(sk, flags); + + return sk->sk_prot->connect(sk, uaddr, addr_len); +} + +static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, + unsigned int cmd) +{ + struct ifreq ifr; + int ret = -ENOIOCTLCMD; + struct net_device *dev; + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + dev_load(sock_net(sk), ifr.ifr_name); + dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + + if (!dev) + return -ENODEV; + + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) + ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); + + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + ret = -EFAULT; + dev_put(dev); + + return ret; +} + +static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, (struct timeval __user *)arg); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, (struct timespec __user *)arg); + case SIOCGIFADDR: + case SIOCSIFADDR: + return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, + cmd); + default: + if (!sk->sk_prot->ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); + } +} + +/* RAW Sockets (802.15.4 created in userspace) */ +static 
HLIST_HEAD(raw_head); +static DEFINE_RWLOCK(raw_lock); + +static void raw_hash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + sk_add_node(sk, &raw_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&raw_lock); +} + +static void raw_unhash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&raw_lock); +} + +static void raw_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) +{ + struct ieee802154_addr addr; + struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; + int err = 0; + struct net_device *dev = NULL; + + if (len < sizeof(*uaddr)) + return -EINVAL; + + uaddr = (struct sockaddr_ieee802154 *)_uaddr; + if (uaddr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + ieee802154_addr_from_sa(&addr, &uaddr->addr); + dev = ieee802154_get_dev(sock_net(sk), &addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + sk->sk_bound_dev_if = dev->ifindex; + sk_dst_reset(sk); + +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int raw_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + return -ENOTSUPP; +} + +static int raw_disconnect(struct sock *sk, int flags) +{ + return 0; +} + +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + lock_sock(sk); + if (!sk->sk_bound_dev_if) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + release_sock(sk); + + if (!dev) { + 
pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk; + + read_lock(&raw_lock); + sk_for_each(sk, &raw_head) { + bh_lock_sock(sk); + if (!sk->sk_bound_dev_if || + 
sk->sk_bound_dev_if == dev->ifindex) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + raw_rcv_skb(sk, clone); + } + bh_unlock_sock(sk); + } + read_unlock(&raw_lock); +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + return -EOPNOTSUPP; +} + +static struct proto ieee802154_raw_prot = { + .name = "IEEE-802.15.4-RAW", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), + .close = raw_close, + .bind = raw_bind, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .hash = raw_hash, + .unhash = raw_unhash, + .connect = raw_connect, + .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, +}; + +static const struct proto_ops ieee802154_raw_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* DGRAM Sockets (802.15.4 dataframes) */ +static HLIST_HEAD(dgram_head); +static DEFINE_RWLOCK(dgram_lock); + +struct dgram_sock { + struct sock sk; + + struct ieee802154_addr src_addr; + struct ieee802154_addr dst_addr; + + unsigned int bound:1; + unsigned int connected:1; + unsigned int want_ack:1; + unsigned int secen:1; + 
unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; +}; + +static inline struct dgram_sock *dgram_sk(const struct sock *sk) +{ + return container_of(sk, struct dgram_sock, sk); +} + +static void dgram_hash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + sk_add_node(sk, &dgram_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&dgram_lock); +} + +static void dgram_unhash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&dgram_lock); +} + +static int dgram_init(struct sock *sk) +{ + struct dgram_sock *ro = dgram_sk(sk); + + ro->want_ack = 1; + return 0; +} + +static void dgram_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct ieee802154_addr haddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = -EINVAL; + struct net_device *dev; + + lock_sock(sk); + + ro->bound = 0; + + if (len < sizeof(*addr)) + goto out; + + if (addr->family != AF_IEEE802154) + goto out; + + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + ro->src_addr = haddr; + + ro->bound = 1; + err = 0; +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + { + int amount = sk_wmem_alloc_get(sk); + + return put_user(amount, (int __user *)arg); + } + + case SIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + /* We will only return the amount 
+ * of this packet since that is all + * that will be read. + */ + amount = skb->len - ieee802154_hdr_length(skb); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } + } + + return -ENOIOCTLCMD; +} + +/* FIXME: autobind */ +static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, + int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + if (!ro->bound) { + err = -ENETUNREACH; + goto out; + } + + ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; + +out: + release_sock(sk); + return err; +} + +static int dgram_disconnect(struct sock *sk, int flags) +{ + struct dgram_sock *ro = dgram_sk(sk); + + lock_sock(sk); + ro->connected = 0; + release_sock(sk); + + return 0; +} + +static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + struct ieee802154_mac_cb *cb; + struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + + if (!ro->bound) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EMSGSIZE; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = 
sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_network_header(skb); + + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); + + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); + if (err < 0) + goto out_skb; + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + /* FIXME: skip headers if necessary ?! 
*/ + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (saddr) { + saddr->family = AF_IEEE802154; + ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); + *addr_len = sizeof(*saddr); + } + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static inline bool +ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, + struct dgram_sock *ro) +{ + if (!ro->bound) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_LONG && + hw_addr == ro->src_addr.extended_addr) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return true; + + return false; +} + +static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk, *prev = NULL; + int ret = NET_RX_SUCCESS; + __le16 pan_id, short_addr; + __le64 hw_addr; + + /* Data frame processing */ + BUG_ON(dev->type != ARPHRD_IEEE802154); + + pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + + read_lock(&dgram_lock); + sk_for_each(sk, &dgram_head) { + if (ieee802154_match_sock(hw_addr, pan_id, short_addr, + dgram_sk(sk))) { + if (prev) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + dgram_rcv_skb(prev, clone); + } + + prev = sk; + } + } + + if (prev) { + dgram_rcv_skb(prev, skb); + } else { + kfree_skb(skb); + ret = NET_RX_DROP; + } + read_unlock(&dgram_lock); + + return ret; +} + +static int 
dgram_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + + int val, len; + + if (level != SOL_IEEE802154) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case WPAN_WANTACK: + val = ro->want_ack; + break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); + int val; + int err = 0; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case WPAN_WANTACK: + ro->want_ack = !!val; + break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == 
WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static struct proto ieee802154_dgram_prot = { + .name = "IEEE-802.15.4-MAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct dgram_sock), + .init = dgram_init, + .close = dgram_close, + .bind = dgram_bind, + .sendmsg = dgram_sendmsg, + .recvmsg = dgram_recvmsg, + .hash = dgram_hash, + .unhash = dgram_unhash, + .connect = dgram_connect, + .disconnect = dgram_disconnect, + .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, +}; + +static const struct proto_ops ieee802154_dgram_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* Create a socket. Initialise the socket, blank the addresses + * set the state. 
+ */ +static int ieee802154_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct sock *sk; + int rc; + struct proto *proto; + const struct proto_ops *ops; + + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + + switch (sock->type) { + case SOCK_RAW: + proto = &ieee802154_raw_prot; + ops = &ieee802154_raw_ops; + break; + case SOCK_DGRAM: + proto = &ieee802154_dgram_prot; + ops = &ieee802154_dgram_ops; + break; + default: + rc = -ESOCKTNOSUPPORT; + goto out; + } + + rc = -ENOMEM; + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + if (!sk) + goto out; + rc = 0; + + sock->ops = ops; + + sock_init_data(sock, sk); + /* FIXME: sk->sk_destruct */ + sk->sk_family = PF_IEEE802154; + + /* Checksums on by default */ + sock_set_flag(sk, SOCK_ZAPPED); + + if (sk->sk_prot->hash) + sk->sk_prot->hash(sk); + + if (sk->sk_prot->init) { + rc = sk->sk_prot->init(sk); + if (rc) + sk_common_release(sk); + } +out: + return rc; +} + +static const struct net_proto_family ieee802154_family_ops = { + .family = PF_IEEE802154, + .create = ieee802154_create, + .owner = THIS_MODULE, +}; + +static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", + DUMP_PREFIX_NONE, skb->data, skb->len); +#endif + + if (!net_eq(dev_net(dev), &init_net)) + goto drop; + + ieee802154_raw_deliver(dev, skb); + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + if (skb->pkt_type != PACKET_OTHERHOST) + return ieee802154_dgram_deliver(dev, skb); + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type ieee802154_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = ieee802154_rcv, +}; + +static int __init af_ieee802154_init(void) +{ + int rc = -EINVAL; + + rc = proto_register(&ieee802154_raw_prot, 1); + if (rc) + goto out; 
+ + rc = proto_register(&ieee802154_dgram_prot, 1); + if (rc) + goto err_dgram; + + /* Tell SOCKET that we are alive */ + rc = sock_register(&ieee802154_family_ops); + if (rc) + goto err_sock; + dev_add_pack(&ieee802154_packet_type); + + rc = 0; + goto out; + +err_sock: + proto_unregister(&ieee802154_dgram_prot); +err_dgram: + proto_unregister(&ieee802154_raw_prot); +out: + return rc; +} + +static void __exit af_ieee802154_remove(void) +{ + dev_remove_pack(&ieee802154_packet_type); + sock_unregister(PF_IEEE802154); + proto_unregister(&ieee802154_dgram_prot); + proto_unregister(&ieee802154_raw_prot); +} + +module_init(af_ieee802154_init); +module_exit(af_ieee802154_remove); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index 1613b9c65dfa..dff55c2d87f3 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -68,7 +68,7 @@ static DEVICE_ATTR_RO(name) MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW(cca_mode, "%d"); +MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); static ssize_t channels_supported_show(struct device *dev, struct device_attribute *attr, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a44773c8346c..d2e49baaff63 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -395,8 +395,6 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - sock_rps_reset_flow(sk); - /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 214882e7d6de..f0b4a31d7bd6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1522,7 +1522,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, preferred, valid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1566,7 +1567,7 @@ static int 
inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, NLM_F_MULTI) <= 0) { + RTM_NEWADDR, NLM_F_MULTI) < 0) { rcu_read_unlock(); goto done; } @@ -1749,7 +1750,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1881,7 +1883,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -1897,7 +1899,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -1908,7 +1910,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -2320,7 +2322,7 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; -static struct rtnl_af_ops inet_af_ops = { +static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 23104a3f2924..57be71dd6a9e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(struct net *net) return 0; fail: - kfree(local_table); + fib_free_table(local_table); return -ENOMEM; } #else @@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return tb; } +/* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; @@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); - 
rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - if (tb->tb_id == id) { - rcu_read_unlock(); + if (tb->tb_id == id) return tb; - } } - rcu_read_unlock(); return NULL; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; + rcu_read_lock(); + local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - rcu_read_lock(); if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } - rcu_read_unlock(); } + + rcu_read_unlock(); return ret; } @@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) #undef BRD1_OK } -static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) { struct fib_result res; @@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_tos = frn->fl_tos, .flowi4_scope = frn->fl_scope, }; + struct fib_table *tb; + + rcu_read_lock(); + + tb = fib_get_table(net, frn->tb_id_in); frn->err = -ENOENT; if (tb) { @@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) } local_bh_enable(); } + + rcu_read_unlock(); } static void nl_fib_input(struct sk_buff *skb) @@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb) struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; - struct fib_table *tb; u32 portid; net = sock_net(skb->sk); @@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb) nlh = nlmsg_hdr(skb); frn = (struct fib_result_nl *) nlmsg_data(nlh); - tb = fib_get_table(net, frn->tb_id_in); - - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ diff --git 
a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 1e4f6600b31d..825981b1049a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -32,7 +32,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, unsigned int); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8f7bd56955b0..d3db718be51d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, break; case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; + return -ENETUNREACH; case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; + return -EACCES; case FR_ACT_BLACKHOLE: default: - err = -EINVAL; - goto errout; + return -EINVAL; } + rcu_read_lock(); + tbl = fib_get_table(rule->fr_net, rule->table); - if (!tbl) - goto errout; + if (tbl) + err = fib_table_lookup(tbl, &flp->u.ip4, + (struct fib_result *)arg->result, + arg->flags); - err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); - if (err > 0) - err = -EAGAIN; -errout: + rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f99f41bd15b8..1e2090ea663e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(4); /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_PREFSRC */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -410,24 
+411,6 @@ errout: rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. - */ -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) -{ - if (fah) { - struct fib_alias *fa; - list_for_each_entry(fa, fah, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) @@ -859,7 +842,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (type > RTAX_MAX) goto err_inval; - val = nla_get_u32(nla); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err_inval; + } else { + val = nla_get_u32(nla); + } if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) @@ -1081,7 +1073,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp); } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18bcaf2ff2fd..3daf0224ff2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -83,28 +83,33 @@ #define MAX_STAT_DEPTH 32 -#define KEYLENGTH (8*sizeof(t_key)) +#define KEYLENGTH (8*sizeof(t_key)) +#define KEY_MAX ((t_key)~0) typedef unsigned int t_key; -#define T_TNODE 0 -#define T_LEAF 1 -#define NODE_TYPE_MASK 0x1UL -#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) +#define IS_TNODE(n) ((n)->bits) +#define IS_LEAF(n) (!(n)->bits) -#define IS_TNODE(n) (!(n->parent & T_LEAF)) -#define IS_LEAF(n) (n->parent & T_LEAF) +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) -struct rt_trie_node { - unsigned long 
parent; - t_key key; -}; - -struct leaf { - unsigned long parent; +struct tnode { t_key key; - struct hlist_head list; + unsigned char bits; /* 2log(KEYLENGTH) bits needed */ + unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + unsigned char slen; + struct tnode __rcu *parent; struct rcu_head rcu; + union { + /* The fields in this struct are valid if bits > 0 (TNODE) */ + struct { + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ + struct tnode __rcu *child[0]; + }; + /* This list pointer if valid if bits == 0 (LEAF) */ + struct hlist_head list; + }; }; struct leaf_info { @@ -115,20 +120,6 @@ struct leaf_info { struct rcu_head rcu; }; -struct tnode { - unsigned long parent; - t_key key; - unsigned char pos; /* 2log(KEYLENGTH) bits needed */ - unsigned char bits; /* 2log(KEYLENGTH) bits needed */ - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ - union { - struct rcu_head rcu; - struct tnode *tnode_free; - }; - struct rt_trie_node __rcu *child[0]; -}; - #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; @@ -151,19 +142,13 @@ struct trie_stat { }; struct trie { - struct rt_trie_node __rcu *trie; + struct tnode __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS - struct trie_use_stats stats; + struct trie_use_stats __percpu *stats; #endif }; -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, - int wasfull); -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); -static struct tnode *inflate(struct trie *t, struct tnode *tn); -static struct tnode *halve(struct trie *t, struct tnode *tn); -/* tnodes to free after resize(); protected by RTNL */ -static struct tnode *tnode_free_head; +static void resize(struct trie *t, struct tnode *tn); static size_t tnode_free_size; /* @@ -176,170 +161,101 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; 
static struct kmem_cache *trie_leaf_kmem __read_mostly; -/* - * caller must hold RTNL - */ -static inline struct tnode *node_parent(const struct rt_trie_node *node) -{ - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); +/* caller must hold RTNL */ +#define node_parent(n) rtnl_dereference((n)->parent) - return (struct tnode *)(parent & ~NODE_TYPE_MASK); -} +/* caller must hold RCU read lock or RTNL */ +#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) +/* wrapper for rcu_assign_pointer */ +static inline void node_set_parent(struct tnode *n, struct tnode *tp) { - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || - lockdep_rtnl_is_held()); - - return (struct tnode *)(parent & ~NODE_TYPE_MASK); + if (n) + rcu_assign_pointer(n->parent, tp); } -/* Same as rcu_assign_pointer - * but that macro() assumes that value is a pointer. +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) + +/* This provides us with the number of children in this node, in the case of a + * leaf this will return 0 meaning none of the children are accessible. 
*/ -static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) +static inline unsigned long tnode_child_length(const struct tnode *tn) { - smp_wmb(); - node->parent = (unsigned long)ptr | NODE_TYPE(node); + return (1ul << tn->bits) & ~(1ul); } -/* - * caller must hold RTNL - */ -static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) +/* caller must hold RTNL */ +static inline struct tnode *tnode_get_child(const struct tnode *tn, + unsigned long i) { - BUG_ON(i >= 1U << tn->bits); - return rtnl_dereference(tn->child[i]); } -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) +/* caller must hold RCU read lock or RTNL */ +static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, + unsigned long i) { - BUG_ON(i >= 1U << tn->bits); - return rcu_dereference_rtnl(tn->child[i]); } -static inline int tnode_child_length(const struct tnode *tn) -{ - return 1 << tn->bits; -} - -static inline t_key mask_pfx(t_key k, unsigned int l) -{ - return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); -} - -static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) -{ - if (offset < KEYLENGTH) - return ((t_key)(a << offset)) >> (KEYLENGTH - bits); - else - return 0; -} - -static inline int tkey_equals(t_key a, t_key b) -{ - return a == b; -} - -static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) -{ - if (bits == 0 || offset >= KEYLENGTH) - return 1; - bits = bits > KEYLENGTH ? KEYLENGTH : bits; - return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; -} - -static inline int tkey_mismatch(t_key a, int offset, t_key b) -{ - t_key diff = a ^ b; - int i = offset; - - if (!diff) - return 0; - while ((diff << i) >> (KEYLENGTH-1) == 0) - i++; - return i; -} - -/* - To understand this stuff, an understanding of keys and all their bits is - necessary. 
Every node in the trie has a key associated with it, but not - all of the bits in that key are significant. - - Consider a node 'n' and its parent 'tp'. - - If n is a leaf, every bit in its key is significant. Its presence is - necessitated by path compression, since during a tree traversal (when - searching for a leaf - unless we are doing an insertion) we will completely - ignore all skipped bits we encounter. Thus we need to verify, at the end of - a potentially successful search, that we have indeed been walking the - correct key path. - - Note that we can never "miss" the correct key in the tree if present by - following the wrong path. Path compression ensures that segments of the key - that are the same for all keys with a given prefix are skipped, but the - skipped part *is* identical for each node in the subtrie below the skipped - bit! trie_insert() in this implementation takes care of that - note the - call to tkey_sub_equals() in trie_insert(). - - if n is an internal node - a 'tnode' here, the various parts of its key - have many different meanings. - - Example: - _________________________________________________________________ - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | - ----------------------------------------------------------------- - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - - _________________________________________________________________ - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | - ----------------------------------------------------------------- - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - - tp->pos = 7 - tp->bits = 3 - n->pos = 15 - n->bits = 4 - - First, let's just ignore the bits that come before the parent tp, that is - the bits from 0 to (tp->pos-1). They are *known* but at this point we do - not use them for anything. - - The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the - index into the parent's child array. 
That is, they will be used to find - 'n' among tp's children. - - The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits - for the node n. - - All the bits we have seen so far are significant to the node n. The rest - of the bits are really not needed or indeed known in n->key. - - The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into - n's child array, and will of course be different for each child. - - - The rest of the bits, from (n->pos + n->bits) onward, are completely unknown - at this point. - -*/ - -static inline void check_tnode(const struct tnode *tn) -{ - WARN_ON(tn && tn->pos+tn->bits > 32); -} +/* To understand this stuff, an understanding of keys and all their bits is + * necessary. Every node in the trie has a key associated with it, but not + * all of the bits in that key are significant. + * + * Consider a node 'n' and its parent 'tp'. + * + * If n is a leaf, every bit in its key is significant. Its presence is + * necessitated by path compression, since during a tree traversal (when + * searching for a leaf - unless we are doing an insertion) we will completely + * ignore all skipped bits we encounter. Thus we need to verify, at the end of + * a potentially successful search, that we have indeed been walking the + * correct key path. + * + * Note that we can never "miss" the correct key in the tree if present by + * following the wrong path. Path compression ensures that segments of the key + * that are the same for all keys with a given prefix are skipped, but the + * skipped part *is* identical for each node in the subtrie below the skipped + * bit! trie_insert() in this implementation takes care of that. + * + * if n is an internal node - a 'tnode' here, the various parts of its key + * have many different meanings. 
+ * + * Example: + * _________________________________________________________________ + * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | + * ----------------------------------------------------------------- + * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 + * + * _________________________________________________________________ + * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | + * ----------------------------------------------------------------- + * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + * + * tp->pos = 22 + * tp->bits = 3 + * n->pos = 13 + * n->bits = 4 + * + * First, let's just ignore the bits that come before the parent tp, that is + * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this + * point we do not use them for anything. + * + * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the + * index into the parent's child array. That is, they will be used to find + * 'n' among tp's children. + * + * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits + * for the node n. + * + * All the bits we have seen so far are significant to the node n. The rest + * of the bits are really not needed or indeed known in n->key. + * + * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into + * n's child array, and will of course be different for each child. + * + * The rest of the bits, from 0 to (n->pos - 1), are completely unknown + * at this point. 
+ */ static const int halve_threshold = 25; static const int inflate_threshold = 50; @@ -357,17 +273,23 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -static void __leaf_free_rcu(struct rcu_head *head) -{ - struct leaf *l = container_of(head, struct leaf, rcu); - kmem_cache_free(trie_leaf_kmem, l); -} +#define TNODE_KMALLOC_MAX \ + ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) -static inline void free_leaf(struct leaf *l) +static void __node_free_rcu(struct rcu_head *head) { - call_rcu(&l->rcu, __leaf_free_rcu); + struct tnode *n = container_of(head, struct tnode, rcu); + + if (IS_LEAF(n)) + kmem_cache_free(trie_leaf_kmem, n); + else if (n->bits <= TNODE_KMALLOC_MAX) + kfree(n); + else + vfree(n); } +#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) + static inline void free_leaf_info(struct leaf_info *leaf) { kfree_rcu(leaf, rcu); @@ -381,56 +303,31 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } -static void __tnode_free_rcu(struct rcu_head *head) -{ - struct tnode *tn = container_of(head, struct tnode, rcu); - size_t size = sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); - - if (size <= PAGE_SIZE) - kfree(tn); - else - vfree(tn); -} - -static inline void tnode_free(struct tnode *tn) -{ - if (IS_LEAF(tn)) - free_leaf((struct leaf *) tn); - else - call_rcu(&tn->rcu, __tnode_free_rcu); -} - -static void tnode_free_safe(struct tnode *tn) +static inline void empty_child_inc(struct tnode *n) { - BUG_ON(IS_LEAF(tn)); - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - tnode_free_size += sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); + ++n->empty_children ? 
: ++n->full_children; } -static void tnode_free_flush(void) +static inline void empty_child_dec(struct tnode *n) { - struct tnode *tn; - - while ((tn = tnode_free_head)) { - tnode_free_head = tn->tnode_free; - tn->tnode_free = NULL; - tnode_free(tn); - } - - if (tnode_free_size >= PAGE_SIZE * sync_pages) { - tnode_free_size = 0; - synchronize_rcu(); - } + n->empty_children-- ? : n->full_children--; } -static struct leaf *leaf_new(void) +static struct tnode *leaf_new(t_key key) { - struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); + struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); if (l) { - l->parent = T_LEAF; + l->parent = NULL; + /* set key and pos to reflect full key value + * any trailing zeros in the key should be ignored + * as the nodes are searched + */ + l->key = key; + l->slen = 0; + l->pos = 0; + /* set bits to 0 indicating we are not a tnode */ + l->bits = 0; + INIT_HLIST_HEAD(&l->list); } return l; @@ -449,462 +346,530 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); + size_t sz = offsetof(struct tnode, child[1ul << bits]); struct tnode *tn = tnode_alloc(sz); + unsigned int shift = pos + bits; + + /* verify bits and pos their msb bits clear and values are valid */ + BUG_ON(!bits || (shift > KEYLENGTH)); if (tn) { - tn->parent = T_TNODE; + tn->parent = NULL; + tn->slen = pos; tn->pos = pos; tn->bits = bits; - tn->key = key; - tn->full_children = 0; - tn->empty_children = 1<<bits; + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; + if (bits == KEYLENGTH) + tn->full_children = 1; + else + tn->empty_children = 1ul << bits; } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct rt_trie_node *) << bits); + sizeof(struct tnode *) << bits); return tn; } -/* - * Check whether a tnode 'n' is "full", i.e. 
it is an internal node +/* Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 6 */ - -static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) +static inline int tnode_full(const struct tnode *tn, const struct tnode *n) { - if (n == NULL || IS_LEAF(n)) - return 0; - - return ((struct tnode *) n)->pos == tn->pos + tn->bits; + return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } -static inline void put_child(struct tnode *tn, int i, - struct rt_trie_node *n) -{ - tnode_put_child_reorg(tn, i, n, -1); -} - - /* - * Add a child at position i overwriting the old value. - * Update the value of full_children and empty_children. - */ - -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, - int wasfull) +/* Add a child at position i overwriting the old value. + * Update the value of full_children and empty_children. + */ +static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) { - struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); - int isfull; + struct tnode *chi = tnode_get_child(tn, i); + int isfull, wasfull; - BUG_ON(i >= 1<<tn->bits); + BUG_ON(i >= tnode_child_length(tn)); - /* update emptyChildren */ + /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) - tn->empty_children++; - else if (n != NULL && chi == NULL) - tn->empty_children--; + empty_child_inc(tn); + if (n != NULL && chi == NULL) + empty_child_dec(tn); /* update fullChildren */ - if (wasfull == -1) - wasfull = tnode_full(tn, chi); - + wasfull = tnode_full(tn, chi); isfull = tnode_full(tn, n); + if (wasfull && !isfull) tn->full_children--; else if (!wasfull && isfull) tn->full_children++; - if (n) - node_set_parent(n, tn); + if (n && (tn->slen < n->slen)) + tn->slen = n->slen; rcu_assign_pointer(tn->child[i], n); } -#define MAX_WORK 10 -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) +static 
void update_children(struct tnode *tn) { - int i; - struct tnode *old_tn; - int inflate_threshold_use; - int halve_threshold_use; - int max_work; + unsigned long i; - if (!tn) - return NULL; + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); - pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", - tn, inflate_threshold, halve_threshold); + if (!inode) + continue; - /* No children */ - if (tn->empty_children == tnode_child_length(tn)) { - tnode_free_safe(tn); - return NULL; + /* Either update the children of a tnode that + * already belongs to us or update the child + * to point to ourselves. + */ + if (node_parent(inode) == tn) + update_children(inode); + else + node_set_parent(inode, tn); } - /* One child */ - if (tn->empty_children == tnode_child_length(tn) - 1) - goto one_child; - /* - * Double as long as the resulting node has a number of - * nonempty nodes that are above the threshold. - */ - - /* - * From "Implementing a dynamic compressed trie" by Stefan Nilsson of - * the Helsinki University of Technology and Matti Tikkanen of Nokia - * Telecommunications, page 6: - * "A node is doubled if the ratio of non-empty children to all - * children in the *doubled* node is at least 'high'." - * - * 'high' in this instance is the variable 'inflate_threshold'. It - * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the - * child array will be doubled by inflate()) and multiplying - * the left-hand side by 100 (to handle the percentage thing) we - * multiply the left-hand side by 50. - * - * The left-hand side may look a bit weird: tnode_child_length(tn) - * - tn->empty_children is of course the number of non-null children - * in the current node. tn->full_children is the number of "full" - * children, that is non-null tnodes with a skip value of 0. 
- * All of those will be doubled in the resulting inflated tnode, so - * we just count them one extra time here. - * - * A clearer way to write this would be: - * - * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - - * tn->full_children; - * - * new_child_length = tnode_child_length(tn) * 2; - * - * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / - * new_child_length; - * if (new_fill_factor >= inflate_threshold) - * - * ...and so on, tho it would mess up the while () loop. - * - * anyway, - * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= - * inflate_threshold - * - * avoid a division: - * 100 * (not_to_be_doubled + 2*to_be_doubled) >= - * inflate_threshold * new_child_length - * - * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= inflate_threshold * new_child_length - * - * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 - * - * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) - * - */ +} - check_tnode(tn); +static inline void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) +{ + if (tp) + put_child(tp, get_index(key, tp), n); + else + rcu_assign_pointer(t->trie, n); +} - /* Keep root node larger */ +static inline void tnode_free_init(struct tnode *tn) +{ + tn->rcu.next = NULL; +} - if (!node_parent((struct rt_trie_node *)tn)) { - inflate_threshold_use = inflate_threshold_root; - halve_threshold_use = halve_threshold_root; - } else { - inflate_threshold_use = inflate_threshold; - halve_threshold_use = halve_threshold; - } +static inline void tnode_free_append(struct tnode *tn, struct tnode *n) +{ + n->rcu.next = tn->rcu.next; + tn->rcu.next = &n->rcu; 
+} - max_work = MAX_WORK; - while ((tn->full_children > 0 && max_work-- && - 50 * (tn->full_children + tnode_child_length(tn) - - tn->empty_children) - >= inflate_threshold_use * tnode_child_length(tn))) { +static void tnode_free(struct tnode *tn) +{ + struct callback_head *head = &tn->rcu; - old_tn = tn; - tn = inflate(t, tn); + while (head) { + head = head->next; + tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); + node_free(tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; -#endif - break; - } + tn = container_of(head, struct tnode, rcu); } - check_tnode(tn); - - /* Return if at least one inflate is run */ - if (max_work != MAX_WORK) - return (struct rt_trie_node *) tn; - - /* - * Halve as long as the number of empty children in this - * node is above threshold. - */ - - max_work = MAX_WORK; - while (tn->bits > 1 && max_work-- && - 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold_use * tnode_child_length(tn)) { - - old_tn = tn; - tn = halve(t, tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; -#endif - break; - } + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); } +} +static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +{ + struct tnode *tp = node_parent(oldtnode); + unsigned long i; - /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) { -one_child: - for (i = 0; i < tnode_child_length(tn); i++) { - struct rt_trie_node *n; - - n = rtnl_dereference(tn->child[i]); - if (!n) - continue; - - /* compress one level */ + /* setup the parent pointer out of and back into this node */ + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } - } - return (struct rt_trie_node *) tn; -} + /* update all of the child parent pointers */ + 
update_children(tn); + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); -static void tnode_clean_free(struct tnode *tn) -{ - int i; - struct tnode *tofree; + /* resize children now that oldtnode is freed */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); - for (i = 0; i < tnode_child_length(tn); i++) { - tofree = (struct tnode *)rtnl_dereference(tn->child[i]); - if (tofree) - tnode_free(tofree); + /* resize child node */ + if (tnode_full(tn, inode)) + resize(t, inode); } - tnode_free(tn); } -static struct tnode *inflate(struct trie *t, struct tnode *tn) +static int inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - int olen = tnode_child_length(tn); - int i; + struct tnode *tn; + unsigned long i; + t_key m; pr_debug("In inflate\n"); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); - + tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) - return ERR_PTR(-ENOMEM); - - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and inflate - * of tnode is ignored. 
- */ - - for (i = 0; i < olen; i++) { - struct tnode *inode; - - inode = (struct tnode *) tnode_get_child(oldtnode, i); - if (inode && - IS_TNODE(inode) && - inode->pos == oldtnode->pos + oldtnode->bits && - inode->bits > 1) { - struct tnode *left, *right; - t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; - - left = tnode_new(inode->key&(~m), inode->pos + 1, - inode->bits - 1); - if (!left) - goto nomem; - - right = tnode_new(inode->key|m, inode->pos + 1, - inode->bits - 1); - - if (!right) { - tnode_free(left); - goto nomem; - } + return -ENOMEM; - put_child(tn, 2*i, (struct rt_trie_node *) left); - put_child(tn, 2*i+1, (struct rt_trie_node *) right); - } - } + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - for (i = 0; i < olen; i++) { - struct tnode *inode; - struct rt_trie_node *node = tnode_get_child(oldtnode, i); - struct tnode *left, *right; - int size, j; + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. 
+ */ + for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { + struct tnode *inode = tnode_get_child(oldtnode, --i); + struct tnode *node0, *node1; + unsigned long j, k; /* An empty child */ - if (node == NULL) + if (inode == NULL) continue; /* A leaf or an internal node with skipped bits */ - - if (IS_LEAF(node) || ((struct tnode *) node)->pos > - tn->pos + tn->bits - 1) { - put_child(tn, - tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), - node); + if (!tnode_full(oldtnode, inode)) { + put_child(tn, get_index(inode->key, tn), inode); continue; } - /* An internal node with two children */ - inode = (struct tnode *) node; + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { - put_child(tn, 2*i, rtnl_dereference(inode->child[0])); - put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); - - tnode_free_safe(inode); + put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); + put_child(tn, 2 * i, tnode_get_child(inode, 0)); continue; } - /* An internal node with more than two children */ - /* We will replace this node 'inode' with two new - * ones, 'left' and 'right', each with half of the + * ones, 'node0' and 'node1', each with half of the * original children. The two new nodes will have * a position one bit further down the key and this * means that the "significant" part of their keys * (see the discussion near the top of this file) * will differ by one bit, which will be "0" in - * left's key and "1" in right's key. Since we are + * node0's key and "1" in node1's key. Since we are * moving the key position by one step, the bit that * we are moving away from - the bit at position - * (inode->pos) - is the one that will differ between - * left and right. So... we synthesize that bit in the - * two new keys. 
- * The mask 'm' below will be a single "one" bit at - * the position (inode->pos) + * (tn->pos) - is the one that will differ between + * node0 and node1. So... we synthesize that bit in the + * two new keys. */ + node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); + if (!node1) + goto nomem; + node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); + + tnode_free_append(tn, node1); + if (!node0) + goto nomem; + tnode_free_append(tn, node0); + + /* populate child pointers in new nodes */ + for (k = tnode_child_length(inode), j = k / 2; j;) { + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + } - /* Use the old key, but set the new significant - * bit to zero. - */ + /* link new nodes to parent */ + NODE_INIT_PARENT(node1, tn); + NODE_INIT_PARENT(node0, tn); + + /* link parent to nodes */ + put_child(tn, 2 * i + 1, node1); + put_child(tn, 2 * i, node0); + } + + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); + + return 0; +nomem: + /* all pointers should be clean so we are done */ + tnode_free(tn); + return -ENOMEM; +} + +static int halve(struct trie *t, struct tnode *oldtnode) +{ + struct tnode *tn; + unsigned long i; + + pr_debug("In halve\n"); - left = (struct tnode *) tnode_get_child(tn, 2*i); - put_child(tn, 2*i, NULL); + tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); + if (!tn) + return -ENOMEM; - BUG_ON(!left); + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - right = (struct tnode *) tnode_get_child(tn, 2*i+1); - put_child(tn, 2*i+1, NULL); + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. 
+ */ + for (i = tnode_child_length(oldtnode); i;) { + struct tnode *node1 = tnode_get_child(oldtnode, --i); + struct tnode *node0 = tnode_get_child(oldtnode, --i); + struct tnode *inode; - BUG_ON(!right); + /* At least one of the children is empty */ + if (!node1 || !node0) { + put_child(tn, i / 2, node1 ? : node0); + continue; + } - size = tnode_child_length(left); - for (j = 0; j < size; j++) { - put_child(left, j, rtnl_dereference(inode->child[j])); - put_child(right, j, rtnl_dereference(inode->child[j + size])); + /* Two nonempty children */ + inode = tnode_new(node0->key, oldtnode->pos, 1); + if (!inode) { + tnode_free(tn); + return -ENOMEM; } - put_child(tn, 2*i, resize(t, left)); - put_child(tn, 2*i+1, resize(t, right)); + tnode_free_append(tn, inode); + + /* initialize pointers out of node */ + put_child(inode, 1, node1); + put_child(inode, 0, node0); + NODE_INIT_PARENT(inode, tn); - tnode_free_safe(inode); + /* link parent to node */ + put_child(tn, i / 2, inode); } - tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); + + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); + + return 0; } -static struct tnode *halve(struct trie *t, struct tnode *tn) +static void collapse(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - struct rt_trie_node *left, *right; - int i; - int olen = tnode_child_length(tn); + struct tnode *n, *tp; + unsigned long i; - pr_debug("In halve\n"); + /* scan the tnode looking for that one child that might still exist */ + for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) + n = tnode_get_child(oldtnode, --i); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); + /* compress one level */ + tp = node_parent(oldtnode); + put_child_root(tp, t, oldtnode->key, n); + node_set_parent(n, tp); - if (!tn) - return ERR_PTR(-ENOMEM); + /* drop dead node */ + node_free(oldtnode); +} - /* - * Preallocate and store tnodes 
before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and halve - * of tnode is ignored. +static unsigned char update_suffix(struct tnode *tn) +{ + unsigned char slen = tn->pos; + unsigned long stride, i; + + /* search through the list of children looking for nodes that might + * have a suffix greater than the one we currently have. This is + * why we start with a stride of 2 since a stride of 1 would + * represent the nodes with suffix length equal to tn->pos */ + for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { + struct tnode *n = tnode_get_child(tn, i); - for (i = 0; i < olen; i += 2) { - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + if (!n || (n->slen <= slen)) + continue; - /* Two nonempty children */ - if (left && right) { - struct tnode *newn; + /* update stride and slen based on new value */ + stride <<= (n->slen - slen); + slen = n->slen; + i &= ~(stride - 1); - newn = tnode_new(left->key, tn->pos + tn->bits, 1); + /* if slen covers all but the last bit we can stop here + * there will be nothing longer than that since only node + * 0 and 1 << (bits - 1) could have that as their suffix + * length. + */ + if ((slen + 1) >= (tn->pos + tn->bits)) + break; + } - if (!newn) - goto nomem; + tn->slen = slen; - put_child(tn, i/2, (struct rt_trie_node *)newn); - } + return slen; +} - } +/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of + * the Helsinki University of Technology and Matti Tikkanen of Nokia + * Telecommunications, page 6: + * "A node is doubled if the ratio of non-empty children to all + * children in the *doubled* node is at least 'high'." + * + * 'high' in this instance is the variable 'inflate_threshold'. 
It + * is expressed as a percentage, so we multiply it with + * tnode_child_length() and instead of multiplying by 2 (since the + * child array will be doubled by inflate()) and multiplying + * the left-hand side by 100 (to handle the percentage thing) we + * multiply the left-hand side by 50. + * + * The left-hand side may look a bit weird: tnode_child_length(tn) + * - tn->empty_children is of course the number of non-null children + * in the current node. tn->full_children is the number of "full" + * children, that is non-null tnodes with a skip value of 0. + * All of those will be doubled in the resulting inflated tnode, so + * we just count them one extra time here. + * + * A clearer way to write this would be: + * + * to_be_doubled = tn->full_children; + * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * tn->full_children; + * + * new_child_length = tnode_child_length(tn) * 2; + * + * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / + * new_child_length; + * if (new_fill_factor >= inflate_threshold) + * + * ...and so on, tho it would mess up the while () loop. 
+ * + * anyway, + * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= + * inflate_threshold + * + * avoid a division: + * 100 * (not_to_be_doubled + 2*to_be_doubled) >= + * inflate_threshold * new_child_length + * + * expand not_to_be_doubled and to_be_doubled, and shorten: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= inflate_threshold * new_child_length + * + * expand new_child_length: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= + * inflate_threshold * tnode_child_length(tn) * 2 + * + * shorten again: + * 50 * (tn->full_children + tnode_child_length(tn) - + * tn->empty_children) >= inflate_threshold * + * tnode_child_length(tn) + * + */ +static bool should_inflate(const struct tnode *tp, const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; - for (i = 0; i < olen; i += 2) { - struct tnode *newBinNode; + /* Keep root node larger */ + threshold *= tp ? inflate_threshold : inflate_threshold_root; + used -= tn->empty_children; + used += tn->full_children; - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + /* if bits == KEYLENGTH then pos = 0, and will fail below */ - /* At least one of the children is empty */ - if (left == NULL) { - if (right == NULL) /* Both are empty */ - continue; - put_child(tn, i/2, right); - continue; + return (used > 1) && tn->pos && ((50 * used) >= threshold); +} + +static bool should_halve(const struct tnode *tp, const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; + + /* Keep root node larger */ + threshold *= tp ? 
halve_threshold : halve_threshold_root; + used -= tn->empty_children; + + /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ + + return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); +} + +static bool should_collapse(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + + used -= tn->empty_children; + + /* account for bits == KEYLENGTH case */ + if ((tn->bits == KEYLENGTH) && tn->full_children) + used -= KEY_MAX; + + /* One child or none, time to drop us from the trie */ + return used < 2; +} + +#define MAX_WORK 10 +static void resize(struct trie *t, struct tnode *tn) +{ + struct tnode *tp = node_parent(tn); + struct tnode __rcu **cptr; + int max_work = MAX_WORK; + + pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", + tn, inflate_threshold, halve_threshold); + + /* track the tnode via the pointer from the parent instead of + * doing it ourselves. This way we can let RCU fully do its + * thing without us interfering + */ + cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; + BUG_ON(tn != rtnl_dereference(*cptr)); + + /* Double as long as the resulting node has a number of + * nonempty nodes that are above the threshold. + */ + while (should_inflate(tp, tn) && max_work) { + if (inflate(t, tn)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; } - if (right == NULL) { - put_child(tn, i/2, left); - continue; + max_work--; + tn = rtnl_dereference(*cptr); + } + + /* Return if at least one inflate is run */ + if (max_work != MAX_WORK) + return; + + /* Halve as long as the number of empty children in this + * node is above threshold. 
+ */ + while (should_halve(tp, tn) && max_work) { + if (halve(t, tn)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; } - /* Two nonempty children */ - newBinNode = (struct tnode *) tnode_get_child(tn, i/2); - put_child(tn, i/2, NULL); - put_child(newBinNode, 0, left); - put_child(newBinNode, 1, right); - put_child(tn, i/2, resize(t, newBinNode)); + max_work--; + tn = rtnl_dereference(*cptr); + } + + /* Only one child remains */ + if (should_collapse(tn)) { + collapse(t, tn); + return; + } + + /* Return if at least one deflate was run */ + if (max_work != MAX_WORK) + return; + + /* push the suffix length to the parent node */ + if (tn->slen > tn->pos) { + unsigned char slen = update_suffix(tn); + + if (tp && (slen > tp->slen)) + tp->slen = slen; } - tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); } /* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct leaf *l, int plen) +static struct leaf_info *find_leaf_info(struct tnode *l, int plen) { struct hlist_head *head = &l->list; struct leaf_info *li; @@ -916,7 +881,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen) return NULL; } -static inline struct list_head *get_fa_head(struct leaf *l, int plen) +static inline struct list_head *get_fa_head(struct tnode *l, int plen) { struct leaf_info *li = find_leaf_info(l, plen); @@ -926,8 +891,51 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen) return &li->falh; } -static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) +static void leaf_pull_suffix(struct tnode *l) +{ + struct tnode *tp = node_parent(l); + + while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { + if (update_suffix(tp) > l->slen) + break; + tp = node_parent(tp); + } +} + +static void leaf_push_suffix(struct tnode *l) +{ + struct tnode *tn = node_parent(l); + + /* 
if this is a new leaf then tn will be NULL and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + while (tn && (tn->slen < l->slen)) { + tn->slen = l->slen; + tn = node_parent(tn); + } +} + +static void remove_leaf_info(struct tnode *l, struct leaf_info *old) { + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->hlist.pprev; + struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); + + /* remove the leaf info from the list */ + hlist_del_rcu(&old->hlist); + + /* only access li if it is pointing at the last valid hlist_node */ + if (hlist_empty(&l->list) || (*pprev)) + return; + + /* update the trie with the latest suffix length */ + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); +} + +static void insert_leaf_info(struct tnode *l, struct leaf_info *new) +{ + struct hlist_head *head = &l->list; struct leaf_info *li = NULL, *last = NULL; if (hlist_empty(head)) { @@ -944,218 +952,174 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) else hlist_add_before_rcu(&new->hlist, &li->hlist); } + + /* if we added to the tail node then we need to update slen */ + if (l->slen < (KEYLENGTH - new->plen)) { + l->slen = KEYLENGTH - new->plen; + leaf_push_suffix(l); + } } /* rcu_read_lock needs to be hold by caller from readside */ +static struct tnode *fib_find_node(struct trie *t, u32 key) +{ + struct tnode *n = rcu_dereference_rtnl(t->trie); + + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the bits in the cindex. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. 
+ * if (index & (~0ul << bits)) + * we have a mismatch in skip bits and failed + * else + * we know the value is cindex + */ + if (index & (~0ul << n->bits)) + return NULL; -static struct leaf * -fib_find_node(struct trie *t, u32 key) -{ - int pos; - struct tnode *tn; - struct rt_trie_node *n; + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) + break; - pos = 0; - n = rcu_dereference_rtnl(t->trie); + n = tnode_get_child_rcu(n, index); + } - while (n != NULL && NODE_TYPE(n) == T_TNODE) { - tn = (struct tnode *) n; + return n; +} - check_tnode(tn); +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + struct fib_alias *fa; - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - pos = tn->pos + tn->bits; - n = tnode_get_child_rcu(tn, - tkey_extract_bits(key, - tn->pos, - tn->bits)); - } else - break; - } - /* Case we have found a leaf. 
Compare prefixes */ + if (!fah) + return NULL; - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) - return (struct leaf *)n; + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + return fa; + } return NULL; } static void trie_rebalance(struct trie *t, struct tnode *tn) { - int wasfull; - t_key cindex, key; struct tnode *tp; - key = tn->key; - - while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); - tn = (struct tnode *)resize(t, tn); - - tnode_put_child_reorg(tp, cindex, - (struct rt_trie_node *)tn, wasfull); - - tp = node_parent((struct rt_trie_node *) tn); - if (!tp) - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - - tnode_free_flush(); - if (!tp) - break; + while ((tp = node_parent(tn)) != NULL) { + resize(t, tn); tn = tp; } /* Handle last (top) tnode */ if (IS_TNODE(tn)) - tn = (struct tnode *)resize(t, tn); - - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - tnode_free_flush(); + resize(t, tn); } /* only used from updater-side */ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { - int pos, newpos; - struct tnode *tp = NULL, *tn = NULL; - struct rt_trie_node *n; - struct leaf *l; - int missbit; struct list_head *fa_head = NULL; + struct tnode *l, *n, *tp = NULL; struct leaf_info *li; - t_key cindex; - pos = 0; + li = leaf_info_new(plen); + if (!li) + return NULL; + fa_head = &li->falh; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, * and we should just put our new leaf in that. - * If we point to a T_TNODE, check if it matches our key. Note that - * a T_TNODE might be skipping any number of bits - its 'pos' need - * not be the parent's 'pos'+'bits'! 
- * - * If it does match the current key, get pos/bits from it, extract - * the index from our key, push the T_TNODE and walk the tree. - * - * If it doesn't, we have to replace it with a new T_TNODE. * - * If we point to a T_LEAF, it might or might not have the same key - * as we do. If it does, just change the value, update the T_LEAF's - * value, and return it. - * If it doesn't, we need to replace it with a T_TNODE. + * If we hit a node with a key that does't match then we should stop + * and create a new tnode to replace that node and insert ourselves + * and the other node into the new tnode. */ - - while (n != NULL && NODE_TYPE(n) == T_TNODE) { - tn = (struct tnode *) n; - - check_tnode(tn); - - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - tp = tn; - pos = tn->pos + tn->bits; - n = tnode_get_child(tn, - tkey_extract_bits(key, - tn->pos, - tn->bits)); - - BUG_ON(n && node_parent(n) != tn); - } else + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if !(index >> bits) + * we know the value is child index + * else + * we have a mismatch in skip bits and failed + */ + if (index >> n->bits) break; - } - /* - * n ----> NULL, LEAF or TNODE - * - * tp is n's (parent) ----> NULL or TNODE - */ - - BUG_ON(tp && IS_LEAF(tp)); - - /* Case 1: n is a leaf. Compare prefixes */ - - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - l = (struct leaf *) n; - li = leaf_info_new(plen); - - if (!li) - return NULL; + /* we have found a leaf. 
Prefixes have already been compared */ + if (IS_LEAF(n)) { + /* Case 1: n is a leaf, and prefixes match*/ + insert_leaf_info(n, li); + return fa_head; + } - fa_head = &li->falh; - insert_leaf_info(&l->list, li); - goto done; + tp = n; + n = tnode_get_child_rcu(n, index); } - l = leaf_new(); - if (!l) - return NULL; - - l->key = key; - li = leaf_info_new(plen); - - if (!li) { - free_leaf(l); + l = leaf_new(key); + if (!l) { + free_leaf_info(li); return NULL; } - fa_head = &li->falh; - insert_leaf_info(&l->list, li); - - if (t->trie && n == NULL) { - /* Case 2: n is NULL, and will just insert a new leaf */ + insert_leaf_info(l, li); - node_set_parent((struct rt_trie_node *)l, tp); - - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)l); - } else { - /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ - /* - * Add a new tnode here - * first tnode need some special handling - */ - - if (n) { - pos = tp ? tp->pos+tp->bits : 0; - newpos = tkey_mismatch(key, pos, n->key); - tn = tnode_new(n->key, newpos, 1); - } else { - newpos = 0; - tn = tnode_new(key, newpos, 1); /* First tnode */ - } + /* Case 2: n is a LEAF or a TNODE and the key doesn't match. 
+ * + * Add a new tnode here + * first tnode need some special handling + * leaves us in position for handling as case 3 + */ + if (n) { + struct tnode *tn; + tn = tnode_new(key, __fls(key ^ n->key), 1); if (!tn) { free_leaf_info(li); - free_leaf(l); + node_free(l); return NULL; } - node_set_parent((struct rt_trie_node *)tn, tp); + /* initialize routes out of node */ + NODE_INIT_PARENT(tn, tp); + put_child(tn, get_index(key, tn) ^ 1, n); - missbit = tkey_extract_bits(key, newpos, 1); - put_child(tn, missbit, (struct rt_trie_node *)l); - put_child(tn, 1-missbit, n); - - if (tp) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)tn); - } else { - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - } + /* start adding routes into the node */ + put_child_root(tp, t, key, tn); + node_set_parent(n, tn); + /* parent now has a NULL spot where the leaf can go */ tp = tn; } - if (tp && tp->pos + tp->bits > 32) - pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", - tp, tp->pos, tp->bits, key, plen); - - /* Rebalance the trie */ + /* Case 3: n is NULL, and will just insert a new leaf */ + if (tp) { + NODE_INIT_PARENT(l, tp); + put_child(tp, get_index(key, tp), l); + trie_rebalance(t, tp); + } else { + rcu_assign_pointer(t->trie, l); + } - trie_rebalance(t, tp); -done: return fa_head; } @@ -1172,7 +1136,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; u32 key, mask; int err; - struct leaf *l; + struct tnode *l; if (plen > 32) return -EINVAL; @@ -1329,18 +1293,130 @@ err: return err; } +static inline t_key prefix_mismatch(t_key key, struct tnode *n) +{ + t_key prefix = n->key; + + return (key ^ prefix) & (prefix | -prefix); +} + /* should be called with rcu_read_lock */ -static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, - t_key key, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) +int fib_table_lookup(struct fib_table *tb, 
const struct flowi4 *flp, + struct fib_result *res, int fib_flags) { + struct trie *t = (struct trie *)tb->tb_data; +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie_use_stats __percpu *stats = t->stats; +#endif + const t_key key = ntohl(flp->daddr); + struct tnode *n, *pn; struct leaf_info *li; - struct hlist_head *hhead = &l->list; + t_key cindex; + + n = rcu_dereference(t->trie); + if (!n) + return -EAGAIN; + +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->gets); +#endif + + pn = n; + cindex = 0; + + /* Step 1: Travel to the longest prefix match in the trie */ + for (;;) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if (index & (~0ul << bits)) + * we have a mismatch in skip bits and failed + * else + * we know the value is cindex + */ + if (index & (~0ul << n->bits)) + break; + + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) + goto found; + + /* only record pn and cindex if we are going to be chopping + * bits later. Otherwise we are just wasting cycles. + */ + if (n->slen > n->pos) { + pn = n; + cindex = index; + } + + n = tnode_get_child_rcu(n, index); + if (unlikely(!n)) + goto backtrace; + } + + /* Step 2: Sort out leaves and begin backtracing for longest prefix */ + for (;;) { + /* record the pointer where our next node pointer is stored */ + struct tnode __rcu **cptr = n->child; + + /* This test verifies that none of the bits that differ + * between the key and the prefix exist in the region of + * the lsb and higher in the prefix. 
+ */ + if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) + goto backtrace; + + /* exit out and process leaf */ + if (unlikely(IS_LEAF(n))) + break; + + /* Don't bother recording parent info. Since we are in + * prefix match mode we will have to come back to wherever + * we started this traversal anyway + */ + + while ((n = rcu_dereference(*cptr)) == NULL) { +backtrace: +#ifdef CONFIG_IP_FIB_TRIE_STATS + if (!n) + this_cpu_inc(stats->null_node_hit); +#endif + /* If we are at cindex 0 there are no more bits for + * us to strip at this level so we must ascend back + * up one level to see if there are any more bits to + * be stripped there. + */ + while (!cindex) { + t_key pkey = pn->key; + + pn = node_parent_rcu(pn); + if (unlikely(!pn)) + return -EAGAIN; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->backtrack); +#endif + /* Get Child's index */ + cindex = get_index(pkey, pn); + } + + /* strip the least significant bit from the cindex */ + cindex &= cindex - 1; + + /* grab pointer for next child node */ + cptr = &pn->child[cindex]; + } + } - hlist_for_each_entry_rcu(li, hhead, hlist) { +found: + /* Step 3: Process the leaf, if that fails fall back to backtracing */ + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; - if (l->key != (key & li->mask_plen)) + if ((key ^ n->key) & li->mask_plen) continue; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -1355,9 +1431,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; - if (err) { + if (unlikely(err < 0)) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; + this_cpu_inc(stats->semantic_match_passed); #endif return err; } @@ -1371,241 +1447,48 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; -#endif + 
if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + res->prefixlen = li->plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_info->fib_scope; + res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - return 0; - } - } - -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_miss++; -#endif - } - - return 1; -} - -int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) -{ - struct trie *t = (struct trie *) tb->tb_data; - int ret; - struct rt_trie_node *n; - struct tnode *pn; - unsigned int pos, bits; - t_key key = ntohl(flp->daddr); - unsigned int chopped_off; - t_key cindex = 0; - unsigned int current_prefix_length = KEYLENGTH; - struct tnode *cn; - t_key pref_mismatch; - - rcu_read_lock(); - - n = rcu_dereference(t->trie); - if (!n) - goto failed; - #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.gets++; + this_cpu_inc(stats->semantic_match_passed); #endif - - /* Just a leaf? */ - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); - goto found; - } - - pn = (struct tnode *) n; - chopped_off = 0; - - while (pn) { - pos = pn->pos; - bits = pn->bits; - - if (!chopped_off) - cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), - pos, bits); - - n = tnode_get_child_rcu(pn, cindex); - - if (n == NULL) { -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.null_node_hit++; -#endif - goto backtrace; - } - - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); - if (ret > 0) - goto backtrace; - goto found; - } - - cn = (struct tnode *)n; - - /* - * It's a tnode, and we can do some extra checks here if we - * like, to avoid descending into a dead-end branch. 
- * This tnode is in the parent's child array at index - * key[p_pos..p_pos+p_bits] but potentially with some bits - * chopped off, so in reality the index may be just a - * subprefix, padded with zero at the end. - * We can also take a look at any skipped bits in this - * tnode - everything up to p_pos is supposed to be ok, - * and the non-chopped bits of the index (se previous - * paragraph) are also guaranteed ok, but the rest is - * considered unknown. - * - * The skipped bits are key[pos+bits..cn->pos]. - */ - - /* If current_prefix_length < pos+bits, we are already doing - * actual prefix matching, which means everything from - * pos+(bits-chopped_off) onward must be zero along some - * branch of this subtree - otherwise there is *no* valid - * prefix present. Here we can only check the skipped - * bits. Remember, since we have already indexed into the - * parent's child array, we know that the bits we chopped of - * *are* zero. - */ - - /* NOTA BENE: Checking only skipped bits - for the new node here */ - - if (current_prefix_length < pos+bits) { - if (tkey_extract_bits(cn->key, current_prefix_length, - cn->pos - current_prefix_length) - || !(cn->child[0])) - goto backtrace; - } - - /* - * If chopped_off=0, the index is fully validated and we - * only need to look at the skipped bits for this, the new, - * tnode. What we actually want to do is to find out if - * these skipped bits match our key perfectly, or if we will - * have to count on finding a matching prefix further down, - * because if we do, we would like to have some way of - * verifying the existence of such a prefix at this point. - */ - - /* The only thing we can do at this point is to verify that - * any such matching prefix can indeed be a prefix to our - * key, and if the bits in the node we are inspecting that - * do not match our key are not ZERO, this cannot be true. 
- * Thus, find out where there is a mismatch (before cn->pos) - * and verify that all the mismatching bits are zero in the - * new tnode's key. - */ - - /* - * Note: We aren't very concerned about the piece of - * the key that precede pn->pos+pn->bits, since these - * have already been checked. The bits after cn->pos - * aren't checked since these are by definition - * "unknown" at this point. Thus, what we want to see - * is if we are about to enter the "prefix matching" - * state, and in that case verify that the skipped - * bits that will prevail throughout this subtree are - * zero, as they have to be if we are to find a - * matching prefix. - */ - - pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); - - /* - * In short: If skipped bits in this node do not match - * the search key, enter the "prefix matching" - * state.directly. - */ - if (pref_mismatch) { - /* fls(x) = __fls(x) + 1 */ - int mp = KEYLENGTH - __fls(pref_mismatch) - 1; - - if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) - goto backtrace; - - if (current_prefix_length >= cn->pos) - current_prefix_length = mp; + return err; + } } - pn = (struct tnode *)n; /* Descend */ - chopped_off = 0; - continue; - -backtrace: - chopped_off++; - - /* As zero don't change the child key (cindex) */ - while ((chopped_off <= pn->bits) - && !(cindex & (1<<(chopped_off-1)))) - chopped_off++; - - /* Decrease current_... with bits chopped off */ - if (current_prefix_length > pn->pos + pn->bits - chopped_off) - current_prefix_length = pn->pos + pn->bits - - chopped_off; - - /* - * Either we do the actual chop off according or if we have - * chopped off all bits in this tnode walk up to our parent. 
- */ - - if (chopped_off <= pn->bits) { - cindex &= ~(1 << (chopped_off-1)); - } else { - struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); - if (!parent) - goto failed; - - /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); - pn = parent; - chopped_off = 0; - #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.backtrack++; + this_cpu_inc(stats->semantic_match_miss); #endif - goto backtrace; - } } -failed: - ret = 1; -found: - rcu_read_unlock(); - return ret; + goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); /* * Remove the leaf and return parent. */ -static void trie_leaf_remove(struct trie *t, struct leaf *l) +static void trie_leaf_remove(struct trie *t, struct tnode *l) { - struct tnode *tp = node_parent((struct rt_trie_node *) l); + struct tnode *tp = node_parent(l); pr_debug("entering trie_leaf_remove(%p)\n", l); if (tp) { - t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(tp, cindex, NULL); + put_child(tp, get_index(l->key, tp), NULL); trie_rebalance(t, tp); - } else + } else { RCU_INIT_POINTER(t->trie, NULL); + } - free_leaf(l); + node_free(l); } /* @@ -1619,7 +1502,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; - struct leaf *l; + struct tnode *l; struct leaf_info *li; if (plen > 32) @@ -1684,7 +1567,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default--; if (list_empty(fa_head)) { - hlist_del_rcu(&li->hlist); + remove_leaf_info(l, li); free_leaf_info(li); } @@ -1717,12 +1600,13 @@ static int trie_flush_list(struct list_head *head) return found; } -static int trie_flush_leaf(struct leaf *l) +static int trie_flush_leaf(struct tnode *l) { int found = 0; struct hlist_head *lih = &l->list; struct hlist_node *tmp; struct leaf_info *li = NULL; + unsigned char plen = KEYLENGTH; hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += 
trie_flush_list(&li->falh); @@ -1730,8 +1614,14 @@ static int trie_flush_leaf(struct leaf *l) if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); + continue; } + + plen = li->plen; } + + l->slen = KEYLENGTH - plen; + return found; } @@ -1739,63 +1629,57 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. */ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) +static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) { do { - t_key idx; + unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0; - if (c) - idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1; - else - idx = 0; - - while (idx < 1u << p->bits) { + while (idx < tnode_child_length(p)) { c = tnode_get_child_rcu(p, idx++); if (!c) continue; if (IS_LEAF(c)) - return (struct leaf *) c; + return c; /* Rescan start scanning in new node */ - p = (struct tnode *) c; + p = c; idx = 0; } /* Node empty, walk back up to parent */ - c = (struct rt_trie_node *) p; + c = p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ } -static struct leaf *trie_firstleaf(struct trie *t) +static struct tnode *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie); + struct tnode *n = rcu_dereference_rtnl(t->trie); if (!n) return NULL; if (IS_LEAF(n)) /* trie is just a leaf */ - return (struct leaf *) n; + return n; return leaf_walk_rcu(n, NULL); } -static struct leaf *trie_nextleaf(struct leaf *l) +static struct tnode *trie_nextleaf(struct tnode *l) { - struct rt_trie_node *c = (struct rt_trie_node *) l; - struct tnode *p = node_parent_rcu(c); + struct tnode *p = node_parent_rcu(l); if (!p) return NULL; /* trie with just one leaf */ - return leaf_walk_rcu(p, c); + return leaf_walk_rcu(p, l); } -static struct leaf *trie_leafindex(struct trie *t, int index) +static struct tnode 
*trie_leafindex(struct trie *t, int index) { - struct leaf *l = trie_firstleaf(t); + struct tnode *l = trie_firstleaf(t); while (l && index-- > 0) l = trie_nextleaf(l); @@ -1810,19 +1694,28 @@ static struct leaf *trie_leafindex(struct trie *t, int index) int fib_table_flush(struct fib_table *tb) { struct trie *t = (struct trie *) tb->tb_data; - struct leaf *l, *ll = NULL; + struct tnode *l, *ll = NULL; int found = 0; for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { found += trie_flush_leaf(l); - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } + ll = l; } - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } pr_debug("trie_flush found=%d\n", found); return found; @@ -1830,6 +1723,11 @@ int fib_table_flush(struct fib_table *tb) void fib_free_table(struct fib_table *tb) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie *t = (struct trie *)tb->tb_data; + + free_percpu(t->stats); +#endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } @@ -1870,7 +1768,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, return skb->len; } -static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, +static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; @@ -1906,7 +1804,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { - struct leaf *l; + struct tnode *l; struct trie *t = (struct trie *) tb->tb_data; t_key key = cb->args[2]; int count = cb->args[3]; @@ -1952,7 +1850,7 @@ void __init fib_trie_init(void) 0, SLAB_PANIC, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", - max(sizeof(struct leaf), + max(sizeof(struct tnode), 
sizeof(struct leaf_info)), 0, SLAB_PANIC, NULL); } @@ -1973,7 +1871,14 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_num_default = 0; t = (struct trie *) tb->tb_data; - memset(t, 0, sizeof(*t)); + RCU_INIT_POINTER(t->trie, NULL); +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats = alloc_percpu(struct trie_use_stats); + if (!t->stats) { + kfree(tb); + tb = NULL; + } +#endif return tb; } @@ -1988,10 +1893,10 @@ struct fib_trie_iter { unsigned int depth; }; -static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) { + unsigned long cindex = iter->index; struct tnode *tn = iter->tnode; - unsigned int cindex = iter->index; struct tnode *p; /* A single entry routing table */ @@ -2001,8 +1906,8 @@ static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); rescan: - while (cindex < (1<<tn->bits)) { - struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); + while (cindex < tnode_child_length(tn)) { + struct tnode *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -2010,7 +1915,7 @@ rescan: iter->index = cindex + 1; } else { /* push down one level */ - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; ++iter->depth; } @@ -2021,9 +1926,9 @@ rescan: } /* Current node exhausted, pop back up */ - p = node_parent_rcu((struct rt_trie_node *)tn); + p = node_parent_rcu(tn); if (p) { - cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + cindex = get_index(tn->key, p) + 1; tn = p; --iter->depth; goto rescan; @@ -2033,10 +1938,10 @@ rescan: return NULL; } -static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct rt_trie_node *n; + struct tnode *n; if (!t) return NULL; @@ -2046,7 +1951,7 @@ static struct rt_trie_node 
*fib_trie_get_first(struct fib_trie_iter *iter, return NULL; if (IS_TNODE(n)) { - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; iter->depth = 1; } else { @@ -2060,7 +1965,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct rt_trie_node *n; + struct tnode *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2068,7 +1973,6 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; struct leaf_info *li; s->leaves++; @@ -2076,19 +1980,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, &l->list, hlist) + hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - const struct tnode *tn = (const struct tnode *) n; - int i; - s->tnodes++; - if (tn->bits < MAX_STAT_DEPTH) - s->nodesizes[tn->bits]++; - - for (i = 0; i < (1<<tn->bits); i++) - if (!tn->child[i]) - s->nullpointers++; + if (n->bits < MAX_STAT_DEPTH) + s->nodesizes[n->bits]++; + s->nullpointers += n->empty_children; } } rcu_read_unlock(); @@ -2111,7 +2009,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - bytes = sizeof(struct leaf) * stat->leaves; + bytes = sizeof(struct tnode) * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); bytes += sizeof(struct leaf_info) * stat->prefixes; @@ -2132,25 +2030,38 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct rt_trie_node *) * pointers; + bytes += sizeof(struct tnode *) * pointers; 
seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } #ifdef CONFIG_IP_FIB_TRIE_STATS static void trie_show_usage(struct seq_file *seq, - const struct trie_use_stats *stats) + const struct trie_use_stats __percpu *stats) { + struct trie_use_stats s = { 0 }; + int cpu; + + /* loop through all of the CPUs and gather up the stats */ + for_each_possible_cpu(cpu) { + const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); + + s.gets += pcpu->gets; + s.backtrack += pcpu->backtrack; + s.semantic_match_passed += pcpu->semantic_match_passed; + s.semantic_match_miss += pcpu->semantic_match_miss; + s.null_node_hit += pcpu->null_node_hit; + s.resize_node_skipped += pcpu->resize_node_skipped; + } + seq_printf(seq, "\nCounters:\n---------\n"); - seq_printf(seq, "gets = %u\n", stats->gets); - seq_printf(seq, "backtracks = %u\n", stats->backtrack); + seq_printf(seq, "gets = %u\n", s.gets); + seq_printf(seq, "backtracks = %u\n", s.backtrack); seq_printf(seq, "semantic match passed = %u\n", - stats->semantic_match_passed); - seq_printf(seq, "semantic match miss = %u\n", - stats->semantic_match_miss); - seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); - seq_printf(seq, "skipped node resize = %u\n\n", - stats->resize_node_skipped); + s.semantic_match_passed); + seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); + seq_printf(seq, "null node hit= %u\n", s.null_node_hit); + seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); } #endif /* CONFIG_IP_FIB_TRIE_STATS */ @@ -2173,7 +2084,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" " %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); + sizeof(struct tnode), sizeof(struct tnode)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; @@ -2191,7 +2102,7 @@ static int 
fib_triestat_seq_show(struct seq_file *seq, void *v) trie_collect_stats(t, &stat); trie_show_stats(seq, &stat); #ifdef CONFIG_IP_FIB_TRIE_STATS - trie_show_usage(seq, &t->stats); + trie_show_usage(seq, t->stats); #endif } } @@ -2212,7 +2123,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2224,7 +2135,7 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - struct rt_trie_node *n; + struct tnode *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2253,7 +2164,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct rt_trie_node *n; + struct tnode *n; ++*pos; /* next node in same table */ @@ -2339,29 +2250,26 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct rt_trie_node *n = v; + struct tnode *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); + __be32 prf = htonl(n->key); seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- %pI4/%d %d %d %d\n", - &prf, tn->pos, tn->bits, tn->full_children, - tn->empty_children); - + seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", + &prf, KEYLENGTH - n->pos - n->bits, n->bits, + n->full_children, n->empty_children); } else { - struct leaf *l = (struct leaf *) n; struct leaf_info *li; - __be32 val = htonl(l->key); + __be32 val = htonl(n->key); seq_indent(seq, 
iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2411,9 +2319,9 @@ struct fib_route_iter { t_key key; }; -static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct leaf *l = NULL; + struct tnode *l = NULL; struct trie *t = iter->main_trie; /* use cache location of last found key */ @@ -2458,7 +2366,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; - struct leaf *l = v; + struct tnode *l = v; ++*pos; if (v == SEQ_START_TOKEN) { @@ -2504,7 +2412,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - struct leaf *l = v; + struct tnode *l = v; struct leaf_info *li; if (v == SEQ_START_TOKEN) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index b986298a7ba3..92ddea1e6457 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -70,7 +70,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -82,14 +81,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)guehdr + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + 
skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); return guehdr; } @@ -174,7 +166,8 @@ drop: } static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -195,7 +188,8 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff) +static int fou_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; @@ -226,7 +220,6 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) return guehdr; @@ -241,12 +234,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - delta = remcsum_adjust((void *)guehdr + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); skb->remcsum_offload = 1; @@ -254,7 +242,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload **offloads; const struct net_offload *ops; @@ -360,7 +349,8 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); @@ -490,7 +480,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, 
sk->sk_user_data = fou; fou->sock = sock; - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); sk->sk_allocation = GFP_ATOMIC; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 394a200f93c1..5a4828ba05ad 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/skbuff.h> -#include <linux/rculist.h> +#include <linux/list.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/ip.h> @@ -26,8 +26,8 @@ #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> -#include <linux/hash.h> #include <linux/ethtool.h> +#include <linux/mutex.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/ip.h> @@ -50,38 +50,30 @@ #include <net/ip6_checksum.h> #endif -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<<PORT_HASH_BITS) +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); /* per-network namespace private data for this module */ struct geneve_net { - struct hlist_head sock_list[PORT_HASH_SIZE]; - spinlock_t sock_lock; /* Protects sock_list */ + struct list_head sock_list; }; static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); } -static struct hlist_head *gs_head(struct net *net, __be16 port) +static struct geneve_sock *geneve_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct geneve_net *gn = net_generic(net, geneve_net_id); - - return &gn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - -/* Find geneve socket based on network namespace and UDP port */ -static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) -{ struct geneve_sock *gs; - hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) { - if (inet_sk(gs->sock->sk)->inet_sport == port) + list_for_each_entry(gs, &gn->sock_list, list) { + if 
(inet_sk(gs->sock->sk)->inet_sport == port && + inet_sk(gs->sock->sk)->sk.sk_family == family) return gs; } @@ -115,19 +107,19 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet) + bool csum, bool xnet) { struct genevehdr *gnvh; int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, csum); if (IS_ERR(skb)) return PTR_ERR(skb); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) { @@ -144,11 +136,107 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + !csum); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); +static int geneve_hlen(struct genevehdr *gh) +{ + return sizeof(*gh) + gh->opt_len * 4; +} + +static struct sk_buff **geneve_gro_receive(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) +{ + struct sk_buff *p, **pp = NULL; + struct genevehdr *gh, *gh2; + unsigned int hlen, gh_len, off_gnv; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_gnv = skb_gro_offset(skb); + hlen = off_gnv + sizeof(*gh); + gh = skb_gro_header_fast(skb, off_gnv); + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + if (gh->ver != GENEVE_VER || gh->oam) + goto out; + gh_len = geneve_hlen(gh); + + hlen = off_gnv + gh_len; + if 
(skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + gh2 = (struct genevehdr *)(p->data + off_gnv); + if (gh->opt_len != gh2->opt_len || + memcmp(gh, gh2, gh_len)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, gh_len); + skb_gro_postpull_rcsum(skb, gh, gh_len); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) +{ + struct genevehdr *gh; + struct packet_offload *ptype; + __be16 type; + int gh_len; + int err = -ENOSYS; + + udp_tunnel_gro_complete(skb, nhoff); + + gh = (struct genevehdr *)(skb->data + nhoff); + gh_len = geneve_hlen(gh); + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); + + rcu_read_unlock(); + return err; +} + static void geneve_notify_add_rx_port(struct geneve_sock *gs) { struct sock *sk = gs->sock->sk; @@ -214,15 +302,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -263,8 +342,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -272,19 +349,15 @@ static 
struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; - gs->udp_offloads.callbacks.gro_receive = NULL; - gs->udp_offloads.callbacks.gro_complete = NULL; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); + gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; + gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -293,6 +366,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + list_add(&gs->list, &gn->sock_list); + return gs; } @@ -300,25 +375,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool no_share, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; - - if (no_share) /* Return error if sharing is not allowed. */ - return ERR_PTR(-EINVAL); + mutex_lock(&geneve_mutex); - spin_lock(&gn->sock_lock); - gs = geneve_find_sock(net, port); - if (gs && ((gs->rcv != rcv) || - !atomic_add_unless(&gs->refcnt, 1, 0))) + gs = geneve_find_sock(net, ipv6 ? 
AF_INET6 : AF_INET, port); + if (gs) { + if (!no_share && gs->rcv == rcv) + gs->refcnt++; + else gs = ERR_PTR(-EBUSY); - spin_unlock(&gn->sock_lock); + } else { + gs = geneve_socket_create(net, port, rcv, data, ipv6); + } - if (!gs) - gs = ERR_PTR(-EINVAL); + mutex_unlock(&geneve_mutex); return gs; } @@ -326,37 +397,32 @@ EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - struct net *net = sock_net(gs->sock->sk); - struct geneve_net *gn = net_generic(net, geneve_net_id); + mutex_lock(&geneve_mutex); - if (!atomic_dec_and_test(&gs->refcnt)) - return; + if (--gs->refcnt) + goto unlock; - spin_lock(&gn->sock_lock); - hlist_del_rcu(&gs->hlist); + list_del(&gs->list); geneve_notify_del_rx_port(gs); - spin_unlock(&gn->sock_lock); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); - queue_work(geneve_wq, &gs->del_work); +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - spin_lock_init(&gn->sock_lock); - - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; @@ -365,10 +431,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -377,11 +439,10 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 
36f5584d93c5..5e564014a0b7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net) { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net) if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. 
@@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net) fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..81751f12645f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -203,7 +203,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, icsk->icsk_ca_ops->get_info(sk, ext, skb); out: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -271,7 +272,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -758,7 +760,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4f4bf5b99686..6207275fc749 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -659,12 +659,12 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_GRE_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); } if (data[IFLA_GRE_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); } return ret; @@ -673,6 +673,7 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; return ip_tunnel_init(dev); } @@ -785,10 +786,10 @@ static int ipgre_fill_info(struct sk_buff 
*skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, t->encap.type) || - nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT, - t->encap.sport) || - nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT, - t->encap.dport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, t->encap.flags)) goto nla_put_failure; @@ -828,6 +829,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { @@ -842,6 +844,7 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __net_init ipgre_tap_init_net(struct net *net) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c373c0708d97..d68199d9b2b0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -755,13 +755,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* XXX: stripping const */ - if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0) + if (copy_from_iter(to, len, &msg->msg_iter) != len) return -EFAULT; } else { __wsum csum = 0; - /* XXX: stripping const */ - if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0) + if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6b85adb05003..31d8c71986b4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -37,6 +37,7 @@ #include <net/route.h> #include <net/xfrm.h> #include 
<net/compat.h> +#include <net/checksum.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/transp_v6.h> #endif @@ -45,14 +46,6 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> -#define IP_CMSG_PKTINFO 1 -#define IP_CMSG_TTL 2 -#define IP_CMSG_TOS 4 -#define IP_CMSG_RECVOPTS 8 -#define IP_CMSG_RETOPTS 16 -#define IP_CMSG_PASSSEC 32 -#define IP_CMSG_ORIGDSTADDR 64 - /* * SOL_IP control messages. */ @@ -104,6 +97,20 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, + int offset) +{ + __wsum csum = skb->csum; + + if (skb->ip_summed != CHECKSUM_COMPLETE) + return; + + if (offset != 0) + csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + + put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); +} + static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; @@ -144,47 +151,73 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, + int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ - if (flags & 1) + if (flags & IP_CMSG_PKTINFO) { ip_cmsg_recv_pktinfo(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PKTINFO; + if (!flags) + return; + } + + if (flags & IP_CMSG_TTL) { ip_cmsg_recv_ttl(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TTL; + if (!flags) + return; + } + + if (flags & IP_CMSG_TOS) { ip_cmsg_recv_tos(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TOS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RECVOPTS) { ip_cmsg_recv_opts(msg, skb); - if ((flags 
>>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RECVOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RETOPTS) { ip_cmsg_recv_retopts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RETOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_PASSSEC) { ip_cmsg_recv_security(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PASSSEC; + if (!flags) + return; + } + + if (flags & IP_CMSG_ORIGDSTADDR) { ip_cmsg_recv_dstaddr(msg, skb); + flags &= ~IP_CMSG_ORIGDSTADDR; + if (!flags) + return; + } + + if (flags & IP_CMSG_CHECKSUM) + ip_cmsg_recv_checksum(msg, skb, offset); } -EXPORT_SYMBOL(ip_cmsg_recv); +EXPORT_SYMBOL(ip_cmsg_recv_offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) @@ -450,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -463,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { + if (skb->len && + (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) @@ -518,6 +552,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: + case IP_CHECKSUM: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -615,6 +650,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; break; + case IP_CHECKSUM: + 
if (val) { + if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { + inet_inc_convert_csum(sk); + inet->cmsg_flags |= IP_CMSG_CHECKSUM; + } + } else { + if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { + inet_dec_convert_csum(sk); + inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; + } + } + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1218,6 +1266,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_RECVORIGDSTADDR: val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; break; + case IP_CHECKSUM: + val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d3e447936720..2cd08280c77b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -972,6 +972,14 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); +struct net *ip_tunnel_get_link_net(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip_tunnel_get_link_net); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1a7e979e80ba..94efe148181c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -531,6 +531,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __init vti_init(void) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 7fa18bc7e47f..b26376ef87f6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -209,9 +209,9 @@ static int __init ic_open_devs(void) last = &ic_first_dev; rtnl_lock(); - /* bring loopback device up first */ + /* bring loopback and DSA master network devices up first */ for_each_netdev(&init_net, 
dev) { - if (!(dev->flags & IFF_LOOPBACK)) + if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev) { + if (dev != ic_dev && !netdev_uses_dsa(dev)) { DBG(("IP-Config: Downing %s\n", dev->name)); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40403114f00a..915d215a7d14 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -366,12 +366,12 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; @@ -460,10 +460,10 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; @@ -498,6 +498,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel ipip_handler __read_mostly = { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c8034587859d..9d78427652d2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2290,7 
+2290,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2a3720fb5a5f..e9f66e1cda50 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -599,18 +599,18 @@ int ping_getfrag(void *from, char *to, struct pingfakehdr *pfh = (struct pingfakehdr *)from; if (offset == 0) { - if (fraglen < sizeof(struct icmphdr)) + fraglen -= sizeof(struct icmphdr); + if (fraglen < 0) BUG(); - if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->iov, 0, fraglen - sizeof(struct icmphdr), - &pfh->wcheck)) + if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) + if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } @@ -811,8 +811,7 @@ back_from_confirm: pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f9cd200ce20..d8953ef0770c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -292,6 +292,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), + SNMP_MIB_ITEM("TCPACKSkippedSynRecv", LINUX_MIB_TCPACKSKIPPEDSYNRECV), + SNMP_MIB_ITEM("TCPACKSkippedPAWS", 
LINUX_MIB_TCPACKSKIPPEDPAWS), + SNMP_MIB_ITEM("TCPACKSkippedSeq", LINUX_MIB_TCPACKSKIPPEDSEQ), + SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), + SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), + SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0bb68df5055d..f027a708b7e0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -337,7 +337,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - void *from, size_t length, + struct msghdr *msg, size_t length, struct rtable **rtp, unsigned int flags) { @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, from, 0, length)) + if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; @@ -625,8 +625,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = raw_send_hdrinc(sk, &fl4, (struct iovec *)msg->msg_iter.iov, len, + err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags); else { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52e1f2bf0ca2..ad5064362c5c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1328,14 +1328,22 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) return ret; } -static DEFINE_SPINLOCK(rt_uncached_lock); -static LIST_HEAD(rt_uncached_list); +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); static void rt_add_uncached_list(struct rtable *rt) { - spin_lock_bh(&rt_uncached_lock); - list_add_tail(&rt->rt_uncached, &rt_uncached_list); - spin_unlock_bh(&rt_uncached_lock); + struct uncached_list *ul = 
raw_cpu_ptr(&rt_uncached_list); + + rt->rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1343,27 +1351,32 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; if (!list_empty(&rt->rt_uncached)) { - spin_lock_bh(&rt_uncached_lock); + struct uncached_list *ul = rt->rt_uncached_list; + + spin_lock_bh(&ul->lock); list_del(&rt->rt_uncached); - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } void rt_flush_dev(struct net_device *dev) { - if (!list_empty(&rt_uncached_list)) { - struct net *net = dev_net(dev); - struct rtable *rt; + struct net *net = dev_net(dev); + struct rtable *rt; + int cpu; - spin_lock_bh(&rt_uncached_lock); - list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = net->loopback_dev; dev_hold(rt->dst.dev); dev_put(dev); } - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } @@ -2381,7 +2394,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2473,7 +2487,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = rt_fill_info(net, dst, src, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err <= 0) + if (err < 0) goto errout_free; err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); @@ -2721,6 +2735,7 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { int rc = 0; + int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * 
sizeof(*ip_idents), GFP_KERNEL); if (!ip_idents) @@ -2728,6 +2743,12 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0ee384a448f..d151539da8e6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -604,20 +604,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_mtu_probing", - .data = &sysctl_tcp_mtu_probing, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_base_mss", - .data = &sysctl_tcp_base_mss, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_workaround_signed_windows", .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), @@ -729,6 +715,13 @@ static struct ctl_table ipv4_table[] = { .extra2 = &one, }, { + .procname = "tcp_invalid_ratelimit", + .data = &sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, .maxlen = sizeof(int), @@ -876,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_mtu_probing", + .data = &init_net.ipv4.sysctl_tcp_mtu_probing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_base_mss", + .data = &init_net.ipv4.sysctl_tcp_base_mss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c 
index 3075723c729b..9d72a0fcd928 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied = 0; - int mss_now = 0, size_goal, copied_syn = 0, offset = 0; + int flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0; bool sg; long timeo; @@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; else if (err) goto out_err; - offset = copied_syn; } timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, mss_now = tcp_send_mss(sk, &size_goal, flags); /* Ok commence sending. */ - iovlen = msg->msg_iter.nr_segs; - iov = msg->msg_iter.iov; copied = 0; err = -EPIPE; @@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (--iovlen >= 0) { - size_t seglen = iov->iov_len; - unsigned char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int copy = 0; + int max = size_goal; - iov++; - if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ - if (offset >= seglen) { - offset -= seglen; - continue; - } - seglen -= offset; - from += offset; - offset = 0; + skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; } - while (seglen > 0) { - int copy = 0; - int max = size_goal; - - skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { - if (skb->ip_summed == CHECKSUM_NONE) - max = mss_now; - copy = max - skb->len; - } - - if (copy <= 0) { + if (copy <= 0) { new_segment: - /* Allocate new segment. 
If the interface is SG, - * allocate skb fitting to single page. - */ - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), - sk->sk_allocation); - if (!skb) - goto wait_for_memory; + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); + if (!skb) + goto wait_for_memory; - /* - * Check whether we can use HW checksum. - */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_PARTIAL; + /* + * Check whether we can use HW checksum. + */ + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); - copy = size_goal; - max = size_goal; + skb_entail(sk, skb); + copy = size_goal; + max = size_goal; - /* All packets are restored as if they have - * already been sent. skb_mstamp isn't set to - * avoid wrong rtt estimation. - */ - if (tp->repair) - TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; - } + /* All packets are restored as if they have + * already been sent. skb_mstamp isn't set to + * avoid wrong rtt estimation. + */ + if (tp->repair) + TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; + } - /* Try to append data to the end of skb. */ - if (copy > seglen) - copy = seglen; - - /* Where to copy to? */ - if (skb_availroom(skb) > 0) { - /* We have some space in skb head. Superb! 
*/ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, from, copy); - if (err) - goto do_fault; - } else { - bool merge = true; - int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); - - if (!sk_page_frag_refill(sk, pfrag)) - goto wait_for_memory; - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { - tcp_mark_push(tp, skb); - goto new_segment; - } - merge = false; - } + /* Try to append data to the end of skb. */ + if (copy > iov_iter_count(&msg->msg_iter)) + copy = iov_iter_count(&msg->msg_iter); + + /* Where to copy to? */ + if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! */ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + bool merge = true; + int i = skb_shinfo(skb)->nr_frags; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; - copy = min_t(int, copy, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_memory; - - err = skb_copy_to_page_nocache(sk, from, skb, - pfrag->page, - pfrag->offset, - copy); - if (err) - goto do_error; - - /* Update the skb. 
*/ - if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, copy); - get_page(pfrag->page); + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - pfrag->offset += copy; + merge = false; } - if (!copied) - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + copy = min_t(int, copy, pfrag->size - pfrag->offset); - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; - from += copy; - copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) { - tcp_tx_timestamp(sk, skb); - goto out; + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto do_error; + + /* Update the skb. */ + if (merge) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } + pfrag->offset += copy; + } - if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) - continue; + if (!copied) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + copied += copy; + if (!iov_iter_count(&msg->msg_iter)) { + tcp_tx_timestamp(sk, skb); + goto out; + } - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); - } else if (skb == tcp_send_head(sk)) - tcp_push_one(sk, mss_now); + if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); + } else if (skb == tcp_send_head(sk)) + tcp_push_one(sk, mss_now); + continue; + wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, 
&sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + if (copied) + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) - goto do_error; + if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + goto do_error; - mss_now = tcp_send_mss(sk, &size_goal, flags); - } + mss_now = tcp_send_mss(sk, &size_goal, flags); } out: diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 8670e68e2ce6..d694088214cd 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> +#include <linux/jhash.h> #include <net/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +/* Must be called with rcu lock held */ +static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +{ + const struct tcp_congestion_ops *ca = tcp_ca_find(name); +#ifdef CONFIG_MODULES + if (!ca && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif + return ca; +} + +/* Simple linear search, not much in here. */ +struct tcp_congestion_ops *tcp_ca_find_key(u32 key) +{ + struct tcp_congestion_ops *e; + + list_for_each_entry_rcu(e, &tcp_cong_list, list) { + if (e->key == key) + return e; + } + + return NULL; +} + /* * Attach new congestion control algorithm to the list * of available options. 
@@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + spin_lock(&tcp_cong_list_lock); - if (tcp_ca_find(ca->name)) { - pr_notice("%s already registered\n", ca->name); + if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { + pr_notice("%s already registered or non-unique key\n", + ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); @@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module gets removed entirely. + * + * A try_module_get() should fail by now as our module is + * in "going" state since no refs are held anymore and + * module_exit() handler being called. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +u32 tcp_ca_get_key_by_name(const char *name) +{ + const struct tcp_congestion_ops *ca; + u32 key; + + might_sleep(); + + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + key = ca ? ca->key : TCP_CA_UNSPEC; + rcu_read_unlock(); + + return key; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); + +char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + const struct tcp_congestion_ops *ca; + char *ret = NULL; + + rcu_read_lock(); + ca = tcp_ca_find_key(key); + if (ca) + ret = strncpy(buffer, ca->name, + TCP_CA_NAME_MAX); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); + /* Assign choice of congestion control. 
*/ void tcp_assign_congestion_control(struct sock *sk) { @@ -107,6 +180,18 @@ void tcp_init_congestion_control(struct sock *sk) icsk->icsk_ca_ops->init(sk); } +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + + if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); +} + /* Manage refcounts on socket close. */ void tcp_cleanup_congestion_control(struct sock *sk) { @@ -241,42 +326,26 @@ out: int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; int err = 0; - rcu_read_lock(); - ca = tcp_ca_find(name); + if (icsk->icsk_ca_dst_locked) + return -EPERM; - /* no change asking for existing value */ + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; - -#ifdef CONFIG_MODULES - /* not found attempt to autoload module */ - if (!ca && capable(CAP_NET_ADMIN)) { - rcu_read_unlock(); - request_module("tcp_%s", name); - rcu_read_lock(); - ca = tcp_ca_find(name); - } -#endif if (!ca) err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) err = -EPERM; - else if (!try_module_get(ca->owner)) err = -EBUSY; - - else { - tcp_cleanup_congestion_control(sk); - icsk->icsk_ca_ops = ca; - - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - } + else + tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 815c85e3b1e0..53db2c309572 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -255,6 +255,9 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, 
struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + if (foc->len == 0) /* Client requests a cookie */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { @@ -265,7 +268,8 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; - if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + if (foc->len >= 0 && /* Client presents or requests a cookie */ + tcp_fastopen_cookie_gen(req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { @@ -284,11 +288,10 @@ fastopen: LINUX_MIB_TCPFASTOPENPASSIVE); return true; } - } + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + } else if (foc->len > 0) /* Client presents an invalid cookie */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - NET_INC_STATS_BH(sock_net(sk), foc->len ? - LINUX_MIB_TCPFASTOPENPASSIVEFAIL : - LINUX_MIB_TCPFASTOPENCOOKIEREQD); *foc = valid_foc; return false; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..8fdd27b17306 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; +int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. 
*/ @@ -3183,8 +3184,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); + if (ca_ops->pkts_acked) { + long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { @@ -3319,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 } /* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { /* unprotected vars, we dont care of overwrites */ static u32 challenge_timestamp; static unsigned int challenge_count; - u32 now = jiffies / HZ; + struct tcp_sock *tp = tcp_sk(sk); + u32 now; + + /* First check our per-socket dupack rate limit. */ + if (tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, + &tp->last_oow_ack_time)) + return; + /* Then check the check host-wide RFC 5961 rate limit. */ + now = jiffies / HZ; if (now != challenge_timestamp) { challenge_timestamp = now; challenge_count = 0; @@ -3358,34 +3370,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } /* This routine deals with acks during a TLP episode. + * We mark the end of a TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { struct tcp_sock *tp = tcp_sk(sk); - bool is_tlp_dupack = (ack == tp->tlp_high_seq) && - !(flag & (FLAG_SND_UNA_ADVANCED | - FLAG_NOT_DUP | FLAG_DATA_SACKED)); - /* Mark the end of TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. 
- */ - if (is_tlp_dupack) { - tp->tlp_high_seq = 0; + if (before(ack, tp->tlp_high_seq)) return; - } - if (after(ack, tp->tlp_high_seq)) { + if (flag & FLAG_DSACKING_ACK) { + /* This DSACK means original and TLP probe arrived; no loss */ + tp->tlp_high_seq = 0; + } else if (after(ack, tp->tlp_high_seq)) { + /* ACK advances: there was a loss, so reduce cwnd. Reset + * tlp_high_seq in tcp_init_cwnd_reduction() + */ + tcp_init_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_try_keep_open(sk); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } else if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED))) { + /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; - /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ - if (!(flag & FLAG_DSACKING_ACK)) { - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); - tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); - } } } @@ -3421,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -4990,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDPAWS, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); goto discard; } /* Reset is accepted even if it did not pass PAWS. 
*/ @@ -5007,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (!th->rst) { if (th->syn) goto syn_challenge; - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSEQ, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); } goto discard; } @@ -5023,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) tcp_reset(sk); else - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5037,7 +5055,7 @@ syn_challenge: if (syn_inerr) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5870,10 +5888,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) * TCP ECN negotiation. * * Exception: tcp_ca wants ECN. This is required for DCTCP - * congestion control; it requires setting ECT on all packets, - * including SYN. We inverse the test in this case: If our - * local socket wants ECN, but peer only set ece/cwr (but not - * ECT in IP header) its probably a non-DCTCP aware sender. + * congestion control: Linux DCTCP asserts ECT on all packets, + * including SYN, which is most optimal solution; however, + * others, such as FreeBSD do not. 
*/ static void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb, @@ -5883,18 +5900,15 @@ static void tcp_ecn_create_request(struct request_sock *req, const struct tcphdr *th = tcp_hdr(skb); const struct net *net = sock_net(listen_sk); bool th_ecn = th->ece && th->cwr; - bool ect, need_ecn, ecn_ok; + bool ect, ecn_ok; if (!th_ecn) return; ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); - need_ecn = tcp_ca_needs_ecn(listen_sk); ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); - if (!ect && !need_ecn && ecn_ok) - inet_rsk(req)->ecn_ok = 1; - else if (ect && need_ecn) + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) inet_rsk(req)->ecn_ok = 1; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d22f54482bab..5a2dfed4783b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1342,6 +1342,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && @@ -2457,6 +2459,7 @@ static int __net_init tcp_sk_init(struct net *net) *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; return 0; fail: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ed9c9a91851c..e5f41bd5ec1b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -886,7 +886,8 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, if (tcp_metrics_fill_info(skb, tm) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d2680b65db..dd11ac7798c6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -58,6 +58,25 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 
e_win) return seq == e_win && seq == end_seq; } +static enum tcp_tw_status +tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, + const struct sk_buff *skb, int mib_idx) +{ + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx, + &tcptw->tw_last_oow_ack_time)) { + /* Send ACK. Note, we do not put the bucket, + * it will be released by caller. + */ + return TCP_TW_ACK; + } + + /* We are rate-limiting, so just release the tw sock and drop skb. */ + inet_twsk_put(tw); + return TCP_TW_SUCCESS; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -116,7 +135,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt, tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); if (th->rst) goto kill; @@ -250,10 +270,8 @@ kill: inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, TCP_TIMEWAIT_LEN); - /* Send ACK. Note, we do not put the bucket, - * it will be released by caller. - */ - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } inet_twsk_put(tw); return TCP_TW_SUCCESS; @@ -289,6 +307,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; + tcptw->tw_last_oow_ack_time = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -399,6 +418,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp, tp->ecn_flags = inet_rsk(req)->ecn_ok ? 
TCP_ECN_OK : 0; } +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + bool ca_got_dst = false; + + if (ca_key != TCP_CA_UNSPEC) { + const struct tcp_congestion_ops *ca; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + ca_got_dst = true; + } + rcu_read_unlock(); + } + + if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + tcp_assign_congestion_control(sk); + + tcp_set_ca_state(sk, TCP_CA_Open); +} +EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -441,6 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; newtp->lsndtime = treq->snt_synack; + newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; /* So many TCP implementations out there (incorrectly) count the @@ -451,10 +497,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - if (!try_module_get(newicsk->icsk_ca_ops->owner)) - tcp_assign_congestion_control(newsk); - - tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; @@ -583,7 +625,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. 
*/ - if (!inet_rtx_syn_ack(sk, req)) + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time) && + + !inet_rtx_syn_ack(sk, req)) req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX) + jiffies; return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 65caf8b95e17..a2a796c5536b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072; */ int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; - /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = tcp_wfree; + skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); @@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; } EXPORT_SYMBOL(tcp_mtup_init); @@ -2939,6 +2937,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops 
*ca; + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + + if (ca_key == TCP_CA_UNSPEC) + return; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + module_put(icsk->icsk_ca_ops->owner); + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + } + rcu_read_unlock(); +} + /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { @@ -2964,6 +2981,8 @@ static void tcp_connect_init(struct sock *sk) tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); + tcp_ca_dst_init(sk, dst); + if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric_advmss(dst); @@ -3034,7 +3053,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; + int syn_loss = 0, space, err = 0, copied; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3072,11 +3091,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), - fo->data->msg_iter.iov, 0, space))) { + copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { kfree_skb(syn_data); goto fallback; } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) @@ -3244,6 +3268,14 @@ void tcp_send_ack(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + /* We do not want pure acks influencing TCP Small Queues or fq/pacing + * too much. + * SKB_TRUESIZE(max(1 .. 
66, MAX_TCP_HEADER)) is unfortunately ~784 + * We also avoid tcp_wfree() overhead (cache line miss accessing + * tp->tsq_flags) by using regular sock_wfree() + */ + skb_set_tcp_pure_ack(buff); + /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1829c7fbc77e..0732b787904e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { + struct net *net = sock_net(sk); + /* Black hole detection */ - if (sysctl_tcp_mtu_probing) { + if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { + struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); int mss; mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(sysctl_tcp_base_mss, mss); + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = max(mss, 68 - tp->tcp_header_len); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 13b4dcf86ef6..97ef1f8b7be8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1329,7 +1329,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); err = copied; if (flags & MSG_TRUNC) @@ -1806,7 +1806,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 
d3e537ef6b7f..d10f6f4ead27 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -339,7 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb); + pp = uo_priv->offload->callbacks.gro_receive(head, skb, + uo_priv->offload); out_unlock: rcu_read_unlock(); @@ -395,7 +396,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (uo_priv != NULL) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); + err = uo_priv->offload->callbacks.gro_complete(skb, + nhoff + sizeof(struct udphdr), + uo_priv->offload); } rcu_read_unlock(); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 1671263e5fa0..c83b35485056 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, inet_sk(sk)->mc_loop = 0; /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); rcu_assign_sk_user_data(sk, cfg->sk_user_data); @@ -75,10 +75,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet) +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -90,9 +90,9 @@ int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, uh->source = src_port; uh->len = htons(skb->len); - udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, 
skb->len); + udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..98e4a63d72bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; /* Check if a valid qdisc is available */ @@ -489,7 +491,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -619,7 +622,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -635,7 +638,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -646,7 +649,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -1519,15 +1522,30 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { + return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); +} +EXPORT_SYMBOL(ipv6_chk_addr); + +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct 
net_device *dev, int strict, + u32 banned_flags) +{ struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); + u32 ifp_flags; rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + /* Decouple optimistic from tentative for evaluation here. + * Ban optimistic addresses explicitly, when required. + */ + ifp_flags = (ifp->flags&IFA_F_OPTIMISTIC) + ? (ifp->flags&~IFA_F_TENTATIVE) + : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE) && + !(ifp_flags&banned_flags) && (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); @@ -1538,7 +1556,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, rcu_read_unlock_bh(); return 0; } -EXPORT_SYMBOL(ipv6_chk_addr); +EXPORT_SYMBOL(ipv6_chk_addr_and_flags); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) @@ -4047,7 +4065,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; error: nlmsg_cancel(skb, nlh); @@ -4076,7 +4095,8 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, @@ -4101,7 +4121,8 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } enum addr_type_t { @@ -4134,7 +4155,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } @@ -4151,7 +4172,7 @@ static int 
in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4166,7 +4187,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4209,7 +4230,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, goto cont; if (in6_dump_addrs(idev, skb, cb, type, - s_ip_idx, &ip_idx) <= 0) + s_ip_idx, &ip_idx) < 0) goto done; cont: idx++; @@ -4376,6 +4397,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; + array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; } static inline size_t inet6_ifla6_size(void) @@ -4572,6 +4594,22 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return 0; } +static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { + [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, + [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, +}; + +static int inet6_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) +{ + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (dev && !__in6_dev_get(dev)) + return -EAFNOSUPPORT; + + return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { int err = -EINVAL; @@ -4638,7 +4676,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, goto nla_put_failure; nla_nest_end(skb, protoinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -4670,7 +4709,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (inet6_fill_ifinfo(skb, idev, 
NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) <= 0) + RTM_NEWLINK, NLM_F_MULTI) < 0) goto out; cont: idx++; @@ -4747,7 +4786,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, ci.valid_time = ntohl(pinfo->valid); if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -5253,6 +5293,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, @@ -5389,10 +5436,11 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -static struct rtnl_af_ops inet6_ops = { +static struct rtnl_af_ops inet6_ops __read_mostly = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .validate_link_af = inet6_validate_link_af, .set_link_af = inet6_set_link_af, }; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fd0dc47f471d..e43e79d0a612 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -490,7 +490,8 @@ static int ip6addrlbl_fill(struct sk_buff *skb, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } idx++; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 49f5e73db122..c215be70cac0 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { const 
unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { sin->sin6_family = AF_INET6; if (np->rxopt.all) { if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d674152b6ede..a5e95199585e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Dest addr check */ - if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) { + if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f1c6d5e98322..263ef4143bff 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -277,7 +277,6 @@ static int fib6_dump_node(struct fib6_walker *w) w->leaf = rt; return 1; } - WARN_ON(res == 0); } w->leaf = NULL; return 0; @@ -630,32 +629,35 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } -static int fib6_commit_metrics(struct dst_entry *dst, - struct nlattr *mx, int mx_len) +static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { - struct nlattr *nla; - int remaining; - u32 *mp; + int i; - if (dst->flags & DST_HOST) { - mp = dst_metrics_write_ptr(dst); - } else { - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (!mp) - return -ENOMEM; - dst_init_metrics(dst, mp, 0); + for (i = 0; i < RTAX_MAX; i++) { + if (test_bit(i, mxc->mx_valid)) + mp[i] = mxc->mx[i]; } +} - nla_for_each_attr(nla, mx, mx_len, remaining) { - int type = nla_type(nla); +static int 
fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +{ + if (!mxc->mx) + return 0; - if (type) { - if (type > RTAX_MAX) - return -EINVAL; + if (dst->flags & DST_HOST) { + u32 *mp = dst_metrics_write_ptr(dst); - mp[type - 1] = nla_get_u32(nla); - } + if (unlikely(!mp)) + return -ENOMEM; + + fib6_copy_metrics(mp, mxc); + } else { + dst_init_metrics(dst, mxc->mx, false); + + /* We've stolen mx now. */ + mxc->mx = NULL; } + return 0; } @@ -687,7 +689,7 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct nlattr *mx, int mx_len) + struct nl_info *info, struct mx6_config *mxc) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -796,11 +798,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -820,11 +821,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -862,8 +863,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -958,7 +959,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, } #endif - err = 
fib6_add_rt2node(fn, rt, info, mx, mx_len); + err = fib6_add_rt2node(fn, rt, info, mxc); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 01ccc28a686f..bc28b7d42a6d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1662,6 +1662,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { @@ -1675,6 +1676,7 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .changelink = ip6gre_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d28f2a2efb32..d33df4cbd872 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1027,6 +1027,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -1042,7 +1043,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, @@ -1065,7 +1066,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; } @@ -1121,99 +1122,106 @@ static void ip6_append_data_mtu(unsigned int *mtu, } } -int 
ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, - int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) +static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, + struct inet6_cork *v6_cork, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct rt6_info *rt, struct flowi6 *fl6) { - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_cork *cork; + unsigned int mtu; + + /* + * setup for corking + */ + if (opt) { + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + + v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(v6_cork->opt == NULL)) + return -ENOBUFS; + + v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + + v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !v6_cork->opt->dst0opt) + return -ENOBUFS; + + v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !v6_cork->opt->dst1opt) + return -ENOBUFS; + + v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !v6_cork->opt->hopopt) + return -ENOBUFS; + + v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !v6_cork->opt->srcrt) + return -ENOBUFS; + + /* need source address above miyazawa*/ + } + dst_hold(&rt->dst); + cork->base.dst = &rt->dst; + cork->fl.u.ip6 = *fl6; + v6_cork->hop_limit = hlimit; + v6_cork->tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
+ rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; + cork->base.length = 0; + + return 0; +} + +static int __ip6_append_data(struct sock *sk, + struct flowi6 *fl6, + struct sk_buff_head *queue, + struct inet_cork *cork, + struct inet6_cork *v6_cork, + struct page_frag *pfrag, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags, int dontfrag) +{ struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; - int exthdrlen; - int dst_exthdrlen; + int exthdrlen = 0; + int dst_exthdrlen = 0; int hh_len; int copy; int err; int offset = 0; __u8 tx_flags = 0; u32 tskey = 0; + struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct ipv6_txoptions *opt = v6_cork->opt; + int csummode = CHECKSUM_NONE; - if (flags&MSG_PROBE) - return 0; - cork = &inet->cork.base; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking - */ - if (opt) { - if (WARN_ON(np->cork.opt)) - return -EINVAL; - - np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - - np->cork.opt->tot_len = opt->tot_len; - np->cork.opt->opt_flen = opt->opt_flen; - np->cork.opt->opt_nflen = opt->opt_nflen; - - np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !np->cork.opt->dst0opt) - return -ENOBUFS; - - np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !np->cork.opt->dst1opt) - return -ENOBUFS; - - np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !np->cork.opt->hopopt) - return -ENOBUFS; - - np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !np->cork.opt->srcrt) - return 
-ENOBUFS; - - /* need source address above miyazawa*/ - } - dst_hold(&rt->dst); - cork->dst = &rt->dst; - inet->cork.fl.u.ip6 = *fl6; - np->cork.hop_limit = hlimit; - np->cork.tclass = tclass; - if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); - else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } - cork->fragsize = mtu; - if (dst_allfrag(rt->dst.path)) - cork->flags |= IPCORK_ALLFRAG; - cork->length = 0; - exthdrlen = (opt ? opt->opt_flen : 0); - length += exthdrlen; - transhdrlen += exthdrlen; + skb = skb_peek_tail(queue); + if (!skb) { + exthdrlen = opt ? opt->opt_flen : 0; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; - } else { - rt = (struct rt6_info *)cork->dst; - fl6 = &inet->cork.fl.u.ip6; - opt = np->cork.opt; - transhdrlen = 0; - exthdrlen = 0; - dst_exthdrlen = 0; - mtu = cork->fragsize; } + + mtu = cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1262,6 +1270,14 @@ emsgsize: tskey = sk->sk_tskey++; } + /* If this is the first and only packet and device + * supports checksum offloading, let's use it. + */ + if (!skb && + length + fragheaderlen < mtu && + rt->dst.dev->features & NETIF_F_V6_CSUM && + !exthdrlen) + csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. 
@@ -1278,13 +1294,12 @@ emsgsize: * --yoshfuji */ - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip6_ufo_append_data(sk, getfrag, from, length, + err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) @@ -1375,7 +1390,7 @@ alloc_new_skb: * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + @@ -1425,7 +1440,7 @@ alloc_new_skb: /* * Put the packet on the pending queue */ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1444,7 +1459,6 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) @@ -1487,43 +1501,81 @@ error: IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } + +int ip6_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, int dontfrag) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + int exthdrlen; + int err; + + if (flags&MSG_PROBE) + return 0; + if (skb_queue_empty(&sk->sk_write_queue)) { + /* + * setup for corking + */ + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; + + exthdrlen = (opt ? 
opt->opt_flen : 0); + length += exthdrlen; + transhdrlen += exthdrlen; + } else { + fl6 = &inet->cork.fl.u.ip6; + transhdrlen = 0; + } + + return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + &np->cork, sk_page_frag(sk), getfrag, + from, length, transhdrlen, flags, dontfrag); +} EXPORT_SYMBOL_GPL(ip6_append_data); -static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +static void ip6_cork_release(struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { - if (np->cork.opt) { - kfree(np->cork.opt->dst0opt); - kfree(np->cork.opt->dst1opt); - kfree(np->cork.opt->hopopt); - kfree(np->cork.opt->srcrt); - kfree(np->cork.opt); - np->cork.opt = NULL; + if (v6_cork->opt) { + kfree(v6_cork->opt->dst0opt); + kfree(v6_cork->opt->dst1opt); + kfree(v6_cork->opt->hopopt); + kfree(v6_cork->opt->srcrt); + kfree(v6_cork->opt); + v6_cork->opt = NULL; } - if (inet->cork.base.dst) { - dst_release(inet->cork.base.dst); - inet->cork.base.dst = NULL; - inet->cork.base.flags &= ~IPCORK_ALLFRAG; + if (cork->base.dst) { + dst_release(cork->base.dst); + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + memset(&cork->fl, 0, sizeof(cork->fl)); } -int ip6_push_pending_frames(struct sock *sk) +struct sk_buff *__ip6_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct ipv6_txoptions *opt = v6_cork->opt; + struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char 
proto = fl6->flowi6_proto; - int err = 0; - skb = __skb_dequeue(&sk->sk_write_queue); + skb = __skb_dequeue(queue); if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1531,7 +1583,7 @@ int ip6_push_pending_frames(struct sock *sk) /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1556,10 +1608,10 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, + ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, np->autoflowlabel)); - hdr->hop_limit = np->cork.hop_limit; + hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; @@ -1576,34 +1628,104 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } + ip6_cork_release(cork, v6_cork); +out: + return skb; +} + +int ip6_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int err; + err = ip6_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP6_INC_STATS(net, rt->rt6i_idev, + IPSTATS_MIB_OUTDISCARDS); } -out: - ip6_cork_release(inet, np); return err; -error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - goto out; +} + +int ip6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip6_finish_skb(sk); + if (!skb) + return 0; + + return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); -void ip6_flush_pending_frames(struct sock *sk) +static void __ip6_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct 
inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { + while ((skb = __skb_dequeue_tail(queue)) != NULL) { if (skb_dst(skb)) IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } - ip6_cork_release(inet_sk(sk), inet6_sk(sk)); + ip6_cork_release(cork, v6_cork); +} + +void ip6_flush_pending_frames(struct sock *sk) +{ + __ip6_flush_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); + +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, + struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + int dontfrag) +{ + struct inet_cork_full cork; + struct inet6_cork v6_cork; + struct sk_buff_head queue; + int exthdrlen = (opt ? 
opt->opt_flen : 0); + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + if (err) + return ERR_PTR(err); + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; + + err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + &current->task_frag, getfrag, from, + length + exthdrlen, transhdrlen + exthdrlen, + flags, dontfrag); + if (err) { + __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + return ERR_PTR(err); + } + + return __ip6_make_skb(sk, &queue, &cork, &v6_cork); +} diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 92b3da571980..266a264ec212 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1760,6 +1760,14 @@ nla_put_failure: return -EMSGSIZE; } +struct net *ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip6_tnl_get_link_net); + static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, @@ -1783,6 +1791,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 8db6c98fe218..32d9b268e7d8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,14 +62,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +int
udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; - struct sock *sk = sock->sk; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -85,7 +85,7 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ace10d0b3aac..5fb9e212eca8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1016,6 +1016,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 722669754bbf..34b682617f50 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2388,7 +2388,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 66980d8d98d1..8d766d9100cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -996,13 +996,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, lock_sock(sk); skb = np->pktoptions; if (skb) - atomic_inc(&skb->users); - release_sock(sk); - - if (skb) { ip6_datagram_recv_ctl(sk, &msg, skb); - kfree_skb(skb); - } else { + release_sock(sk); + if (!skb) { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo 
src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..471ed24aabae 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -655,7 +655,9 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, + dev, 1, + IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { @@ -1348,7 +1350,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu) { + if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { __be32 n; u32 mtu; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 54520a0bd5e3..74581f706c4d 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,7 +9,8 @@ #include <net/addrconf.h> #include <net/secure_seq.h> -u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, struct in6_addr *src) +static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, + struct in6_addr *src) { u32 hash, id; @@ -54,7 +55,7 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, &addrs[1], &addrs[0]); - skb_shinfo(skb)->ip6_frag_id = id; + skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2d3148378a1f..bd46f736f61d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,8 +163,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = 
AF_INET6; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ee25631f8c29..dae7f1a1e464 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -609,7 +609,7 @@ out: return err; } -static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, +static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); + err = memcpy_from_msg(iph, msg, length); if (err) goto error_fault; @@ -886,8 +886,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 495965358d22..98565ce0ebcd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -499,7 +499,7 @@ static void rt6_probe_deferred(struct work_struct *w) addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); dev_put(work->dev); - kfree(w); + kfree(work); } static void rt6_probe(struct rt6_info *rt) @@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) + struct mx6_config *mxc) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); + err = fib6_add(&table->tb6_root, rt, info, mxc); write_unlock_bh(&table->tb6_lock); return err; @@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info 
*info, int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; - return __ip6_ins_rt(rt, &info, NULL, 0); + struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; + struct mx6_config mxc = { .mx = NULL, }; + + return __ip6_ins_rt(rt, &info, &mxc); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1471,9 +1471,51 @@ out: return entries > rt_max_size; } -/* - * - */ +static int ip6_convert_metrics(struct mx6_config *mxc, + const struct fib6_config *cfg) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (cfg->fc_mx == NULL) + return 0; + + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (unlikely(!mp)) + return -ENOMEM; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + u32 val; + + if (unlikely(type > RTAX_MAX)) + goto err; + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err; + } else { + val = nla_get_u32(nla); + } + + mp[type - 1] = val; + __set_bit(type - 1, mxc->mx_valid); + } + } + + mxc->mx = mp; + + return 0; + err: + kfree(mp); + return -EINVAL; +} int ip6_route_add(struct fib6_config *cfg) { @@ -1483,6 +1525,7 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; + struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1678,8 +1721,14 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + + kfree(mxc.mx); + return err; out: if (dev) dev_put(dev); @@ -2535,7 +2584,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* 
RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ - + nla_total_size(sizeof(struct rta_cacheinfo)); + + nla_total_size(sizeof(struct rta_cacheinfo)) + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ } static int rt6_fill_node(struct net *net, @@ -2676,7 +2726,8 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cdbfe5af6187..e4cbd5798eba 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1763,6 +1763,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9c0b54e87b47..5d46832c6f72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 189dc4ae3eca..d048d46779fc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -909,7 +909,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, ip6_compute_pseudo); @@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, { unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct 
sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } + do { + csum = csum_add(csum, frags->csum); + } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); @@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) { - struct sk_buff *skb; + struct sock *sk = skb->sk; struct udphdr *uh; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; - - if (up->pending == AF_INET) - return udp_push_pending_frames(sk); - - fl6 = &inet->cork.fl.u.ip6; - - /* Grab the skbuff where UDP header space exists. 
*/ - skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) - goto out; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; /* * Create a UDP header @@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; - uh->len = htons(up->len); + uh->len = htons(len); uh->check = 0; if (is_udplite) - csum = udplite_csum_outgoing(sk, skb); - else if (up->no_check6_tx) { /* UDP csum disabled */ + csum = udplite_csum(skb); + else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, - up->len); + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else - csum = udp_csum_outgoing(sk, skb); + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - up->len, fl6->flowi6_proto, csum); + len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip6_push_pending_frames(sk); + err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { UDP6_INC_STATS_USER(sock_net(sk), @@ -1082,6 +1071,30 @@ send: } else UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +static int udp_v6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + struct udp_sock *up = udp_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + /* ip6_finish_skb will release the cork, so make a copy of + * fl6 here. 
+ */ + fl6 = inet_sk(sk)->cork.fl.u.ip6; + + skb = ip6_finish_skb(sk); + if (!skb) + goto out; + + err = udp_v6_send_skb(skb, &fl6); + out: up->len = 0; up->pending = 0; @@ -1164,6 +1177,7 @@ do_udp_sendmsg: if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { /* * There are pending frames. @@ -1294,6 +1308,20 @@ do_udp_sendmsg: goto do_confirm; back_from_confirm: + /* Lockless fast path for the non-corking case */ + if (!corkreq) { + struct sk_buff *skb; + + skb = ip6_make_skb(sk, getfrag, msg, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, + &fl6, (struct rt6_info *)dst, + msg->msg_flags, dontfrag); + err = PTR_ERR(skb); + if (!IS_ERR_OR_NULL(skb)) + err = udp_v6_send_skb(skb, &fl6); + goto release_dst; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -1311,7 +1339,6 @@ do_append_data: if (dontfrag < 0) dontfrag = np->dontfrag; up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, @@ -1323,6 +1350,11 @@ do_append_data: else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; + release_sock(sk); + +release_dst: if (dst) { if (connected) { ip6_dst_store(sk, dst, @@ -1339,9 +1371,6 @@ do_append_data: dst = NULL; } - if (err > 0) - err = np->recverr ? 
net_xmit_errno(err) : 0; - release_sock(sk); out: dst_release(dst); fl6_sock_release(flowlabel); diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 7f2cafddfb6e..1cde711bcab5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -533,7 +533,7 @@ void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery) info.discovery = discovery; /* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */ - self->slot_timeout = sysctl_slot_timeout * HZ / 1000; + self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout); irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info); } @@ -1015,13 +1015,15 @@ void irlap_apply_connection_parameters(struct irlap_cb *self, int now) * Or, this is how much we can keep the pf bit in primary mode. * Therefore, it must be lower or equal than our *OWN* max turn around. * Jean II */ - self->poll_timeout = self->qos_tx.max_turn_time.value * HZ / 1000; + self->poll_timeout = msecs_to_jiffies( + self->qos_tx.max_turn_time.value); /* The Final timeout applies only to the primary station. * It defines the maximum time the primary wait (mostly in RECV mode) * for an answer from the secondary station before polling it again. * Therefore, it must be greater or equal than our *PARTNER* * max turn around time - Jean II */ - self->final_timeout = self->qos_rx.max_turn_time.value * HZ / 1000; + self->final_timeout = msecs_to_jiffies( + self->qos_rx.max_turn_time.value); /* The Watchdog Bit timeout applies only to the secondary station. * It defines the maximum time the secondary wait (mostly in RECV mode) * for poll from the primary station before getting annoyed. 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0ac907adb2f4..b4e923f77954 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -40,6 +40,18 @@ static struct genl_family l2tp_nl_family = { .netnsok = true, }; +static const struct genl_multicast_group l2tp_multicast_group[] = { + { + .name = L2TP_GENL_MCGROUP, + }, +}; + +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_tunnel *tunnel, u8 cmd); +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_session *session, + u8 cmd); + /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; @@ -97,6 +109,52 @@ out: return ret; } +static int l2tp_tunnel_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_tunnel *tunnel, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, tunnel, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + +static int l2tp_session_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_session *session, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, session, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; @@ -188,6 +246,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info break; } + if (ret >= 0) + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, 
L2TP_CMD_TUNNEL_CREATE); out: return ret; } @@ -211,6 +272,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info goto out; } + l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_DELETE); + (void) l2tp_tunnel_delete(tunnel); out: @@ -239,12 +303,15 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info if (info->attrs[L2TP_ATTR_DEBUG]) tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_MODIFY); + out: return ret; } static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_tunnel *tunnel) + struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; @@ -254,8 +321,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla struct ipv6_pinfo *np = NULL; #endif - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, - L2TP_CMD_TUNNEL_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -324,7 +390,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla } out: - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -359,7 +426,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, - NLM_F_ACK, tunnel); + NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) goto err_out; @@ -385,7 +452,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) < 0) goto out; ti++; @@ -539,6 +606,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, 
session_id, peer_session_id, &cfg); + if (ret >= 0) { + session = l2tp_session_find(net, tunnel, session_id); + if (session) + ret = l2tp_session_notify(&l2tp_nl_family, info, session, + L2TP_CMD_SESSION_CREATE); + } + out: return ret; } @@ -555,6 +629,9 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf goto out; } + l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_DELETE); + pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) @@ -601,12 +678,15 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + ret = l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_MODIFY); + out: return ret; } static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_session *session) + struct l2tp_session *session, u8 cmd) { void *hdr; struct nlattr *nest; @@ -615,7 +695,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl sk = tunnel->sock; - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -673,7 +753,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl goto nla_put_failure; nla_nest_end(skb, nest); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -699,7 +780,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, - 0, session); + 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) goto err_out; @@ -737,7 +818,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, 
NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session) <= 0) + session, L2TP_CMD_SESSION_GET) < 0) break; si++; @@ -896,7 +977,9 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops); + return genl_register_family_with_ops_groups(&l2tp_nl_family, + l2tp_nl_ops, + l2tp_multicast_group); } static void l2tp_nl_cleanup(void) diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 75cc6801a431..64a012a0c6e5 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -5,6 +5,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM select CRC32 select AVERAGE ---help--- diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index e53671b1105e..3275f01881be 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -15,7 +15,9 @@ mac80211-y := \ michael.o \ tkip.o \ aes_ccm.o \ + aes_gcm.o \ aes_cmac.o \ + aes_gmac.o \ cfg.o \ ethtool.o \ rx.o \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 09d9caaec591..7869bb40acaa 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -20,7 +20,8 @@ #include "aes_ccm.h" void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist assoc, pt, ct[2]; @@ -35,7 +36,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); sg_init_table(ct, 2); sg_set_buf(&ct[0], data, data_len); - sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); + sg_set_buf(&ct[1], mic, mic_len); aead_request_set_tfm(aead_req, tfm); aead_request_set_assoc(aead_req, &assoc, assoc.length); @@ -45,7 +46,8 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, } int ieee80211_aes_ccm_decrypt(struct crypto_aead 
*tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist assoc, pt, ct[2]; char aead_req_data[sizeof(struct aead_request) + @@ -62,17 +64,18 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); sg_init_table(ct, 2); sg_set_buf(&ct[0], data, data_len); - sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); + sg_set_buf(&ct[1], mic, mic_len); aead_request_set_tfm(aead_req, tfm); aead_request_set_assoc(aead_req, &assoc, assoc.length); - aead_request_set_crypt(aead_req, ct, &pt, - data_len + IEEE80211_CCMP_MIC_LEN, b_0); + aead_request_set_crypt(aead_req, ct, &pt, data_len + mic_len, b_0); return crypto_aead_decrypt(aead_req); } -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) +struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], + size_t key_len, + size_t mic_len) { struct crypto_aead *tfm; int err; @@ -81,9 +84,9 @@ struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) if (IS_ERR(tfm)) return tfm; - err = crypto_aead_setkey(tfm, key, WLAN_KEY_LEN_CCMP); + err = crypto_aead_setkey(tfm, key, key_len); if (!err) - err = crypto_aead_setauthsize(tfm, IEEE80211_CCMP_MIC_LEN); + err = crypto_aead_setauthsize(tfm, mic_len); if (!err) return tfm; diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 2c7ab1948a2e..6a73d1e4d186 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,11 +12,15 @@ #include <linux/crypto.h> -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]); +struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], + size_t key_len, + size_t mic_len); void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, 
size_t data_len, u8 *mic); + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); void ieee80211_aes_key_free(struct crypto_aead *tfm); #endif /* AES_CCM_H */ diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 9b9009f99551..4192806be3d3 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -18,8 +18,8 @@ #include "key.h" #include "aes_cmac.h" -#define AES_CMAC_KEY_LEN 16 #define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */ +#define CMAC_TLEN_256 16 /* CMAC TLen = 128 bits (16 octets) */ #define AAD_LEN 20 @@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad) pad[AES_BLOCK_SIZE - 1] ^= 0x87; } - -static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac) +static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len) { u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; @@ -88,7 +88,7 @@ static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, for (i = 0; i < AES_BLOCK_SIZE; i++) pad[i] ^= cbc[i]; crypto_cipher_encrypt_one(tfm, pad, pad); - memcpy(mac, pad, CMAC_TLEN); + memcpy(mac, pad, mac_len); } @@ -107,17 +107,35 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, addr[2] = zero; len[2] = CMAC_TLEN; - aes_128_cmac_vector(tfm, 3, addr, len, mic); + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN); } +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic) +{ + const u8 *addr[3]; + size_t len[3]; + u8 zero[CMAC_TLEN_256]; + + memset(zero, 0, CMAC_TLEN_256); + addr[0] = aad; + len[0] = AAD_LEN; + addr[1] = data; + len[1] = data_len - CMAC_TLEN_256; + addr[2] = zero; + len[2] = CMAC_TLEN_256; + + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN_256); +} -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]) +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], 
+ size_t key_len) { struct crypto_cipher *tfm; tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN); + crypto_cipher_setkey(tfm, key, key_len); return tfm; } diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 0ce6487af795..3702041f44fd 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,9 +11,12 @@ #include <linux/crypto.h> -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]); +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic); void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); #endif /* AES_CMAC_H */ diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c new file mode 100644 index 000000000000..c2bf6698d738 --- /dev/null +++ b/net/mac80211/aes_gcm.c @@ -0,0 +1,95 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/aes.h>
+
+#include <net/mac80211.h>
+#include "key.h"
+#include "aes_gcm.h"
+/*
+ * Encrypt @data_len bytes at @data in place with the AEAD transform @tfm and
+ * store IEEE80211_GCMP_MIC_LEN bytes of MIC in @mic.
+ *
+ * @j_0 is passed to the request as the IV.  @aad begins with a 16-bit
+ * big-endian length; that many bytes starting at &aad[2] are used as the
+ * associated data.  The value returned by crypto_aead_encrypt() is discarded.
+ */
+void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+			       u8 *data, size_t data_len, u8 *mic)
+{
+	struct scatterlist assoc, pt, ct[2];
+	/* on-stack request: struct aead_request plus crypto_aead_reqsize(tfm) */
+	char aead_req_data[sizeof(struct aead_request) +
+			   crypto_aead_reqsize(tfm)]
+		__aligned(__alignof__(struct aead_request));
+	struct aead_request *aead_req = (void *)aead_req_data;
+
+	memset(aead_req, 0, sizeof(aead_req_data));
+	/* source is @data alone; destination is @data followed by @mic */
+	sg_init_one(&pt, data, data_len);
+	sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
+	sg_init_table(ct, 2);
+	sg_set_buf(&ct[0], data, data_len);
+	sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN);
+
+	aead_request_set_tfm(aead_req, tfm);
+	aead_request_set_assoc(aead_req, &assoc, assoc.length);
+	aead_request_set_crypt(aead_req, &pt, ct, data_len, j_0);
+
+	crypto_aead_encrypt(aead_req);
+}
+/*
+ * Decrypt @data_len bytes at @data in place; the request source is @data
+ * followed by the IEEE80211_GCMP_MIC_LEN bytes at @mic.
+ *
+ * Returns -EINVAL when @data_len is 0, otherwise whatever
+ * crypto_aead_decrypt() returns.  @j_0 and @aad are used exactly as in
+ * ieee80211_aes_gcm_encrypt().
+ */
+int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+			      u8 *data, size_t data_len, u8 *mic)
+{
+	struct scatterlist assoc, pt, ct[2];
+	char aead_req_data[sizeof(struct aead_request) +
+			   crypto_aead_reqsize(tfm)]
+		__aligned(__alignof__(struct aead_request));
+	struct aead_request *aead_req = (void *)aead_req_data;
+
+	if (data_len == 0)
+		return -EINVAL;
+
+	memset(aead_req, 0, sizeof(aead_req_data));
+
+	sg_init_one(&pt, data, data_len);
+	sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
+	sg_init_table(ct, 2);
+	sg_set_buf(&ct[0], data, data_len);
+	sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN);
+
+	aead_request_set_tfm(aead_req, tfm);
+	aead_request_set_assoc(aead_req, &assoc, assoc.length);
+	/* here the two-entry list (data + mic) is the source, so the length covers both */
+	aead_request_set_crypt(aead_req, ct, &pt,
+			       data_len + IEEE80211_GCMP_MIC_LEN, j_0);
+
+	return crypto_aead_decrypt(aead_req);
+}
+/*
+ * Allocate a "gcm(aes)" AEAD transform, set @key (@key_len bytes) on it and
+ * set its authentication size to IEEE80211_GCMP_MIC_LEN.
+ *
+ * Returns the transform on success.  If the allocation fails, the IS_ERR()
+ * value from crypto_alloc_aead() is returned unchanged; if either setup call
+ * fails, the transform is freed and ERR_PTR(err) is returned.
+ */
+struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
+							size_t key_len)
+{
+	struct crypto_aead *tfm;
+	int err;
+
+	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return tfm;
+
+	err = crypto_aead_setkey(tfm, key, key_len);
+	if (!err)
+		err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
+	if (!err)
+		return tfm;
+
+	crypto_free_aead(tfm);
+	return ERR_PTR(err);
+}
+/* Free @tfm with crypto_free_aead(). */
+void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
+{
+	crypto_free_aead(tfm);
+}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
new file mode 100644
index 000000000000..1347fda6b76a
--- /dev/null
+++ b/net/mac80211/aes_gcm.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2014-2015, Qualcomm Atheros, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef AES_GCM_H
+#define AES_GCM_H
+
+#include <linux/crypto.h>
+/* AES-GCM helpers; the definitions are in aes_gcm.c */
+void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+			       u8 *data, size_t data_len, u8 *mic);
+int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+			      u8 *data, size_t data_len, u8 *mic);
+struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
+							size_t key_len);
+void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm);
+
+#endif /* AES_GCM_H */
diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c
new file mode 100644
index 000000000000..1c72edcb0083
--- /dev/null
+++ b/net/mac80211/aes_gmac.c
@@ -0,0 +1,84 @@
+/*
+ * AES-GMAC for IEEE 802.11 BIP-GMAC-128 and BIP-GMAC-256
+ * Copyright 2015, Qualcomm Atheros, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/aes.h> + +#include <net/mac80211.h> +#include "key.h" +#include "aes_gmac.h" + +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 +#define AAD_LEN 20 + +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist sg[3], ct[1]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + + if (data_len < GMAC_MIC_LEN) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + memset(zero, 0, GMAC_MIC_LEN); + sg_init_table(sg, 3); + sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); + sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); + + memcpy(iv, nonce, GMAC_NONCE_LEN); + memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN); + iv[AES_BLOCK_SIZE - 1] = 0x01; + + sg_init_table(ct, 1); + sg_set_buf(&ct[0], mic, GMAC_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, sg, AAD_LEN + data_len); + aead_request_set_crypt(aead_req, NULL, ct, 0, iv); + + crypto_aead_encrypt(aead_req); + + return 0; +} + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + return tfm; + if (!err) + err = crypto_aead_setauthsize(tfm, GMAC_MIC_LEN); + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h new file mode 100644 index 000000000000..d328204d73a8 --- 
/dev/null +++ b/net/mac80211/aes_gmac.h @@ -0,0 +1,20 @@ +/* + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef AES_GMAC_H +#define AES_GMAC_H + +#include <linux/crypto.h> + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len); +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm); + +#endif /* AES_GMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e75d5c53e97b..dd4ff36c557a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -162,8 +162,13 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: break; default: cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); @@ -348,6 +353,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = atomic64_read(&key->u.ccmp.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; @@ -359,6 +365,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; @@ -369,6 +376,30 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + case 
WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; } params.key = key->conf.key; @@ -428,11 +459,13 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + rinfo->bw = RATE_INFO_BW_40; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_80; + else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } @@ -459,16 +492,21 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } - if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80P80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->flags |= 
RATE_INFO_FLAGS_160_MHZ_WIDTH; + + if (sta->last_rx_rate_flag & RX_FLAG_5MHZ) + rinfo->bw = RATE_INFO_BW_5; + else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ) + rinfo->bw = RATE_INFO_BW_10; + else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) + rinfo->bw = RATE_INFO_BW_40; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) + rinfo->bw = RATE_INFO_BW_80; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, @@ -678,7 +716,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_SSID | - BSS_CHANGED_P2P_PS; + BSS_CHANGED_P2P_PS | + BSS_CHANGED_TXPOWER; int err; old = sdata_dereference(sdata->u.ap.beacon, sdata); @@ -2102,6 +2141,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; + enum nl80211_tx_power_setting txp_type = type; + bool update_txp_type = false; if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -2109,6 +2150,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2118,7 +2160,12 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, break; } - ieee80211_recalc_txpower(sdata); + if (txp_type != sdata->vif.bss_conf.txpower_type) { + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } + + ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } @@ -2126,6 +2173,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; 
case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2136,10 +2184,14 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, } mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { sdata->user_power_level = local->user_power_level; + if (txp_type != sdata->vif.bss_conf.txpower_type) + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, update_txp_type); mutex_unlock(&local->iflist_mtx); return 0; @@ -2556,7 +2608,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* if there's one pending or we're scanning, queue this one */ if (!list_empty(&local->roc_list) || - local->scanning || local->radar_detect_enabled) + local->scanning || ieee80211_is_radar_required(local)) goto out_check_combine; /* if not HW assist, just queue & schedule work */ @@ -3664,7 +3716,7 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, * queues. 
*/ synchronize_net(); - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); /* restore the normal QoS parameters * (unconditionally to avoid races) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index da1c12c34487..ff0d2db09df9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -388,7 +388,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, return NULL; } -static bool ieee80211_is_radar_required(struct ieee80211_local *local) +bool ieee80211_is_radar_required(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -406,6 +406,34 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local) return false; } +static bool +ieee80211_chanctx_radar_required(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_chanctx_conf *conf = &ctx->conf; + struct ieee80211_sub_if_data *sdata; + bool required = false; + + lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_held(&local->mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + continue; + if (!sdata->radar_required) + continue; + + required = true; + break; + } + rcu_read_unlock(); + + return required; +} + static struct ieee80211_chanctx * ieee80211_alloc_chanctx(struct ieee80211_local *local, const struct cfg80211_chan_def *chandef, @@ -425,7 +453,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; - ctx->conf.radar_enabled = ieee80211_is_radar_required(local); + ctx->conf.radar_enabled = false; ieee80211_recalc_chanctx_min_def(local, ctx); return ctx; @@ -567,16 +595,15 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, bool radar_enabled; lockdep_assert_held(&local->chanctx_mtx); - /* for setting local->radar_detect_enabled */ + /* for 
ieee80211_is_radar_required */ lockdep_assert_held(&local->mtx); - radar_enabled = ieee80211_is_radar_required(local); + radar_enabled = ieee80211_chanctx_radar_required(local, chanctx); if (radar_enabled == chanctx->conf.radar_enabled) return; chanctx->conf.radar_enabled = radar_enabled; - local->radar_detect_enabled = chanctx->conf.radar_enabled; if (!local->use_chanctx) { local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; @@ -628,7 +655,7 @@ out: } if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); ieee80211_recalc_chanctx_min_def(local, new_ctx); } @@ -1360,7 +1387,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); } ieee80211_recalc_chanctx_chantype(local, ctx); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 54a189f0393e..eeb0bbd69d98 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -303,8 +303,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE) sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n"); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) sf += scnprintf(buf + sf, mxln - sf, "REPORTS_TX_ACK_STATUS\n"); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 5523b94c7c90..71ac1b5f4da5 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -94,17 +94,33 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.tx.iv16); break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn = atomic64_read(&key->u.ccmp.tx_pn); len = 
scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn = atomic64_read(&key->u.aes_cmac.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn = atomic64_read(&key->u.aes_gmac.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_read(&key->u.gcmp.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; default: return 0; } @@ -134,6 +150,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -144,6 +161,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -151,6 +169,26 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); len = p - buf; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + rpn = key->u.aes_gmac.rx_pn; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + len = p - buf; + break; + case 
WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + rpn = key->u.gcmp.rx_pn[i]; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + } + len = p - buf; + break; default: return 0; } @@ -167,12 +205,23 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.replays); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + len = scnprintf(buf, sizeof(buf), "%u\n", key->u.gcmp.replays); + break; default: return 0; } @@ -189,9 +238,15 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.icverrors); + break; default: return 0; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2ebc9ead9695..fdeda17b8dd2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -639,6 +639,21 @@ static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_sta_statistics(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct station_info *sinfo) +{ + sdata = get_bss_sdata(sdata); + if 
(!check_sdata_in_driver(sdata)) + return; + + trace_drv_sta_statistics(local, sdata, sta); + if (local->ops->sta_statistics) + local->ops->sta_statistics(&local->hw, &sdata->vif, sta, sinfo); + trace_drv_return_void(local); +} + static inline int drv_conf_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 ac, const struct ieee80211_tx_queue_params *params) @@ -966,21 +981,6 @@ drv_allow_buffered_frames(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_get_rssi(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - s8 *rssi_dbm) -{ - int ret; - - might_sleep(); - - ret = local->ops->get_rssi(&local->hw, &sdata->vif, sta, rssi_dbm); - trace_drv_get_rssi(local, sta, *rssi_dbm, ret); - - return ret; -} - static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index ebfc8091557b..52bcea6ad9e8 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -117,16 +117,16 @@ static void ieee80211_get_stats(struct net_device *dev, data[i++] = sta->sta_state; - if (sinfo.filled & STATION_INFO_TX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); i++; - if (sinfo.filled & STATION_INFO_RX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.rxrate); i++; - if (sinfo.filled & STATION_INFO_SIGNAL_AVG) + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG)) data[i] = (u8)sinfo.signal_avg; i++; } else { @@ -175,24 +175,24 @@ do_survey: data[i++] = (u8)survey.noise; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME) - data[i++] = survey.channel_time; + if (survey.filled & SURVEY_INFO_TIME) + data[i++] = survey.time; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY) - data[i++] = 
survey.channel_time_busy; + if (survey.filled & SURVEY_INFO_TIME_BUSY) + data[i++] = survey.time_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) - data[i++] = survey.channel_time_ext_busy; + if (survey.filled & SURVEY_INFO_TIME_EXT_BUSY) + data[i++] = survey.time_ext_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX) - data[i++] = survey.channel_time_rx; + if (survey.filled & SURVEY_INFO_TIME_RX) + data[i++] = survey.time_rx; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX) - data[i++] = survey.channel_time_tx; + if (survey.filled & SURVEY_INFO_TIME_TX) + data[i++] = survey.time_tx; else data[i++] = -1LL; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 509bc157ce55..b606b53a49a7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1069,9 +1069,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (sta && rates_updated) { - drv_sta_rc_update(local, sdata, &sta->sta, - IEEE80211_RC_SUPP_RATES_CHANGED); + u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; + u8 rx_nss = sta->sta.rx_nss; + + /* Force rx_nss recalculation */ + sta->sta.rx_nss = 0; rate_control_rate_init(sta); + if (sta->sta.rx_nss != rx_nss) + changed |= IEEE80211_RC_NSS_CHANGED; + + drv_sta_rc_update(local, sdata, &sta->sta, changed); } rcu_read_unlock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cc6e964d9837..3afe36824703 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1168,8 +1168,6 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; - /* DFS/radar detection is enabled */ - bool radar_detect_enabled; struct work_struct radar_detected_work; /* number of RX chains the hardware has */ @@ -1623,7 +1621,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local); void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool 
__ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { @@ -1704,6 +1703,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -1752,7 +1752,8 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), + WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) && + !test_bit(SCAN_COMPLETED, &local->scanning), "%s: resume with hardware scan still in progress\n", wiphy_name(hw->wiphy)); @@ -1881,10 +1882,40 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, void ieee80211_add_pending_skbs(struct ieee80211_local *local, struct sk_buff_head *skbs); void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); + struct ieee80211_sub_if_data *sdata, bool drop); void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - unsigned int queues); + unsigned int queues, bool drop); + +static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) +{ + /* + * If quiescing is set, we are racing with __ieee80211_suspend. + * __ieee80211_suspend flushes the workers after setting quiescing, + * and we check quiescing / suspended before enqueing new workers. + * We should abort the worker to avoid the races below. 
+ */ + if (local->quiescing) + return false; + + /* + * We might already be suspended if the following scenario occurs: + * __ieee80211_suspend Control path + * + * if (local->quiescing) + * return; + * local->quiescing = true; + * flush_workqueue(); + * queue_work(...); + * local->suspended = true; + * local->quiescing = false; + * worker starts running... + */ + if (local->suspended) + return false; + + return true; +} void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, @@ -1981,6 +2012,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); +bool ieee80211_is_radar_required(struct ieee80211_local *local); void ieee80211_dfs_cac_timer(unsigned long data); void ieee80211_dfs_cac_timer_work(struct work_struct *work); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 417355390873..81a27516813e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -73,9 +73,10 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) return false; } -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss) { - if (__ieee80211_recalc_txpower(sdata)) + if (__ieee80211_recalc_txpower(sdata) || update_bss) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } @@ -93,7 +94,7 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; @@ -1169,12 +1170,7 @@ static void ieee80211_iface_work(struct work_struct *work) if (local->scanning) return; - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll 
pick the rest. - */ - if (WARN(local->suspended, - "interface work scheduled while going to suspend\n")) + if (!ieee80211_can_run_worker(local)) return; /* first process frames */ diff --git a/net/mac80211/key.c b/net/mac80211/key.c index bd4e46ec32bd..0825d76edcfc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -24,6 +24,8 @@ #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" +#include "aes_gcm.h" /** @@ -90,7 +92,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - int ret; + int ret = -EOPNOTSUPP; might_sleep(); @@ -141,8 +143,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) sdata->crypto_tx_tailroom_needed_cnt--; WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && @@ -151,7 +152,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) return 0; } - if (ret != -ENOSPC && ret != -EOPNOTSUPP) + if (ret != -ENOSPC && ret != -EOPNOTSUPP && ret != 1) sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, @@ -163,8 +164,18 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_WEP104: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: - /* all of these we can do in software */ + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* all of these we can do in software - if driver can */ + if (ret == 1) + return 0; + if (key->local->hw.flags & 
IEEE80211_HW_SW_CRYPTO_CONTROL) + return -EINVAL; return 0; default: return -EINVAL; @@ -191,8 +202,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sdata = key->sdata; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); ret = drv_set_key(key->local, DISABLE_KEY, sdata, @@ -384,7 +394,26 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ - key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( + key_data, key_len, IEEE80211_CCMP_MIC_LEN); + if (IS_ERR(key->u.ccmp.tfm)) { + err = PTR_ERR(key->u.ccmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; + case WLAN_CIPHER_SUITE_CCMP_256: + key->conf.iv_len = IEEE80211_CCMP_256_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_256_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_CCMP_256_PN_LEN; j++) + key->u.ccmp.rx_pn[i][j] = + seq[IEEE80211_CCMP_256_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. 
+ */ + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( + key_data, key_len, IEEE80211_CCMP_256_MIC_LEN); if (IS_ERR(key->u.ccmp.tfm)) { err = PTR_ERR(key->u.ccmp.tfm); kfree(key); @@ -392,8 +421,12 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, } break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->conf.iv_len = 0; - key->conf.icv_len = sizeof(struct ieee80211_mmie); + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) + key->conf.icv_len = sizeof(struct ieee80211_mmie); + else + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); if (seq) for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = @@ -403,13 +436,51 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * it does not need to be initialized for every packet. */ key->u.aes_cmac.tfm = - ieee80211_aes_cmac_key_setup(key_data); + ieee80211_aes_cmac_key_setup(key_data, key_len); if (IS_ERR(key->u.aes_cmac.tfm)) { err = PTR_ERR(key->u.aes_cmac.tfm); kfree(key); return ERR_PTR(err); } break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + key->conf.iv_len = 0; + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); + if (seq) + for (j = 0; j < IEEE80211_GMAC_PN_LEN; j++) + key->u.aes_gmac.rx_pn[j] = + seq[IEEE80211_GMAC_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. 
+ */ + key->u.aes_gmac.tfm = + ieee80211_aes_gmac_key_setup(key_data, key_len); + if (IS_ERR(key->u.aes_gmac.tfm)) { + err = PTR_ERR(key->u.aes_gmac.tfm); + kfree(key); + return ERR_PTR(err); + } + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_GCMP_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_GCMP_PN_LEN; j++) + key->u.gcmp.rx_pn[i][j] = + seq[IEEE80211_GCMP_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.gcmp.tfm = ieee80211_aes_gcm_key_setup_encrypt(key_data, + key_len); + if (IS_ERR(key->u.gcmp.tfm)) { + err = PTR_ERR(key->u.gcmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; default: if (cs) { size_t len = (seq_len > MAX_PN_LEN) ? @@ -431,10 +502,24 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, static void ieee80211_key_free_common(struct ieee80211_key *key) { - if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + ieee80211_aes_gmac_key_free(key->u.aes_gmac.tfm); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); + break; + } kzfree(key); } @@ -741,6 +826,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->tkip.iv16 = key->u.tkip.tx.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = atomic64_read(&key->u.ccmp.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 
8; @@ -750,6 +836,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[0] = pn64 >> 40; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; @@ -758,6 +845,26 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq->ccmp.pn[5] = pn64; + seq->ccmp.pn[4] = pn64 >> 8; + seq->ccmp.pn[3] = pn64 >> 16; + seq->ccmp.pn[2] = pn64 >> 24; + seq->ccmp.pn[1] = pn64 >> 32; + seq->ccmp.pn[0] = pn64 >> 40; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq->gcmp.pn[5] = pn64; + seq->gcmp.pn[4] = pn64 >> 8; + seq->gcmp.pn[3] = pn64 >> 16; + seq->gcmp.pn[2] = pn64 >> 24; + seq->gcmp.pn[1] = pn64 >> 32; + seq->gcmp.pn[0] = pn64 >> 40; + break; default: WARN_ON(1); } @@ -780,6 +887,7 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, seq->tkip.iv16 = key->u.tkip.rx[tid].iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) @@ -789,11 +897,29 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(seq->aes_gmac.pn, pn, IEEE80211_GMAC_PN_LEN); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < 
-1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(seq->gcmp.pn, pn, IEEE80211_GCMP_PN_LEN); + break; } } EXPORT_SYMBOL(ieee80211_get_key_rx_seq); @@ -812,6 +938,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, key->u.tkip.tx.iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = (u64)seq->ccmp.pn[5] | ((u64)seq->ccmp.pn[4] << 8) | ((u64)seq->ccmp.pn[3] << 16) | @@ -821,6 +948,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, atomic64_set(&key->u.ccmp.tx_pn, pn64); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = (u64)seq->aes_cmac.pn[5] | ((u64)seq->aes_cmac.pn[4] << 8) | ((u64)seq->aes_cmac.pn[3] << 16) | @@ -829,6 +957,26 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, ((u64)seq->aes_cmac.pn[0] << 40); atomic64_set(&key->u.aes_cmac.tx_pn, pn64); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = (u64)seq->aes_gmac.pn[5] | + ((u64)seq->aes_gmac.pn[4] << 8) | + ((u64)seq->aes_gmac.pn[3] << 16) | + ((u64)seq->aes_gmac.pn[2] << 24) | + ((u64)seq->aes_gmac.pn[1] << 32) | + ((u64)seq->aes_gmac.pn[0] << 40); + atomic64_set(&key->u.aes_gmac.tx_pn, pn64); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = (u64)seq->gcmp.pn[5] | + ((u64)seq->gcmp.pn[4] << 8) | + ((u64)seq->gcmp.pn[3] << 16) | + ((u64)seq->gcmp.pn[2] << 24) | + ((u64)seq->gcmp.pn[1] << 32) | + ((u64)seq->gcmp.pn[0] << 40); + atomic64_set(&key->u.gcmp.tx_pn, pn64); + break; default: WARN_ON(1); break; @@ -852,6 +1000,7 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, key->u.tkip.rx[tid].iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) @@ -861,11 +1010,29 
@@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(pn, seq->aes_gmac.pn, IEEE80211_GMAC_PN_LEN); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(pn, seq->gcmp.pn, IEEE80211_GCMP_PN_LEN); + break; default: WARN_ON(1); break; @@ -889,8 +1056,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 19db68663d75..d57a9915494f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -95,6 +95,24 @@ struct ieee80211_key { u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; struct { + atomic64_t tx_pn; + u8 rx_pn[IEEE80211_GMAC_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsCMACReplays */ + u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ + } aes_gmac; + struct { + atomic64_t tx_pn; + /* Last received packet number. The first + * IEEE80211_NUM_TIDS counters are used with Data + * frames and the last counter is used with Robust + * Management frames. 
+ */ + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_GCMP_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsGCMPReplays */ + } gcmp; + struct { /* generic cipher scheme */ u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; } gen; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6ab99da38db9..5e09d354c5a5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -658,7 +658,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) bool have_wep = !(IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)); bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; - const struct ieee80211_cipher_scheme *cs = local->hw.cipher_schemes; int n_suites = 0, r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { @@ -667,79 +666,109 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, + WLAN_CIPHER_SUITE_CCMP_256, + WLAN_CIPHER_SUITE_GCMP, + WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! */ - WLAN_CIPHER_SUITE_AES_CMAC + WLAN_CIPHER_SUITE_AES_CMAC, + WLAN_CIPHER_SUITE_BIP_CMAC_256, + WLAN_CIPHER_SUITE_BIP_GMAC_128, + WLAN_CIPHER_SUITE_BIP_GMAC_256, }; - /* Driver specifies the ciphers, we have nothing to do... */ - if (local->hw.wiphy->cipher_suites && have_wep) - return 0; + if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || + local->hw.wiphy->cipher_suites) { + /* If the driver advertises, or doesn't support SW crypto, + * we only need to remove WEP if necessary. + */ + if (have_wep) + return 0; + + /* well if it has _no_ ciphers ... 
fine */ + if (!local->hw.wiphy->n_cipher_suites) + return 0; + + /* Driver provides cipher suites, but we need to exclude WEP */ + suites = kmemdup(local->hw.wiphy->cipher_suites, + sizeof(u32) * local->hw.wiphy->n_cipher_suites, + GFP_KERNEL); + if (!suites) + return -ENOMEM; + + for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { + u32 suite = local->hw.wiphy->cipher_suites[r]; - /* Set up cipher suites if driver relies on mac80211 cipher defs */ - if (!local->hw.wiphy->cipher_suites && !cs) { + if (suite == WLAN_CIPHER_SUITE_WEP40 || + suite == WLAN_CIPHER_SUITE_WEP104) + continue; + suites[w++] = suite; + } + } else if (!local->hw.cipher_schemes) { + /* If the driver doesn't have cipher schemes, there's nothing + * else to do other than assign the (software supported and + * perhaps offloaded) cipher suites. + */ local->hw.wiphy->cipher_suites = cipher_suites; local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) - local->hw.wiphy->n_cipher_suites--; + local->hw.wiphy->n_cipher_suites -= 4; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; local->hw.wiphy->n_cipher_suites -= 2; } + /* not dynamically allocated, so just return */ return 0; - } + } else { + const struct ieee80211_cipher_scheme *cs; - if (!local->hw.wiphy->cipher_suites) { - /* - * Driver specifies cipher schemes only - * We start counting ciphers defined by schemes, TKIP and CCMP + cs = local->hw.cipher_schemes; + + /* Driver specifies cipher schemes only (but not cipher suites + * including the schemes) + * + * We start counting ciphers defined by schemes, TKIP, CCMP, + * CCMP-256, GCMP, and GCMP-256 */ - n_suites = local->hw.n_cipher_schemes + 2; + n_suites = local->hw.n_cipher_schemes + 5; /* check if we have WEP40 and WEP104 */ if (have_wep) n_suites += 2; - /* check if we have AES_CMAC */ + /* check if we have AES_CMAC, BIP-CMAC-256, BIP-GMAC-128, + * BIP-GMAC-256 + */ if (have_mfp) - n_suites++; + n_suites += 4; suites = kmalloc(sizeof(u32) * n_suites, 
GFP_KERNEL); if (!suites) return -ENOMEM; suites[w++] = WLAN_CIPHER_SUITE_CCMP; + suites[w++] = WLAN_CIPHER_SUITE_CCMP_256; suites[w++] = WLAN_CIPHER_SUITE_TKIP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP_256; if (have_wep) { suites[w++] = WLAN_CIPHER_SUITE_WEP40; suites[w++] = WLAN_CIPHER_SUITE_WEP104; } - if (have_mfp) + if (have_mfp) { suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; + suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_128; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; + } for (r = 0; r < local->hw.n_cipher_schemes; r++) suites[w++] = cs[r].cipher; - } else { - /* Driver provides cipher suites, but we need to exclude WEP */ - suites = kmemdup(local->hw.wiphy->cipher_suites, - sizeof(u32) * local->hw.wiphy->n_cipher_suites, - GFP_KERNEL); - if (!suites) - return -ENOMEM; - - for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { - u32 suite = local->hw.wiphy->cipher_suites[r]; - - if (suite == WLAN_CIPHER_SUITE_WEP40 || - suite == WLAN_CIPHER_SUITE_WEP104) - continue; - suites[w++] = suite; - } } local->hw.wiphy->cipher_suites = suites; @@ -916,10 +945,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), - "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); - /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. 
It @@ -1045,10 +1070,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { - rtnl_lock(); + if (result) goto fail_pm_qos; - } #ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; @@ -1076,15 +1099,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - rtnl_lock(); #endif fail_pm_qos: - ieee80211_led_exit(local); + rtnl_lock(); + rate_control_deinitialize(local); ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); + ieee80211_led_exit(local); ieee80211_wep_free(local); - sta_info_stop(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); @@ -1180,6 +1203,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) kfree(rcu_access_pointer(local->tx_latency)); + sta_info_stop(local); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 837a406a9dd6..10ac6324c1d0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -157,14 +157,18 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; + struct ieee80211_sta_ht_cap sta_ht_cap; u32 ht_cfreq, ret; + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + chandef->chan = channel; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = channel->center_freq; chandef->center_freq2 = 0; - if (!ht_cap || !ht_oper || !sband->ht_cap.ht_supported) { + if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -198,7 +202,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } /* check 40 MHz 
support, if we have it */ - if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { + if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chandef->width = NL80211_CHAN_WIDTH_40; @@ -1054,8 +1058,6 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata->csa_block_tx = false; } - cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); - sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; @@ -1067,6 +1069,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) &ifmgd->csa_connection_drop_work); return; } + + cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1284,8 +1288,11 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, country_ie_len -= 3; } - if (have_chan_pwr) + if (have_chan_pwr && pwr_constr_elem) *pwr_reduction = *pwr_constr_elem; + else + *pwr_reduction = 0; + return have_chan_pwr; } @@ -1314,10 +1321,11 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, int chan_pwr = 0, pwr_reduction_80211h = 0; int pwr_level_cisco, pwr_level_80211h; int new_ap_level; + __le16 capab = mgmt->u.probe_resp.capab_info; - if (country_ie && pwr_constr_ie && - mgmt->u.probe_resp.capab_info & - cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT)) { + if (country_ie && + (capab & cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT) || + capab & cpu_to_le16(WLAN_CAPABILITY_RADIO_MEASURE))) { has_80211h_pwr = ieee80211_find_80211h_pwr_constr( sdata, channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); @@ -1596,7 +1604,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) } else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - ieee80211_flush_queues(local, sdata); + 
ieee80211_flush_queues(local, sdata, false); } } @@ -2003,18 +2011,26 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* disable per-vif ps */ ieee80211_recalc_ps_vif(sdata); - /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ + /* make sure ongoing transmission finishes */ + synchronize_net(); + + /* + * drop any frame before deauth/disassoc, this can be data or + * management frame. Since we are disconnecting, we should not + * insist sending these frames which can take time and delay + * the disconnection and possible the roaming. + */ if (tx) - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, true); /* deauthenticate/disassociate now */ if (tx || frame_buf) ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, reason, tx, frame_buf); - /* flush out frame */ + /* flush out frame - make sure the deauth was actually sent */ if (tx) - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); @@ -2440,6 +2456,12 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); if (!assoc) { + /* + * we are not authenticated yet, the only timer that could be + * running is the timeout for the authentication response which + * which is not relevant anymore. + */ + del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, auth_data->bss->bssid); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); @@ -2747,6 +2769,12 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); if (!assoc) { + /* + * we are not associated yet, the only timer that could be + * running is the timeout for the association response which + * which is not relevant anymore. 
+ */ + del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->bss->bssid); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); @@ -4197,9 +4225,13 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; - bool have_sta = false; + struct ieee80211_supported_band *sband; + struct ieee80211_sta_ht_cap sta_ht_cap; + bool have_sta = false, is_override = false; int err; + sband = local->hw.wiphy->bands[cbss->channel->band]; + if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; @@ -4214,25 +4246,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (!new_sta) return -ENOMEM; } + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + + is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) != + (sband->ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40); + + if (new_sta || is_override) { + err = ieee80211_prep_channel(sdata, cbss); + if (err) { + if (new_sta) + sta_info_free(local, new_sta); + return -EINVAL; + } + } + if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_supported_band *sband; const struct cfg80211_bss_ies *ies; - int shift; + int shift = ieee80211_vif_get_shift(&sdata->vif); u32 rate_flags; - sband = local->hw.wiphy->bands[cbss->channel->band]; - - err = ieee80211_prep_channel(sdata, cbss); - if (err) { - sta_info_free(local, new_sta); - return -EINVAL; - } - shift = ieee80211_vif_get_shift(&sdata->vif); - rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { @@ -4668,8 +4707,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; 
rcu_read_unlock(); + if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n")) + sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD; + if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index ff20b2ebdb30..683f0e3cb124 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -121,7 +121,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, false); - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -398,7 +398,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_roc_notify_destroy(roc, !roc->abort); if (started && !on_channel) { - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 4a95fe3cffbc..ca405b6b686d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -41,7 +41,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* flush out all packets */ synchronize_net(); - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, true); local->quiescing = true; /* make quiescing visible to timers everywhere */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index d51f6b1c549b..7c86a002df95 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -263,12 
+263,12 @@ static inline unsigned int minstrel_get_retry_count(struct minstrel_rate *mr, struct ieee80211_tx_info *info) { - unsigned int retry = mr->adjusted_retry_count; + u8 retry = mr->adjusted_retry_count; if (info->control.use_rts) - retry = max(2U, min(mr->stats.retry_count_rtscts, retry)); + retry = max_t(u8, 2, min(mr->stats.retry_count_rtscts, retry)); else if (info->control.use_cts_prot) - retry = max(2U, min(mr->retry_count_cts, retry)); + retry = max_t(u8, 2, min(mr->retry_count_cts, retry)); return retry; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 97eca86a4af0..410efe620c57 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -33,8 +33,8 @@ minstrel_ewma(int old, int new, int weight) struct minstrel_rate_stats { /* current / last sampling period attempts/success counters */ - unsigned int attempts, last_attempts; - unsigned int success, last_success; + u16 attempts, last_attempts; + u16 success, last_success; /* total attempts/success counters */ u64 att_hist, succ_hist; @@ -46,8 +46,8 @@ struct minstrel_rate_stats { unsigned int cur_prob, probability; /* maximum retry counts */ - unsigned int retry_count; - unsigned int retry_count_rtscts; + u8 retry_count; + u8 retry_count_rtscts; u8 sample_skipped; bool retry_updated; @@ -55,14 +55,15 @@ struct minstrel_rate_stats { struct minstrel_rate { int bitrate; - int rix; + + s8 rix; + u8 retry_count_cts; + u8 adjusted_retry_count; unsigned int perfect_tx_time; unsigned int ack_time; int sample_limit; - unsigned int retry_count_cts; - unsigned int adjusted_retry_count; struct minstrel_rate_stats stats; }; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d69ca513848e..1101563357ea 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -361,9 +361,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u16 known = local->hw.radiotap_vht_details; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); - 
/* known field - how to handle 80+80? */ - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - known &= ~IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; put_unaligned_le16(known, pos); pos += 2; /* flags */ @@ -378,8 +375,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* bandwidth */ if (status->vht_flag & RX_VHT_FLAG_80MHZ) *pos++ = 4; - else if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - *pos++ = 0; /* marked not known above */ else if (status->vht_flag & RX_VHT_FLAG_160MHZ) *pos++ = 11; else if (status->flag & RX_FLAG_40MHZ) @@ -652,6 +647,7 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) { struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data; struct ieee80211_mmie *mmie; + struct ieee80211_mmie_16 *mmie16; if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; @@ -661,11 +657,18 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); - if (mmie->element_id != WLAN_EID_MMIE || - mmie->length != sizeof(*mmie) - 2) - return -1; - - return le16_to_cpu(mmie->key_id); + if (mmie->element_id == WLAN_EID_MMIE && + mmie->length == sizeof(*mmie) - 2) + return le16_to_cpu(mmie->key_id); + + mmie16 = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie16)); + if (skb->len >= 24 + sizeof(*mmie16) && + mmie16->element_id == WLAN_EID_MMIE && + mmie16->length == sizeof(*mmie16) - 2) + return le16_to_cpu(mmie16->key_id); + + return -1; } static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, @@ -1655,11 +1658,27 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_tkip_decrypt(rx); break; case WLAN_CIPHER_SUITE_CCMP: - result = ieee80211_crypto_ccmp_decrypt(rx); + result = ieee80211_crypto_ccmp_decrypt( + rx, IEEE80211_CCMP_MIC_LEN); + break; + case WLAN_CIPHER_SUITE_CCMP_256: + result = ieee80211_crypto_ccmp_decrypt( + rx, IEEE80211_CCMP_256_MIC_LEN); break; case 
WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + result = ieee80211_crypto_aes_cmac_256_decrypt(rx); + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + result = ieee80211_crypto_aes_gmac_decrypt(rx); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + result = ieee80211_crypto_gcmp_decrypt(rx); + break; default: result = ieee80211_crypto_hw_decrypt(rx); } @@ -1786,7 +1805,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->seqno_idx, &(rx->skb)); - if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && + if (rx->key && + (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) && ieee80211_has_protected(fc)) { int queue = rx->security_idx; /* Store CCMP PN so that we can verify that the next @@ -1815,7 +1836,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) + if (!rx->key || + (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256)) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { @@ -2314,6 +2337,15 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; + if (rx->sta) { + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + rx->sta->rx_msdu[rx->seqno_idx]++; + } + /* * Send unexpected-4addr-frame event to hostapd. 
For older versions, * also drop the frame to cooked monitor interfaces. @@ -2598,7 +2630,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { struct ieee80211_supported_band *sband; u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; - enum ieee80211_sta_rx_bandwidth new_bw; + enum ieee80211_sta_rx_bandwidth max_bw, new_bw; /* If it doesn't support 40 MHz it can't change ... */ if (!(rx->sta->sta.ht_cap.cap & @@ -2606,13 +2638,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) - new_bw = IEEE80211_STA_RX_BW_20; + max_bw = IEEE80211_STA_RX_BW_20; else - new_bw = ieee80211_sta_cur_vht_bw(rx->sta); + max_bw = ieee80211_sta_cap_rx_bw(rx->sta); + + /* set cur_max_bandwidth and recalc sta bw */ + rx->sta->cur_max_bandwidth = max_bw; + new_bw = ieee80211_sta_cur_vht_bw(rx->sta); if (rx->sta->sta.bandwidth == new_bw) goto handled; + rx->sta->sta.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; rate_control_rate_update(local, sband, rx->sta, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae842678b629..05f0d711b6d8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -416,7 +416,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); ieee80211_configure_filter(local); @@ -432,7 +432,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - if (local->radar_detect_enabled) + if (ieee80211_is_radar_required(local)) return false; if (!list_empty(&local->roc_list)) @@ -505,7 +505,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&local->mtx); - if 
(local->scan_req) + if (local->scan_req || ieee80211_is_radar_required(local)) return -EBUSY; if (!ieee80211_can_scan(local, sdata)) { @@ -805,7 +805,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); *next_delay = 0; } else *next_delay = HZ / 10; @@ -828,6 +828,11 @@ void ieee80211_scan_work(struct work_struct *work) mutex_lock(&local->mtx); + if (!ieee80211_can_run_worker(local)) { + aborted = true; + goto out_complete; + } + sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); scan_req = rcu_dereference_protected(local->scan_req, diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index efeba56c913b..06e6ac8cc693 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -34,19 +34,15 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; - const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; - ht_oper = elems->ht_operation; if (sta_flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ)) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; - /* only used for bandwidth here */ - ht_oper = NULL; } if (sta_flags & IEEE80211_STA_DISABLE_VHT) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a42f5b2b024d..00ca8dcc2bcf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -116,7 +116,6 @@ static void __cleanup_single_sta(struct sta_info *sta) clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); - sta_info_recalc_tim(sta); } for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -625,7 +624,7 @@ static 
unsigned long ieee80211_tids_for_ac(int ac) } } -void sta_info_recalc_tim(struct sta_info *sta) +static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) { struct ieee80211_local *local = sta->local; struct ps_data *ps; @@ -667,6 +666,9 @@ void sta_info_recalc_tim(struct sta_info *sta) if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_tim = 0; + if (ignore_pending) + ignore_for_tim = BIT(IEEE80211_NUM_ACS) - 1; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; @@ -695,7 +697,7 @@ void sta_info_recalc_tim(struct sta_info *sta) else __bss_tim_clear(ps->tim, id); - if (local->ops->set_tim) { + if (local->ops->set_tim && !WARN_ON(sta->dead)) { local->tim_in_locked_section = true; drv_set_tim(local, &sta->sta, indicate_tim); local->tim_in_locked_section = false; @@ -705,6 +707,11 @@ out_unlock: spin_unlock_bh(&local->tim_lock); } +void sta_info_recalc_tim(struct sta_info *sta) +{ + __sta_info_recalc_tim(sta, false); +} + static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; @@ -874,6 +881,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct station_info sinfo = {}; int ret; /* @@ -887,6 +895,9 @@ static void __sta_info_destroy_part2(struct sta_info *sta) /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); + /* disable TIM bit - last chance to tell driver */ + __sta_info_recalc_tim(sta, true); + sta->dead = true; local->num_sta--; @@ -908,7 +919,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL); + sta_set_sinfo(sta, &sinfo); + cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); @@ -1243,10 +1255,11 @@ static void 
ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_PS_RESPONSE | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; + info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; + if (call_driver) drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); @@ -1395,8 +1408,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * STA may still remain is PS mode after this frame * exchange. */ - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_PS_RESPONSE; + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are @@ -1743,7 +1756,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; struct timespec uptime; - u64 packets = 0; u32 thr = 0; int i, ac; @@ -1752,49 +1764,90 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->generation = sdata->local->sta_generation; - sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES64 | - STATION_INFO_TX_BYTES64 | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_TX_RETRIES | - STATION_INFO_TX_FAILED | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BEACON_LOSS_COUNT; + /* do before driver, so beacon filtering drivers have a + * chance to e.g. 
just add the number of filtered beacons + * (or just modify the value entirely, of course) + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal; + + drv_sta_statistics(local, sdata, &sta->sta, sinfo); + + sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) | + BIT(NL80211_STA_INFO_STA_FLAGS) | + BIT(NL80211_STA_INFO_BSS_PARAM) | + BIT(NL80211_STA_INFO_CONNECTED_TIME) | + BIT(NL80211_STA_INFO_RX_DROP_MISC) | + BIT(NL80211_STA_INFO_BEACON_LOSS); ktime_get_ts(&uptime); sinfo->connected_time = uptime.tv_sec - sta->last_connected; - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - sinfo->tx_bytes = 0; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - sinfo->tx_bytes += sta->tx_bytes[ac]; - packets += sta->tx_packets[ac]; + + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | + BIT(NL80211_STA_INFO_TX_BYTES)))) { + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_bytes += sta->tx_bytes[ac]; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { + sinfo->tx_packets = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_packets += sta->tx_packets[ac]; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); + } + + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | + BIT(NL80211_STA_INFO_RX_BYTES)))) { + sinfo->rx_bytes = sta->rx_bytes; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { + sinfo->rx_packets = sta->rx_packets; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { + sinfo->tx_retries = sta->tx_retry_count; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); } - sinfo->tx_packets = packets; - sinfo->rx_bytes = sta->rx_bytes; - sinfo->rx_packets = sta->rx_packets; - sinfo->tx_retries = sta->tx_retry_count; - sinfo->tx_failed = 
sta->tx_retry_failed; + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { + sinfo->tx_failed = sta->tx_retry_failed; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); + } + sinfo->rx_dropped_misc = sta->rx_dropped; sinfo->beacon_loss_count = sta->beacon_loss_count; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { + sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX) | + BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG); + sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); + } + if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - if (!local->ops->get_rssi || - drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); + } } - if (sta->chains) { - sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG; + + if (sta->chains && + !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = sta->chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { @@ -1804,23 +1857,61 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sta_set_rate_info_rx(sta, &sinfo->rxrate); + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { + sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sinfo->filled |= 
BIT(NL80211_STA_INFO_TX_BITRATE); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) { + sta_set_rate_info_rx(sta, &sinfo->rxrate); + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); + } + + sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS); + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i]; + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); + tidstats->rx_msdu = sta->rx_msdu[i]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); + tidstats->tx_msdu = sta->tx_msdu[i]; + } + + if (!(tidstats->filled & + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && + local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + tidstats->filled |= + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); + tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; + } + + if (!(tidstats->filled & + BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && + local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + tidstats->filled |= + BIT(NL80211_TID_STATS_TX_MSDU_FAILED); + tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; + } + } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - sinfo->filled |= STATION_INFO_LLID | - STATION_INFO_PLID | - STATION_INFO_PLINK_STATE | - STATION_INFO_LOCAL_PM | - STATION_INFO_PEER_PM | - STATION_INFO_NONPEER_PM; + sinfo->filled |= BIT(NL80211_STA_INFO_LLID) | + BIT(NL80211_STA_INFO_PLID) | + BIT(NL80211_STA_INFO_PLINK_STATE) | + BIT(NL80211_STA_INFO_LOCAL_PM) | + BIT(NL80211_STA_INFO_PEER_PM) | + BIT(NL80211_STA_INFO_NONPEER_PM); sinfo->llid = sta->llid; sinfo->plid = sta->plid; sinfo->plink_state = sta->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= STATION_INFO_T_OFFSET; + sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->t_offset; } sinfo->local_pm = sta->local_pm; @@ -1869,7 +1960,7 @@ void 
sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) thr = drv_get_expected_throughput(local, &sta->sta); if (thr != 0) { - sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4f052bb2a5ad..925e68fe64c7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -346,6 +346,14 @@ struct ieee80211_tx_latency_stat { * @cipher_scheme: optional cipher scheme for this station * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) + * @tx_msdu: MSDUs transmitted to this station, using IEEE80211_NUM_TID + * entry for non-QoS frames + * @tx_msdu_retries: MSDU retries for transmissions to to this station, + * using IEEE80211_NUM_TID entry for non-QoS frames + * @tx_msdu_failed: MSDU failures for transmissions to to this station, + * using IEEE80211_NUM_TID entry for non-QoS frames + * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID + * entry for non-QoS frames */ struct sta_info { /* General information, mostly static */ @@ -416,6 +424,10 @@ struct sta_info { u32 last_rx_rate_vht_flag; u8 last_rx_rate_vht_nss; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; + u64 tx_msdu[IEEE80211_NUM_TIDS + 1]; + u64 tx_msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 tx_msdu_failed[IEEE80211_NUM_TIDS + 1]; + u64 rx_msdu[IEEE80211_NUM_TIDS + 1]; /* * Aggregation information, locked with lock. 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bb146f377ee4..e679b7c9b160 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -664,13 +664,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, struct ieee80211_supported_band *sband; int retry_count; int rates_idx; - bool acked; + bool acked, noack_success; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); sband = hw->wiphy->bands[info->band]; acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED); + if (pubsta) { struct sta_info *sta; @@ -696,7 +698,7 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, rate_control_tx_status_noskb(local, sband, sta, info); } - if (acked) { + if (acked || noack_success) { local->dot11TransmittedFrameCount++; if (!pubsta) local->dot11MulticastTransmittedFrameCount++; @@ -728,6 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int rtap_len; int shift = 0; + int tid = IEEE80211_NUM_TIDS; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -771,7 +774,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { - u16 tid, ssn; + u16 ssn; u8 *qc; qc = ieee80211_get_qos_ctl(hdr); @@ -780,10 +783,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) & IEEE80211_SCTL_SEQ); ieee80211_send_bar(&sta->sdata->vif, hdr->addr1, tid, ssn); + } else if (ieee80211_is_data_qos(fc)) { + u8 *qc = ieee80211_get_qos_ctl(hdr); + + tid = qc[0] & 0xf; } if (!acked && ieee80211_is_back_req(fc)) { - u16 tid, control; + u16 control; /* * BAR failed, store the last SSN and retry sending @@ -811,6 +818,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (!acked) sta->tx_retry_failed++; sta->tx_retry_count += retry_count; + + if (ieee80211_is_data_present(fc)) { + if (!acked) 
+ sta->tx_msdu_failed[tid]++; + sta->tx_msdu_retries[tid] += retry_count; + } } rate_control_tx_status(local, sband, sta, skb); @@ -856,10 +869,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * Fragments are passed to low-level drivers as separate skbs, so these * are actually fragments, not frames. Update frame counters only for * the first fragment of the frame. */ - if (info->flags & IEEE80211_TX_STAT_ACK) { + if ((info->flags & IEEE80211_TX_STAT_ACK) || + (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { local->dot11TransmittedFrameCount++; - if (is_multicast_ether_addr(hdr->addr1)) + if (is_multicast_ether_addr(ieee80211_get_DA(hdr))) local->dot11MulticastTransmittedFrameCount++; if (retry_count > 0) local->dot11RetryCount++; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 55ddd77b865d..c9f9752217ac 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -68,17 +68,24 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, ch = ieee80211_get_channel(sdata->local->hw.wiphy, i); if (ch) { /* we will be active on the channel */ - u32 flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR; cfg80211_chandef_create(&chandef, ch, - NL80211_CHAN_HT20); - if (cfg80211_chandef_usable(sdata->local->hw.wiphy, - &chandef, flags)) { + NL80211_CHAN_NO_HT); + if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy, + &chandef, + sdata->wdev.iftype)) { ch_cnt++; + /* + * check if the next channel is also part of + * this allowed range + */ continue; } } + /* + * we've reached the end of a range, with allowed channels + * found + */ if (ch_cnt) { u8 *pos = skb_put(skb, 2); *pos++ = ieee80211_frequency_to_channel(subband_start); @@ -89,6 +96,15 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, } } + /* all channels in the requested range are allowed - add them here */ + if (ch_cnt) { + u8 *pos = skb_put(skb, 2); + *pos++ = 
ieee80211_frequency_to_channel(subband_start); + *pos++ = ch_cnt; + + subband_cnt++; + } + return subband_cnt; } @@ -329,24 +345,24 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, */ sband = local->hw.wiphy->bands[band]; memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); - if ((action_code == WLAN_TDLS_SETUP_REQUEST || - action_code == WLAN_TDLS_SETUP_RESPONSE) && - ht_cap.ht_supported && (!sta || sta->sta.ht_cap.ht_supported)) { - if (action_code == WLAN_TDLS_SETUP_REQUEST) { - ieee80211_apply_htcap_overrides(sdata, &ht_cap); - - /* disable SMPS in TDLS initiator */ - ht_cap.cap |= (WLAN_HT_CAP_SM_PS_DISABLED - << IEEE80211_HT_CAP_SM_PS_SHIFT); - } else { - /* disable SMPS in TDLS responder */ - sta->sta.ht_cap.cap |= - (WLAN_HT_CAP_SM_PS_DISABLED - << IEEE80211_HT_CAP_SM_PS_SHIFT); - - /* the peer caps are already intersected with our own */ - memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); - } + + if (action_code == WLAN_TDLS_SETUP_REQUEST && ht_cap.ht_supported) { + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + /* disable SMPS in TDLS initiator */ + ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT; + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); + ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); + } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && + ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + /* disable SMPS in TDLS responder */ + sta->sta.ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT; + + /* the peer caps are already intersected with our own */ + memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); @@ -836,7 +852,6 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, */ if ((action_code == WLAN_TDLS_TEARDOWN) && (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - struct sta_info *sta = NULL; 
bool try_resend; /* Should we keep skb for possible resend */ /* If not sending directly to peer - no point in keeping skb */ @@ -912,7 +927,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); } - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, @@ -952,7 +967,7 @@ ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, */ ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, @@ -1098,7 +1113,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, */ tasklet_kill(&local->tx_pending_tasklet); /* flush a potentially queued teardown packet */ - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = sta_info_destroy_addr(sdata, peer); break; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8e461a02c6a8..263a9561eb26 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -825,6 +825,13 @@ DECLARE_EVENT_CLASS(sta_event, ) ); +DEFINE_EVENT(sta_event, drv_sta_statistics, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1329,32 +1336,6 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); -TRACE_EVENT(drv_get_rssi, - TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, - s8 rssi, int ret), - - TP_ARGS(local, sta, rssi, ret), - - TP_STRUCT__entry( - LOCAL_ENTRY - STA_ENTRY - __field(s8, rssi) - __field(int, ret) - 
), - - TP_fast_assign( - LOCAL_ASSIGN; - STA_ASSIGN; - __entry->rssi = rssi; - __entry->ret = ret; - ), - - TP_printk( - LOCAL_PR_FMT STA_PR_FMT " rssi:%d ret:%d", - LOCAL_PR_ARG, STA_PR_ARG, __entry->rssi, __entry->ret - ) -); - DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 058686a721a1..88a18ffe2975 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -626,6 +626,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -636,6 +639,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) ieee80211_is_mgmt(hdr->frame_control); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -815,6 +821,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) /* for pure STA mode without beacons, we can do it */ hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; + if (tx->sta) + tx->sta->tx_msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -831,6 +839,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; seq = &tx->sta->tid_seq[tid]; + tx->sta->tx_msdu[tid]++; hdr->seq_ctrl = cpu_to_le16(*seq); @@ -1008,9 +1017,21 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); case WLAN_CIPHER_SUITE_CCMP: - return ieee80211_crypto_ccmp_encrypt(tx); + return ieee80211_crypto_ccmp_encrypt( + tx, 
IEEE80211_CCMP_MIC_LEN); + case WLAN_CIPHER_SUITE_CCMP_256: + return ieee80211_crypto_ccmp_encrypt( + tx, IEEE80211_CCMP_256_MIC_LEN); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + return ieee80211_crypto_aes_cmac_256_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return ieee80211_crypto_aes_gmac_encrypt(tx); + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + return ieee80211_crypto_gcmp_encrypt(tx); default: return ieee80211_crypto_hw_encrypt(tx); } @@ -3152,7 +3173,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) } queues = BIT(sdata->vif.hw_queue[ieee802_1d_to_ac[tid]]); - __ieee80211_flush_queues(local, sdata, queues); + __ieee80211_flush_queues(local, sdata, queues, false); sta->reserved_tid = tid; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 974ebe70f5b0..8428f4a95479 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -578,7 +578,7 @@ ieee80211_get_vif_queues(struct ieee80211_local *local, void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - unsigned int queues) + unsigned int queues, bool drop) { if (!local->ops->flush) return; @@ -594,7 +594,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, IEEE80211_QUEUE_STOP_REASON_FLUSH, false); - drv_flush(local, sdata, queues, false); + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, @@ -602,9 +602,9 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, bool drop) { - __ieee80211_flush_queues(local, sdata, 0); + __ieee80211_flush_queues(local, sdata, 0, drop); } void ieee80211_stop_vif_queues(struct ieee80211_local *local, @@ -744,16 +744,19 @@ 
EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); /* * Nothing should have been stuffed into the workqueue during - * the suspend->resume cycle. If this WARN is seen then there - * is a bug with either the driver suspend or something in - * mac80211 stuffing into the workqueue which we haven't yet - * cleared during mac80211's suspend cycle. + * the suspend->resume cycle. Since we can't check each caller + * of this function if we are already quiescing / suspended, + * check here and don't WARN since this can actually happen when + * the rx path (for example) is racing against __ieee80211_suspend + * and suspending / quiescing was set after the rx path checked + * them. */ static bool ieee80211_can_queue_work(struct ieee80211_local *local) { - if (WARN(local->suspended && !local->resuming, - "queueing ieee80211 work while going to suspend\n")) + if (local->quiescing || (local->suspended && !local->resuming)) { + pr_warn("queueing ieee80211 work while going to suspend\n"); return false; + } return true; } @@ -1470,10 +1473,12 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* Check if any channel in this sband supports at least 80 MHz */ for (i = 0; i < sband->n_channels; i++) { - if (!(sband->channels[i].flags & IEEE80211_CHAN_NO_80MHZ)) { - have_80mhz = true; - break; - } + if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_80MHZ)) + continue; + + have_80mhz = true; + break; } if (sband->vht_cap.vht_supported && have_80mhz) { @@ -1735,6 +1740,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct cfg80211_sched_scan_request *sched_scan_req; bool sched_scan_stopped = false; + /* nothing to do if HW shouldn't run */ + if (!local->open_count) + goto wake_up; + #ifdef CONFIG_PM if (local->suspended) local->resuming = true; @@ -1756,9 +1765,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) reconfig_due_to_wowlan = true; } #endif - /* everything else happens only if HW was up & running */ - if 
(!local->open_count) - goto wake_up; /* * Upon resume hardware can sometimes be goofy due to @@ -2042,7 +2048,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. */ - if (!local->suspended || reconfig_due_to_wowlan) + if (local->open_count && (!local->suspended || reconfig_due_to_wowlan)) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); if (!local->suspended) @@ -2054,7 +2060,19 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; - if (!reconfig_due_to_wowlan) + /* It's possible that we don't handle the scan completion in + * time during suspend, so if it's still marked as completed + * here, queue the work and flush it to clean things up. + * Instead of calling the worker function directly here, we + * really queue it to avoid potential races with other flows + * scheduling the same work. + */ + if (test_bit(SCAN_COMPLETED, &local->scanning)) { + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + flush_delayed_work(&local->scan_work); + } + + if (local->open_count && !reconfig_due_to_wowlan) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND); list_for_each_entry(sdata, &local->interfaces, list) { @@ -2538,7 +2556,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.flags |= RATE_INFO_FLAGS_MCS; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (status->flag & RX_FLAG_VHT) { @@ -2546,13 +2566,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.nss = status->vht_nss; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80MHZ) - ri.flags |= 
RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - ri.flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_160MHZ) - ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else if (status->vht_flag & RX_VHT_FLAG_80MHZ) + ri.bw = RATE_INFO_BW_80; + else if (status->vht_flag & RX_VHT_FLAG_160MHZ) + ri.bw = RATE_INFO_BW_160; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else { @@ -2560,10 +2580,15 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, int shift = 0; int bitrate; - if (status->flag & RX_FLAG_10MHZ) + if (status->flag & RX_FLAG_10MHZ) { shift = 1; - if (status->flag & RX_FLAG_5MHZ) + ri.bw = RATE_INFO_BW_10; + } else if (status->flag & RX_FLAG_5MHZ) { shift = 2; + ri.bw = RATE_INFO_BW_5; + } else { + ri.bw = RATE_INFO_BW_20; + } sband = local->hw.wiphy->bands[status->band]; bitrate = sband->bitrates[status->rate_idx].bitrate; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index bc9e8fc48785..85f9596da07b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -269,51 +269,54 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); } -enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 cap = sta->sta.vht_cap.cap; - enum ieee80211_sta_rx_bandwidth bw; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + u32 cap_width; - if (!sta->sta.vht_cap.vht_supported) { - bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - goto check_max; - } + if (!vht_cap->vht_supported) + return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
+ IEEE80211_STA_RX_BW_40 : + IEEE80211_STA_RX_BW_20; - switch (sdata->vif.bss_conf.chandef.width) { - default: - WARN_ON_ONCE(1); - /* fall through */ + cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + + if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ || + cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) + return IEEE80211_STA_RX_BW_160; + + return IEEE80211_STA_RX_BW_80; +} + +static enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) +{ + switch (width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: - bw = IEEE80211_STA_RX_BW_20; - break; + return IEEE80211_STA_RX_BW_20; case NL80211_CHAN_WIDTH_40: - bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - break; + return IEEE80211_STA_RX_BW_40; + case NL80211_CHAN_WIDTH_80: + return IEEE80211_STA_RX_BW_80; case NL80211_CHAN_WIDTH_160: - if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { - bw = IEEE80211_STA_RX_BW_160; - break; - } - /* fall through */ case NL80211_CHAN_WIDTH_80P80: - if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { - bw = IEEE80211_STA_RX_BW_160; - break; - } - /* fall through */ - case NL80211_CHAN_WIDTH_80: - bw = IEEE80211_STA_RX_BW_80; + return IEEE80211_STA_RX_BW_160; + default: + WARN_ON_ONCE(1); + return IEEE80211_STA_RX_BW_20; } +} + +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + enum ieee80211_sta_rx_bandwidth bw; + + bw = ieee80211_chan_width_to_rx_bw(sdata->vif.bss_conf.chandef.width); + bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); + bw = min(bw, sta->cur_max_bandwidth); - check_max: - if (bw > sta->cur_max_bandwidth) - bw = sta->cur_max_bandwidth; return bw; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 
12398fde02e8..75de6fac40d1 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -22,6 +22,8 @@ #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" +#include "aes_gcm.h" #include "wpa.h" ieee80211_tx_result @@ -393,7 +395,8 @@ static inline void ccmp_hdr2pn(u8 *pn, u8 *hdr) } -static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, + unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; @@ -424,7 +427,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = IEEE80211_CCMP_MIC_LEN; + tail = mic_len; if (WARN_ON(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) @@ -459,21 +462,22 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, IEEE80211_CCMP_MIC_LEN)); + skb_put(skb, mic_len), mic_len); return 0; } ieee80211_tx_result -ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) +ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, + unsigned int mic_len) { struct sk_buff *skb; ieee80211_tx_set_protected(tx); skb_queue_walk(&tx->skbs, skb) { - if (ccmp_encrypt_skb(tx, skb) < 0) + if (ccmp_encrypt_skb(tx, skb, mic_len) < 0) return TX_DROP; } @@ -482,7 +486,8 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) ieee80211_rx_result -ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) +ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, + unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; @@ -499,8 +504,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) 
!ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - - IEEE80211_CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; @@ -531,14 +535,14 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) key->u.ccmp.tfm, b_0, aad, skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, data_len, - skb->data + skb->len - IEEE80211_CCMP_MIC_LEN)) + skb->data + skb->len - mic_len, mic_len)) return RX_DROP_UNUSABLE; } memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_UNUSABLE; memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_CCMP_HDR_LEN); @@ -546,6 +550,229 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) +{ + __le16 mask_fc; + u8 qos_tid; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + memcpy(j_0, hdr->addr2, ETH_ALEN); + memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); + j_0[13] = 0; + j_0[14] = 0; + j_0[AES_BLOCK_SIZE - 1] = 0x01; + + /* AAD (extra authenticate-only data) / masked 802.11 header + * FC | A1 | A2 | A3 | SC | [A4] | [QC] + */ + put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]); + /* Mask FC: zero subtype b4 b5 b6 (if not mgmt) + * Retry, PwrMgt, MoreData; set Protected + */ + mask_fc = hdr->frame_control; + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | + IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + if (!ieee80211_is_mgmt(hdr->frame_control)) + mask_fc &= ~cpu_to_le16(0x0070); + mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + put_unaligned(mask_fc, (__le16 *)&aad[2]); + memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); + + /* Mask Seq#, leave Frag# */ + aad[22] = *((u8 
*)&hdr->seq_ctrl) & 0x0f; + aad[23] = 0; + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + if (ieee80211_has_a4(hdr->frame_control)) { + memcpy(&aad[24], hdr->addr4, ETH_ALEN); + aad[30] = qos_tid; + aad[31] = 0; + } else { + memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); + aad[24] = qos_tid; + } +} + +static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) +{ + hdr[0] = pn[5]; + hdr[1] = pn[4]; + hdr[2] = 0; + hdr[3] = 0x20 | (key_id << 6); + hdr[4] = pn[3]; + hdr[5] = pn[2]; + hdr[6] = pn[1]; + hdr[7] = pn[0]; +} + +static inline void gcmp_hdr2pn(u8 *pn, const u8 *hdr) +{ + pn[0] = hdr[7]; + pn[1] = hdr[6]; + pn[2] = hdr[5]; + pn[3] = hdr[4]; + pn[4] = hdr[1]; + pn[5] = hdr[0]; +} + +static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key *key = tx->key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int hdrlen, len, tail; + u8 *pos; + u8 pn[6]; + u64 pn64; + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { + /* hwaccel has no need for preallocated room for GCMP + * header or MIC fields + */ + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + len = skb->len - hdrlen; + + if (info->control.hw_key) + tail = 0; + else + tail = IEEE80211_GCMP_MIC_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < IEEE80211_GCMP_HDR_LEN)) + return -1; + + pos = skb_push(skb, IEEE80211_GCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_GCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + 
IEEE80211_GCMP_HDR_LEN); + + /* the HW only needs room for the IV, but not the actual IV */ + if (info->control.hw_key && + (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) + return 0; + + hdr = (struct ieee80211_hdr *)pos; + pos += hdrlen; + + pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn); + + pn[5] = pn64; + pn[4] = pn64 >> 8; + pn[3] = pn64 >> 16; + pn[2] = pn64 >> 24; + pn[1] = pn64 >> 32; + pn[0] = pn64 >> 40; + + gcmp_pn2hdr(pos, pn, key->conf.keyidx); + + /* hwaccel - with software GCMP header */ + if (info->control.hw_key) + return 0; + + pos += IEEE80211_GCMP_HDR_LEN; + gcmp_special_blocks(skb, pn, j_0, aad); + ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); + + return 0; +} + +ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + + ieee80211_tx_set_protected(tx); + + skb_queue_walk(&tx->skbs, skb) { + if (gcmp_encrypt_skb(tx, skb) < 0) + return TX_DROP; + } + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + int hdrlen; + struct ieee80211_key *key = rx->key; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + u8 pn[IEEE80211_GCMP_PN_LEN]; + int data_len; + int queue; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_robust_mgmt_frame(skb)) + return RX_CONTINUE; + + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - + IEEE80211_GCMP_MIC_LEN; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + + if (status->flag & RX_FLAG_DECRYPTED) { + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) + return RX_DROP_UNUSABLE; + } else { + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + } + + gcmp_hdr2pn(pn, skb->data + hdrlen); + + queue = rx->security_idx; + + if 
(memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN) <= 0) { + key->u.gcmp.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + /* hardware didn't decrypt/verify MIC */ + gcmp_special_blocks(skb, pn, j_0, aad); + + if (ieee80211_aes_gcm_decrypt( + key->u.gcmp.tfm, j_0, aad, + skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + } + + memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); + + /* Remove GCMP header and MIC */ + if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_GCMP_HDR_LEN); + + return RX_CONTINUE; +} + static ieee80211_tx_result ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, struct sk_buff *skb) @@ -729,6 +956,48 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20]; + u64 pn64; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + /* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128) + */ + 
ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mmie->mic); + + return TX_CONTINUE; +} ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) @@ -780,6 +1049,160 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { + key->u.aes_cmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mic); + if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_cmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_cmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + +ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + struct ieee80211_hdr *hdr; + u8 aad[20]; + u64 pn64; + u8 nonce[12]; + + if 
(WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + hdr = (struct ieee80211_hdr *)skb->data; + memcpy(nonce, hdr->addr2, ETH_ALEN); + bip_ipn_swap(nonce + ETH_ALEN, mmie->sequence_number); + + /* MIC = AES-GMAC(IGTK, AAD || Management Frame Body || MMIE, 128) */ + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, mmie->mic) < 0) + return TX_DROP; + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6], nonce[12]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_gmac.rx_pn, 6) <= 0) { + key->u.aes_gmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + + memcpy(nonce, 
hdr->addr2, ETH_ALEN); + memcpy(nonce + ETH_ALEN, ipn, 6); + + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, + mic) < 0 || + memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_gmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_gmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) { diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 62e5a12dfe0a..d98011ee8f55 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -24,17 +24,32 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result -ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx); +ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, + unsigned int mic_len); ieee80211_rx_result -ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx); +ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, + unsigned int mic_len); ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx); + #endif /* WPA_H */ diff --git a/net/mac802154/cfg.c 
b/net/mac802154/cfg.c index c035708ada16..5d9f68c75e5f 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -51,10 +51,7 @@ ieee802154_add_iface(struct wpan_phy *phy, const char *name, struct net_device *err; err = ieee802154_if_add(local, name, type, extended_addr); - if (IS_ERR(err)) - return PTR_ERR(err); - - return 0; + return PTR_ERR_OR_ZERO(err); } static int @@ -87,6 +84,26 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) } static int +ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + /* check if phy support this setting */ + if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) + return -EOPNOTSUPP; + + ret = drv_set_cca_mode(local, cca); + if (!ret) + wpan_phy->cca = *cca; + + return ret; +} + +static int ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id) { @@ -201,6 +218,7 @@ const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf = ieee802154_add_iface, .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, + .set_cca_mode = ieee802154_set_cca_mode, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, .set_backoff_exponent = ieee802154_set_backoff_exponent, diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index f21e864613d0..98180a9fff4a 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -70,7 +70,8 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) return local->ops->set_txpower(&local->hw, dbm); } -static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { might_sleep(); @@ -79,7 +80,7 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) return -EOPNOTSUPP; } - 
return local->ops->set_cca_mode(&local->hw, cca_mode); + return local->ops->set_cca_mode(&local->hw, cca); } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9ae893057dd7..6fb6bdf9868c 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -137,25 +137,11 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) static int mac802154_slave_open(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - struct ieee802154_sub_if_data *subif; struct ieee802154_local *local = sdata->local; int res = 0; ASSERT_RTNL(); - if (sdata->vif.type == NL802154_IFTYPE_NODE) { - mutex_lock(&sdata->local->iflist_mtx); - list_for_each_entry(subif, &sdata->local->interfaces, list) { - if (subif != sdata && - subif->vif.type == sdata->vif.type && - ieee802154_sdata_running(subif)) { - mutex_unlock(&sdata->local->iflist_mtx); - return -EBUSY; - } - } - mutex_unlock(&sdata->local->iflist_mtx); - } - set_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) { @@ -175,6 +161,88 @@ err: return res; } +static int +ieee802154_check_mac_settings(struct ieee802154_local *local, + struct wpan_dev *wpan_dev, + struct wpan_dev *nwpan_dev) +{ + ASSERT_RTNL(); + + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + if (wpan_dev->promiscuous_mode != nwpan_dev->promiscuous_mode) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_AFILT) { + if (wpan_dev->pan_id != nwpan_dev->pan_id) + return -EBUSY; + + if (wpan_dev->short_addr != nwpan_dev->short_addr) + return -EBUSY; + + if (wpan_dev->extended_addr != nwpan_dev->extended_addr) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { + if (wpan_dev->min_be != nwpan_dev->min_be) + return -EBUSY; + + if (wpan_dev->max_be != nwpan_dev->max_be) + return -EBUSY; + + if (wpan_dev->csma_retries != nwpan_dev->csma_retries) + return -EBUSY; + } + + if 
(local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { + if (wpan_dev->frame_retries != nwpan_dev->frame_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_LBT) { + if (wpan_dev->lbt != nwpan_dev->lbt) + return -EBUSY; + } + + return 0; +} + +static int +ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, + enum nl802154_iftype iftype) +{ + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_sub_if_data *nsdata; + + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { + int ret; + + /* TODO currently we don't support multiple node types + * we need to run skb_clone at rx path. Check if there + * exist really an use case if we need to support + * multiple node types at the same time. + */ + if (sdata->vif.type == NL802154_IFTYPE_NODE && + nsdata->vif.type == NL802154_IFTYPE_NODE) + return -EBUSY; + + /* check all phy mac sublayer settings are the same. + * We have only one phy, different values makes trouble. 
+ */ + ret = ieee802154_check_mac_settings(local, wpan_dev, + &nsdata->wpan_dev); + if (ret < 0) + return ret; + } + } + + return 0; +} + static int mac802154_wpan_open(struct net_device *dev) { int rc; @@ -183,6 +251,10 @@ static int mac802154_wpan_open(struct net_device *dev) struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct wpan_phy *phy = sdata->local->phy; + rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); + if (rc < 0) + return rc; + rc = mac802154_slave_open(dev); if (rc < 0) return rc; diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 6aacb1816889..bdccb4ecd30f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -81,7 +81,7 @@ static int mac802154_set_mac_params(struct net_device *dev, /* PHY */ wpan_dev->wpan_phy->transmit_power = params->transmit_power; - wpan_dev->wpan_phy->cca_mode = params->cca_mode; + wpan_dev->wpan_phy->cca = params->cca; wpan_dev->wpan_phy->cca_ed_level = params->cca_ed_level; /* MAC */ @@ -98,7 +98,7 @@ static int mac802154_set_mac_params(struct net_device *dev, } if (local->hw.flags & IEEE802154_HW_CCA_MODE) { - ret = drv_set_cca_mode(local, params->cca_mode); + ret = drv_set_cca_mode(local, ¶ms->cca); if (ret < 0) return ret; } @@ -122,7 +122,7 @@ static void mac802154_get_mac_params(struct net_device *dev, /* PHY */ params->transmit_power = wpan_dev->wpan_phy->transmit_power; - params->cca_mode = wpan_dev->wpan_phy->cca_mode; + params->cca = wpan_dev->wpan_phy->cca; params->cca_ed_level = wpan_dev->wpan_phy->cca_ed_level; /* MAC */ diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 349295d21946..809df534a720 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -60,14 +60,14 @@ out: return segs; } -static struct packet_offload mpls_mc_offload = { +static struct packet_offload mpls_mc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_MC), .callbacks = { .gso_segment = mpls_gso_segment, }, }; -static struct packet_offload mpls_uc_offload = { +static 
struct packet_offload mpls_uc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_UC), .callbacks = { .gso_segment = mpls_gso_segment, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8295a430a56..e55759056361 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2887,7 +2887,8 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, if (ip_vs_genl_fill_service(skb, svc) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3079,7 +3080,8 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, if (ip_vs_genl_fill_dest(skb, dest) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3215,7 +3217,8 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 46d1b26a468e..13fad8668f83 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1424,12 +1424,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report) -{ - nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); -} -EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); - static int untrack_refs(void) { int cnt = 0, cpu; @@ -1622,13 +1616,18 @@ int nf_conntrack_init_start(void) for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); - /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 512 buckets. >= 1GB machines have 16384 buckets. 
*/ if (!nf_conntrack_htable_size) { + /* Idea from tcp.c: use 1/16384 of memory. + * On i386: 32MB machine has 512 buckets. + * >= 1GB machines have 16384 buckets. + * >= 4GB machines have 65536 buckets. + */ nf_conntrack_htable_size = (((totalram_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) + if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) + nf_conntrack_htable_size = 65536; + else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 16384; if (nf_conntrack_htable_size < 32) nf_conntrack_htable_size = 32; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1bd9ed9e62f6..d1c23940a86a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -749,13 +749,47 @@ static int ctnetlink_done(struct netlink_callback *cb) return 0; } -struct ctnetlink_dump_filter { +struct ctnetlink_filter { struct { u_int32_t val; u_int32_t mask; } mark; }; +static struct ctnetlink_filter * +ctnetlink_alloc_filter(const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_MARK + struct ctnetlink_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return ERR_PTR(-ENOMEM); + + filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); + filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + return filter; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int ctnetlink_filter_match(struct nf_conn *ct, void *data) +{ + struct ctnetlink_filter *filter = data; + + if (filter == NULL) + return 1; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if ((ct->mark & filter->mark.mask) == filter->mark.val) + return 1; +#endif + + return 0; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { @@ -768,10 +802,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) int res; spinlock_t *lockp; -#ifdef 
CONFIG_NF_CONNTRACK_MARK - const struct ctnetlink_dump_filter *filter = cb->data; -#endif - last = (struct nf_conn *)cb->args[1]; local_bh_disable(); @@ -798,12 +828,9 @@ restart: continue; cb->args[1] = 0; } -#ifdef CONFIG_NF_CONNTRACK_MARK - if (filter && !((ct->mark & filter->mark.mask) == - filter->mark.val)) { + if (!ctnetlink_filter_match(ct, cb->data)) continue; - } -#endif + rcu_read_lock(); res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, @@ -1001,6 +1028,25 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_conntrack(struct net *net, + const struct nlattr * const cda[], + u32 portid, int report) +{ + struct ctnetlink_filter *filter = NULL; + + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); + } + + nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, + portid, report); + kfree(filter); + + return 0; +} + static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1024,11 +1070,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { - /* Flush the whole table */ - nf_conntrack_flush_report(net, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - return 0; + return ctnetlink_flush_conntrack(net, cda, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } if (err < 0) @@ -1076,21 +1120,16 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, .dump = ctnetlink_dump_table, .done = ctnetlink_done, }; -#ifdef CONFIG_NF_CONNTRACK_MARK + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - struct ctnetlink_dump_filter *filter; + struct ctnetlink_filter *filter; - filter = kzalloc(sizeof(struct ctnetlink_dump_filter), - GFP_ATOMIC); - if (filter == NULL) - return -ENOMEM; + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + 
return PTR_ERR(filter); - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->mark.mask = - ntohl(nla_get_be32(cda[CTA_MARK_MASK])); c.data = filter; } -#endif return netlink_dump_start(ctnl, skb, nlh, &c); } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index f6e2ae91a80b..ce3e840c8704 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -98,9 +98,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, new_end_seq = htonl(ntohl(sack->end_seq) - seq->offset_before); - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); + pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n", + ntohl(sack->start_seq), ntohl(new_start_seq), + ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, 0); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 43c926cae9c0..0d8448f19dfe 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -425,8 +425,7 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i]; nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; nf_log_sysctl_table[i].mode = 0644; nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1ff04bcd4871..199fd0f27b0e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -427,7 +427,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -971,7 +972,8 @@ static int 
nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -1713,7 +1715,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -2367,7 +2370,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nla_nest_end(skb, desc); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3041,7 +3045,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3330,7 +3335,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c421d94c4652..8b117c90ecd7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -272,7 +272,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { - struct sk_buff *nskb, *oskb = skb; + struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; @@ -283,12 +283,11 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); 
replay: - nskb = netlink_skb_clone(oskb, GFP_KERNEL); - if (!nskb) + skb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); - nskb->sk = oskb->sk; - skb = nskb; + skb->sk = oskb->sk; nfnl_lock(subsys_id); ss = rcu_dereference_protected(table[subsys_id].subsys, @@ -305,7 +304,7 @@ replay: { nfnl_unlock(subsys_id); netlink_ack(skb, nlh, -EOPNOTSUPP); - return kfree_skb(nskb); + return kfree_skb(skb); } } @@ -386,7 +385,7 @@ replay: nfnl_err_reset(&err_list); ss->abort(oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); goto replay; } } @@ -427,7 +426,7 @@ done: nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); } static void nfnetlink_rcv(struct sk_buff *skb) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 9e287cb56a04..a5599fc51a6f 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -86,7 +86,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { - const struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help = nfct_help(ct); if (attr == NULL) return -EINVAL; @@ -94,7 +94,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(&help->data, nla_data(attr), help->helper->data_len); + memcpy(help->data, nla_data(attr), help->helper->data_len); return 0; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 1e316ce4cb5d..61e6c407476a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; he = rhashtable_lookup(priv, key); @@ 
-83,69 +83,97 @@ static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct rhashtable *priv = nft_set_priv(set); - struct rhash_head *he, __rcu **pprev; - pprev = elem->cookie; - he = rht_dereference((*pprev), priv); + rhashtable_remove(priv, elem->cookie); + synchronize_rcu(); + kfree(elem->cookie); +} - rhashtable_remove_pprev(priv, he, pprev); +struct nft_compare_arg { + const struct nft_set *set; + struct nft_set_elem *elem; +}; - synchronize_rcu(); - kfree(he); +static bool nft_hash_compare(void *ptr, void *arg) +{ + struct nft_hash_elem *he = ptr; + struct nft_compare_arg *x = arg; + + if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { + x->elem->cookie = he; + x->elem->flags = 0; + if (x->set->flags & NFT_SET_MAP) + nft_data_copy(&x->elem->data, he->data); + + return true; + } + + return false; } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); - struct rhash_head __rcu * const *pprev; - struct nft_hash_elem *he; - u32 h; - - h = rhashtable_hashfn(priv, &elem->key, set->klen); - pprev = &tbl->buckets[h]; - rht_for_each_entry_rcu(he, tbl->buckets[h], node) { - if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->node.next; - continue; - } + struct rhashtable *priv = nft_set_priv(set); + struct nft_compare_arg arg = { + .set = set, + .elem = elem, + }; - elem->cookie = (void *)pprev; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + if (rhashtable_lookup_compare(priv, &elem->key, + &nft_hash_compare, &arg)) return 0; - } + return -ENOENT; } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; + struct rhashtable *priv = nft_set_priv(set); const struct 
nft_hash_elem *he; + struct rhashtable_iter hti; struct nft_set_elem elem; - unsigned int i; + int err; - tbl = rht_dereference_rcu(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(he, tbl->buckets[i], node) { - if (iter->count < iter->skip) - goto cont; - - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; - - iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) - return; -cont: - iter->count++; + err = rhashtable_walk_init(priv, &hti); + iter->err = err; + if (err) + return; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + iter->err = err; + goto out; + } + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + err = PTR_ERR(he); + if (err != -EAGAIN) { + iter->err = err; + goto out; + } } + + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + goto out; + +cont: + iter->count++; } + +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); } static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) @@ -153,13 +181,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) return sizeof(struct rhashtable); } -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nfnl_lock_is_held(void *parent) -{ - return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); -} -#endif - static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) @@ -173,9 +194,6 @@ static int nft_hash_init(const struct nft_set *set, .hashfn = jhash, .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nfnl_lock_is_held, -#endif }; return 
rhashtable_init(priv, &params); @@ -183,18 +201,23 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = priv->tbl; - struct nft_hash_elem *he, *next; + struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; + struct nft_hash_elem *he; + struct rhash_head *pos, *next; unsigned int i; + /* Stop an eventual async resizing */ + priv->being_destroyed = true; + mutex_lock(&priv->mutex); + + tbl = rht_dereference(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); - he != NULL; he = next) { - next = rht_entry(he->node.next, struct nft_hash_elem, node); + rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); - } } + mutex_unlock(&priv->mutex); + rhashtable_destroy(priv); } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index c529161cdbf8..0778855ea5e7 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -225,6 +225,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_lock(); list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + int foptsize, optnum; + f = &kf->finger; if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) @@ -233,110 +235,109 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) optp = _optp; fmatch = FMATCH_WRONG; - if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { - int foptsize, optnum; + if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl)) + continue; - /* - * Should not happen if userspace parser was written correctly. - */ - if (f->wss.wc >= OSF_WSS_MAX) - continue; + /* + * Should not happen if userspace parser was written correctly. 
+ */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; - /* Check options */ + /* Check options */ - foptsize = 0; - for (optnum = 0; optnum < f->opt_num; ++optnum) - foptsize += f->opt[optnum].length; + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; - if (foptsize > MAX_IPOPTLEN || - optsize > MAX_IPOPTLEN || - optsize != foptsize) - continue; + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; - check_WSS = f->wss.wc; + check_WSS = f->wss.wc; - for (optnum = 0; optnum < f->opt_num; ++optnum) { - if (f->opt[optnum].kind == (*optp)) { - __u32 len = f->opt[optnum].length; - const __u8 *optend = optp + len; - int loop_cont = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; - fmatch = FMATCH_OK; + fmatch = FMATCH_OK; - switch (*optp) { - case OSFOPT_MSS: - mss = optp[3]; - mss <<= 8; - mss |= optp[2]; + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; - mss = ntohs((__force __be16)mss); - break; - case OSFOPT_TS: - loop_cont = 1; - break; - } + mss = ntohs((__force __be16)mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } - optp = optend; - } else - fmatch = FMATCH_OPT_WRONG; + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; - if (fmatch != FMATCH_OK) - break; - } + if (fmatch != FMATCH_OK) + break; + } - if (fmatch != FMATCH_OPT_WRONG) { - fmatch = FMATCH_WRONG; + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; - switch (check_WSS) { - case OSF_WSS_PLAIN: - if (f->wss.val == 0 || window == f->wss.val) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MSS: - /* - * Some smart modems decrease mangle MSS to - * SMART_MSS_2, so we check standard, decreased - * and the one provided in the fingerprint MSS - * values. 
- */ + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. + */ #define SMART_MSS_1 1460 #define SMART_MSS_2 1448 - if (window == f->wss.val * mss || - window == f->wss.val * SMART_MSS_1 || - window == f->wss.val * SMART_MSS_2) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MTU: - if (window == f->wss.val * (mss + 40) || - window == f->wss.val * (SMART_MSS_1 + 40) || - window == f->wss.val * (SMART_MSS_2 + 40)) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MODULO: - if ((window % f->wss.val) == 0) - fmatch = FMATCH_OK; - break; - } + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; } + } - if (fmatch != FMATCH_OK) - continue; + if (fmatch != FMATCH_OK) + continue; - fcount++; + fcount++; - if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, - "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", - f->genre, f->version, f->subtype, - &ip->saddr, ntohs(tcp->source), - &ip->daddr, ntohs(tcp->dest), - f->ttl - ip->ttl); + if (info->flags & XT_OSF_LOG) + nf_log_packet(net, p->family, p->hooknum, skb, + p->in, p->out, NULL, + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); - if ((info->flags & XT_OSF_LOG) && - info->loglevel == XT_OSF_LOGLEVEL_FIRST) - break; - } + if ((info->flags & XT_OSF_LOG) && + 
info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; } rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c2f2a53a4879..7fd1104ba900 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -324,8 +324,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, return 0; add_std_failure: - if (doi_def) - cipso_v4_doi_free(doi_def); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -641,7 +640,8 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) if (ret_val != 0) goto listall_cb_failure; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e66e977ef2fa..70440748fe5c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -93,23 +93,20 @@ static int netlbl_mgmt_add_common(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; struct netlbl_domaddr_map *addrmap = NULL; struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; + struct netlbl_dom_map *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } + if (!entry) + return -ENOMEM; entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); if (entry->domain == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_entry; } nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); @@ -125,16 +122,16 @@ static int netlbl_mgmt_add_common(struct genl_info *info, break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto add_failure; + goto add_free_domain; tmp_val 
= nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) - goto add_failure; + goto add_free_domain; entry->def.cipso = cipsov4; break; default: - goto add_failure; + goto add_free_domain; } if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { @@ -145,7 +142,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -153,12 +150,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); @@ -166,7 +163,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; @@ -178,7 +175,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -192,7 +189,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -200,12 +197,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != sizeof(struct 
in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); @@ -213,7 +210,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = *addr; map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; @@ -227,7 +224,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -237,16 +234,17 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto add_failure; + goto add_free_addrmap; return 0; -add_failure: - if (cipsov4) - cipso_v4_doi_putdef(cipsov4); - if (entry) - kfree(entry->domain); +add_free_addrmap: kfree(addrmap); +add_doi_put_def: + cipso_v4_doi_putdef(cipsov4); +add_free_domain: + kfree(entry->domain); +add_free_entry: kfree(entry); return ret_val; } @@ -456,7 +454,8 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) goto listall_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); @@ -620,7 +619,8 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, if (ret_val != 0) goto protocols_cb_failure; - return genlmsg_end(skb, data); + genlmsg_end(skb, data); + return 0; protocols_cb_failure: genlmsg_cancel(skb, data); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 78a63c18779e..aec7994f78cf 100644 --- 
a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1163,7 +1163,8 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, goto list_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 75532efa51cd..2702673f0f23 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -98,12 +98,12 @@ static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: - * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock - * combined with an RCU read-side lock. Insertion and removal are protected - * with nl_sk_hash_lock while using RCU list modification primitives and may - * run in parallel to nl_table_lock protected lookups. Destruction of the - * Netlink socket may only occur *after* nl_table_lock has been acquired - * either during or after the socket has been removed from the list. + * Lookup and traversal are protected with an RCU read-side lock. Insertion + * and removal are protected with per bucket lock while using RCU list + * modification primitives and may run in parallel to RCU protected lookups. + * Destruction of the Netlink socket may only occur *after* nl_table_lock has + * been acquired * either during or after the socket has been removed from + * the list and after an RCU grace period. 
*/ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); @@ -111,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void *parent) -{ - if (debug_locks) - return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); - return 1; -} -#endif - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -708,7 +695,7 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) + struct scm_cookie *scm) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; @@ -754,7 +741,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm->creds; err = security_netlink_send(sk, skb); if (err) { @@ -833,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 #endif /* CONFIG_NETLINK_MMAP */ static void netlink_skb_destructor(struct sk_buff *skb) @@ -1003,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, .net = net, .portid = portid, }; - u32 hash; - - hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - return rhashtable_lookup_compare(&table->hash, hash, + return 
rhashtable_lookup_compare(&table->hash, &portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk) +{ + struct netlink_compare_arg arg = { + .net = sock_net(sk), + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; struct sock *sk; - read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); - read_unlock(&nl_table_lock); return sk; } @@ -1053,29 +1047,33 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 portid) +static int netlink_insert(struct sock *sk, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; - int err = -EADDRINUSE; + int err; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) + if (BITS_PER_LONG > 32 && + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); + err = 0; + if (!__netlink_insert(table, sk)) { + err = -EADDRINUSE; + sock_put(sk); + } + err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1083,13 +1081,11 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); if 
(nlk_sk(sk)->subscriptions) { @@ -1197,6 +1193,13 @@ out_module: goto out; } +static void deferred_put_nlk_sk(struct rcu_head *head) +{ + struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + + sock_put(&nlk->sk); +} + static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -1269,7 +1272,7 @@ static int netlink_release(struct socket *sock) local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); - sock_put(sk); + call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1284,7 +1287,6 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); - netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1292,13 +1294,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); - netlink_table_ungrab(); goto retry; } rcu_read_unlock(); - netlink_table_ungrab(); - err = netlink_insert(sk, net, portid); + err = netlink_insert(sk, portid); if (err == -EADDRINUSE) goto retry; @@ -1486,7 +1486,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!nlk->portid) { err = nladdr->nl_pid ? 
- netlink_insert(sk, net, nladdr->nl_pid) : + netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { netlink_undo_bind(nlk->ngroups, groups, sk); @@ -2259,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ -2273,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) - siocb->scm = &scm; - - err = scm_send(sock, msg, siocb->scm, true); + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2302,10 +2298,15 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + /* It's a really convoluted way for userland to ask for mmaped + * sendmsg(), but that's what we've got... 
+ */ if (netlink_tx_is_mmaped(sk) && + msg->msg_iter.type == ITER_IOVEC && + msg->msg_iter.nr_segs == 1 && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); + &scm); goto out; } @@ -2319,7 +2320,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; @@ -2341,7 +2342,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -2349,7 +2350,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2412,11 +2412,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (nlk->flags & NETLINK_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); + memset(&scm, 0, sizeof(scm)); + scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; @@ -2431,7 +2428,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } } - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? 
: copied; @@ -2492,7 +2489,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg && cfg->input) nlk_sk(sk)->netlink_rcv = cfg->input; - if (netlink_insert(sk, net, 0)) + if (netlink_insert(sk, 0)) goto out_sock_release; nlk = nlk_sk(sk); @@ -2894,97 +2891,97 @@ EXPORT_SYMBOL(nlmsg_notify); #ifdef CONFIG_PROC_FS struct nl_seq_iter { struct seq_net_private p; + struct rhashtable_iter hti; int link; - int hash_idx; }; -static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) +static int netlink_walk_start(struct nl_seq_iter *iter) { - struct nl_seq_iter *iter = seq->private; - int i, j; - struct netlink_sock *nlk; - struct sock *s; - loff_t off = 0; - - for (i = 0; i < MAX_LINKS; i++) { - struct rhashtable *ht = &nl_table[i].hash; - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (j = 0; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { - s = (struct sock *)nlk; + int err; - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) { - iter->link = i; - iter->hash_idx = j; - return s; - } - ++off; - } - } + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + if (err) { + iter->link = MAX_LINKS; + return err; } - return NULL; + + err = rhashtable_walk_start(&iter->hti); + return err == -EAGAIN ? 0 : err; } -static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) __acquires(RCU) +static void netlink_walk_stop(struct nl_seq_iter *iter) { - read_lock(&nl_table_lock); - rcu_read_lock(); - return *pos ? 
netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); } -static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *__netlink_seq_next(struct seq_file *seq) { - struct rhashtable *ht; + struct nl_seq_iter *iter = seq->private; struct netlink_sock *nlk; - struct nl_seq_iter *iter; - struct net *net; - int i, j; - ++*pos; + do { + for (;;) { + int err; - if (v == SEQ_START_TOKEN) - return netlink_seq_socket_idx(seq, 0); + nlk = rhashtable_walk_next(&iter->hti); - net = seq_file_net(seq); - iter = seq->private; - nlk = v; + if (IS_ERR(nlk)) { + if (PTR_ERR(nlk) == -EAGAIN) + continue; - i = iter->link; - ht = &nl_table[i].hash; - rht_for_each_entry(nlk, nlk->node.next, ht, node) - if (net_eq(sock_net((struct sock *)nlk), net)) - return nlk; + return nlk; + } - j = iter->hash_idx + 1; + if (nlk) + break; - do { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (; j < tbl->size; j++) { - rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { - if (net_eq(sock_net((struct sock *)nlk), net)) { - iter->link = i; - iter->hash_idx = j; - return nlk; - } - } + netlink_walk_stop(iter); + if (++iter->link >= MAX_LINKS) + return NULL; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); } + } while (sock_net(&nlk->sk) != seq_file_net(seq)); - j = 0; - } while (++i < MAX_LINKS); + return nlk; +} - return NULL; +static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) +{ + struct nl_seq_iter *iter = seq->private; + void *obj = SEQ_START_TOKEN; + loff_t pos; + int err; + + iter->link = 0; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); + + for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) + obj = __netlink_seq_next(seq); + + return obj; +} + +static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return __netlink_seq_next(seq); } static void netlink_seq_stop(struct 
seq_file *seq, void *v) - __releases(RCU) __releases(nl_table_lock) { - rcu_read_unlock(); - read_unlock(&nl_table_lock); + struct nl_seq_iter *iter = seq->private; + + if (iter->link >= MAX_LINKS) + return; + + netlink_walk_stop(iter); } @@ -3131,9 +3128,6 @@ static int __init netlink_proto_init(void) .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nl_sk_hash_is_held, -#endif }; if (err != 0) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f1c31b39aa3e..89008405d6b4 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -51,6 +51,7 @@ struct netlink_sock { #endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; + struct rcu_head rcu; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) @@ -74,6 +75,5 @@ struct netlink_table { extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..3ee63a3cff30 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sk_diag_put_rings_cfg(sk, skb)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); @@ -103,7 +104,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -113,7 +114,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, req = nlmsg_data(cb->nlh); for (i = 0; i < htbl->size; i++) { - 
rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) @@ -170,7 +173,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -184,7 +187,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; } @@ -192,7 +195,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index ee57459fc258..2ed5f964772e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -762,7 +762,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_grps); } - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -802,7 +803,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family, nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/nfc/core.c b/net/nfc/core.c index 819b87702b70..cff3f1614ad4 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -555,7 +555,6 @@ EXPORT_SYMBOL(nfc_find_se); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; @@ -605,7 +604,6 @@ error: int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; @@ -934,6 +932,27 @@ int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) } 
EXPORT_SYMBOL(nfc_remove_se); +int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + int rc; + + pr_debug("transaction: %x\n", se_idx); + + device_lock(&dev->dev); + + if (!evt_transaction) { + rc = -EPROTO; + goto out; + } + + rc = nfc_genl_se_transaction(dev, se_idx, evt_transaction); +out: + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_transaction); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 91df487aa0a9..844673cb7c18 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -116,23 +116,6 @@ int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, } EXPORT_SYMBOL(nfc_hci_send_event); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len) -{ - u8 pipe; - - pr_debug("\n"); - - pipe = hdev->gate2pipe[gate]; - if (pipe == NFC_HCI_INVALID_PIPE) - return -EADDRNOTAVAIL; - - return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - response, param, param_len, NULL, NULL, - 0); -} -EXPORT_SYMBOL(nfc_hci_send_response); - /* * Execute an hci command sent to gate. * skb will contain response data if success. 
skb can be NULL if you are not @@ -331,7 +314,7 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) if (r < 0) return r; - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -345,7 +328,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, pr_debug("\n"); - if (hdev->gate2pipe[dest_gate] == NFC_HCI_DO_NOT_CREATE_PIPE) + if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) @@ -380,6 +363,8 @@ open_pipe: return r; } + hdev->pipes[pipe].gate = dest_gate; + hdev->pipes[pipe].dest_host = dest_host; hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ef50e7716c4a..6e061da2258a 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -46,6 +46,32 @@ int nfc_hci_result_to_errno(u8 result) } EXPORT_SYMBOL(nfc_hci_result_to_errno); +void nfc_hci_reset_pipes(struct nfc_hci_dev *hdev) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} +EXPORT_SYMBOL(nfc_hci_reset_pipes); + +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + if (hdev->pipes[i].dest_host != host) + continue; + + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } +} +EXPORT_SYMBOL(nfc_hci_reset_pipes_per_host); + static void nfc_hci_msg_tx_work(struct work_struct *work) { struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev, @@ -167,48 +193,69 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); - u8 local_gate, new_pipe; - u8 gate_opened = 0x00; + u8 gate = 
hdev->pipes[pipe].gate; + u8 status = NFC_HCI_ANY_OK; + struct hci_create_pipe_resp *create_info; + struct hci_delete_pipe_noti *delete_info; + struct hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { - r = -EPROTO; - break; + status = NFC_HCI_ANY_E_NOK; + goto exit; } + create_info = (struct hci_create_pipe_resp *)skb->data; - local_gate = skb->data[3]; - new_pipe = skb->data[4]; - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); - - /* save the new created pipe and bind with local gate, + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ - hdev->gate2pipe[local_gate] = new_pipe; + hdev->gate2pipe[create_info->dest_gate] = create_info->pipe; + hdev->pipes[create_info->pipe].gate = create_info->dest_gate; + hdev->pipes[create_info->pipe].dest_host = + create_info->src_host; break; case NFC_HCI_ANY_OPEN_PIPE: - /* if the pipe is already created, we allow remote host to - * open it - */ - if (gate != 0xff) - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, - &gate_opened, 1); + if (gate == NFC_HCI_INVALID_GATE) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + break; + case NFC_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct hci_delete_pipe_noti *)skb->data; + + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; case NFC_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + cleared_info = (struct hci_all_pipe_cleared_noti *)skb->data; + + nfc_hci_reset_pipes_per_host(hdev, cleared_info->host); 
break; default: pr_info("Discarded unknown cmd %x to gate %x\n", cmd, gate); - r = -EINVAL; break; } + if (hdev->ops->cmd_received) + hdev->ops->cmd_received(hdev, pipe, cmd, skb); + +exit: + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + status, NULL, 0, NULL, NULL, 0); + kfree_skb(skb); } @@ -330,15 +377,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); + u8 gate = hdev->pipes[pipe].gate; - if (gate == 0xff) { + if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; } if (hdev->ops->event_received) { - r = hdev->ops->event_received(hdev, gate, event, skb); + r = hdev->ops->event_received(hdev, pipe, event, skb); if (r <= 0) goto exit_noskb; } @@ -573,7 +620,7 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) if (hdev->ops->close) hdev->ops->close(hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -932,7 +979,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, nfc_set_drvdata(hdev->ndev, hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); hdev->quirks = quirks; diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index c3d2e2c1394c..ab4c8e80b1ad 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -65,6 +65,14 @@ struct hci_create_pipe_resp { u8 pipe; } __packed; +struct hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct hci_all_pipe_cleared_noti { + u8 host; +} __packed; + #define NFC_HCI_FRAGMENT 0x7f #define HCP_HEADER(type, instr) ((((type) & 0x03) << 6) | ((instr) & 0x3f)) @@ -77,8 +85,6 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay); -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe); - void nfc_hci_hcp_message_rx(struct 
nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index e9de1514656e..1fe725d66085 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -124,17 +124,6 @@ out_skb_err: return err; } -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe) -{ - int gate; - - for (gate = 0; gate < NFC_HCI_MAX_GATES; gate++) - if (hdev->gate2pipe[gate] == pipe) - return gate; - - return 0xff; -} - /* * Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 7aeedc43187d..7ed8949266cc 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o +nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 51feb5e63008..9575a1892607 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -41,10 +41,28 @@ #include <net/nfc/nci_core.h> #include <linux/nfc.h> +struct core_conn_create_data { + int length; + struct nci_core_conn_create_cmd *cmd; +}; + static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); +struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, + int conn_id) +{ + struct nci_conn_info *conn_info; + + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->conn_id == conn_id) + return conn_info; + } + + return NULL; +} + /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) @@ -109,10 +127,10 @@ static int __nci_request(struct nci_dev *ndev, return rc; } -static inline int nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) +inline int nci_request(struct nci_dev *ndev, + void 
(*req)(struct nci_dev *ndev, + unsigned long opt), + unsigned long opt, __u32 timeout) { int rc; @@ -456,6 +474,95 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) } EXPORT_SYMBOL(nci_set_config); +static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_discover_cmd cmd; + __u8 action = opt; + + cmd.discovery_action = action; + + nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); +} + +int nci_nfcee_discover(struct nci_dev *ndev, u8 action) +{ + return nci_request(ndev, nci_nfcee_discover_req, action, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_discover); + +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_mode_set_cmd *cmd = + (struct nci_nfcee_mode_set_cmd *)opt; + + nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, + sizeof(struct nci_nfcee_mode_set_cmd), cmd); +} + +int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) +{ + struct nci_nfcee_mode_set_cmd cmd; + + cmd.nfcee_id = nfcee_id; + cmd.nfcee_mode = nfcee_mode; + + return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_mode_set); + +static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +{ + struct core_conn_create_data *data = + (struct core_conn_create_data *)opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); +} + +int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, + u8 number_destination_params, + size_t params_len, + struct core_conn_create_dest_spec_params *params) +{ + int r; + struct nci_core_conn_create_cmd *cmd; + struct core_conn_create_data data; + + data.length = params_len + sizeof(struct nci_core_conn_create_cmd); + cmd = kzalloc(data.length, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->destination_type = destination_type; + cmd->number_destination_params = 
number_destination_params; + memcpy(cmd->params, params, params_len); + + data.cmd = cmd; + ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + + r = __nci_request(ndev, nci_core_conn_create_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); + kfree(cmd); + return r; +} +EXPORT_SYMBOL(nci_core_conn_create); + +static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +{ + __u8 conn_id = opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); +} + +int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) +{ + return nci_request(ndev, nci_core_conn_close_req, conn_id, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_conn_close); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); @@ -712,6 +819,11 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; + struct nci_conn_info *conn_info; + + conn_info = ndev->rf_conn_info; + if (!conn_info) + return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); @@ -724,8 +836,8 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return -EBUSY; /* store cb and context to be used on receiving data */ - ndev->data_exchange_cb = cb; - ndev->data_exchange_cb_context = cb_context; + conn_info->data_exchange_cb = cb; + conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) @@ -768,10 +880,16 @@ static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) static int nci_discover_se(struct nfc_dev *nfc_dev) { + int r; struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - if (ndev->ops->discover_se) + if (ndev->ops->discover_se) { + r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); + if (r != NCI_STATUS_OK) + return -EPROTO; + return ndev->ops->discover_se(ndev); + } return 0; } @@ -807,7 +925,6 @@ static 
struct nfc_ops nci_nfc_ops = { }; /* ---- Interface to NCI drivers ---- */ - /** * nci_allocate_device - allocate a new nci device * @@ -842,13 +959,20 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) - goto free_exit; + goto free_nci; + + ndev->hci_dev = nci_hci_allocate(ndev); + if (!ndev->hci_dev) + goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; -free_exit: +free_nfc: + kfree(ndev->nfc_dev); + +free_nci: kfree(ndev); return NULL; } @@ -913,6 +1037,7 @@ int nci_register_device(struct nci_dev *ndev) (unsigned long) ndev); mutex_init(&ndev->req_lock); + INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) @@ -938,12 +1063,19 @@ EXPORT_SYMBOL(nci_register_device); */ void nci_unregister_device(struct nci_dev *ndev) { + struct nci_conn_info *conn_info, *n; + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); destroy_workqueue(ndev->tx_wq); + list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { + list_del(&conn_info->list); + /* conn_info is allocated with devm_kzalloc */ + } + nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); @@ -1027,20 +1159,25 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); + struct nci_conn_info *conn_info; struct sk_buff *skb; - pr_debug("credits_cnt %d\n", atomic_read(&ndev->credits_cnt)); + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); + if (!conn_info) + return; + + pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ - while (atomic_read(&ndev->credits_cnt)) { + while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; /* Check if data flow control is used */ - if 
(atomic_read(&ndev->credits_cnt) != + if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) - atomic_dec(&ndev->credits_cnt); + atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), @@ -1092,7 +1229,9 @@ static void nci_rx_work(struct work_struct *work) if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -ETIMEDOUT); + nci_data_exchange_complete(ndev, NULL, + ndev->cur_conn_id, + -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index a2de2a8cb00e..566466d90048 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -36,10 +36,20 @@ /* Complete data exchange transaction and forward skb to nfc core */ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, - int err) + __u8 conn_id, int err) { - data_exchange_cb_t cb = ndev->data_exchange_cb; - void *cb_context = ndev->data_exchange_cb_context; + struct nci_conn_info *conn_info; + data_exchange_cb_t cb; + void *cb_context; + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + kfree_skb(skb); + goto exit; + } + + cb = conn_info->data_exchange_cb; + cb_context = conn_info->data_exchange_cb_context; pr_debug("len %d, err %d\n", skb ? 
skb->len : 0, err); @@ -48,9 +58,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); if (cb) { - ndev->data_exchange_cb = NULL; - ndev->data_exchange_cb_context = NULL; - /* forward skb to nfc core */ cb(cb_context, skb, err); } else if (skb) { @@ -60,6 +67,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, kfree_skb(skb); } +exit: clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); } @@ -85,6 +93,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int total_len = skb->len; unsigned char *data = skb->data; unsigned long flags; @@ -95,11 +104,17 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, pr_debug("conn_id 0x%x, total_len %d\n", conn_id, total_len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + __skb_queue_head_init(&frags_q); while (total_len) { frag_len = - min_t(int, total_len, ndev->max_data_pkt_payload_size); + min_t(int, total_len, conn_info->max_pkt_payload_len); skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), @@ -151,12 +166,19 @@ exit: /* Send NCI data */ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int rc = 0; pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + /* check if the packet need to be fragmented */ - if (skb->len <= ndev->max_data_pkt_payload_size) { + if (skb->len <= conn_info->max_pkt_payload_len) { /* no need to fragment packet */ nci_push_data_hdr(ndev, conn_id, skb, NCI_PBF_LAST); @@ -170,6 +192,7 @@ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) } } + ndev->cur_conn_id = conn_id; 
queue_work(ndev->tx_wq, &ndev->tx_work); goto exit; @@ -185,7 +208,7 @@ exit: static void nci_add_rx_data_frag(struct nci_dev *ndev, struct sk_buff *skb, - __u8 pbf, __u8 status) + __u8 pbf, __u8 conn_id, __u8 status) { int reassembly_len; int err = 0; @@ -229,16 +252,13 @@ static void nci_add_rx_data_frag(struct nci_dev *ndev, } exit: - if (ndev->nfc_dev->rf_mode == NFC_RF_INITIATOR) { - nci_data_exchange_complete(ndev, skb, err); - } else if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { + if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { /* Data received in Target mode, forward to nfc core */ err = nfc_tm_data_received(ndev->nfc_dev, skb); if (err) pr_err("unable to handle received data\n"); } else { - pr_err("rf mode unknown\n"); - kfree_skb(skb); + nci_data_exchange_complete(ndev, skb, conn_id, err); } } @@ -247,6 +267,8 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u8 pbf = nci_pbf(skb->data); __u8 status = 0; + __u8 conn_id = nci_conn_id(skb->data); + struct nci_conn_info *conn_info; pr_debug("len %d\n", skb->len); @@ -255,6 +277,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_conn_id(skb->data), nci_plen(skb->data)); + conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); + if (!conn_info) + return; + /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); @@ -268,5 +294,5 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) skb_trim(skb, (skb->len - 1)); } - nci_add_rx_data_frag(ndev, skb, pbf, nci_to_errno(status)); + nci_add_rx_data_frag(ndev, skb, pbf, conn_id, nci_to_errno(status)); } diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c new file mode 100644 index 000000000000..ed54ec533836 --- /dev/null +++ b/net/nfc/nci/hci.c @@ -0,0 +1,694 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). 
+ * This is the HCI over NCI implementation, as specified in the 10.2 + * section of the NCI 1.1 specification. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include <linux/skbuff.h> + +#include "../nfc.h" +#include <net/nfc/nci.h> +#include <net/nfc/nci_core.h> +#include <linux/nfc.h> + +struct nci_data { + u8 conn_id; + u8 pipe; + u8 cmd; + const u8 *data; + u32 data_len; +} __packed; + +struct nci_hci_create_pipe_params { + u8 src_gate; + u8 dest_host; + u8 dest_gate; +} __packed; + +struct nci_hci_create_pipe_resp { + u8 src_host; + u8 src_gate; + u8 dest_host; + u8 dest_gate; + u8 pipe; +} __packed; + +struct nci_hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct nci_hci_all_pipe_cleared_noti { + u8 host; +} __packed; + +struct nci_hcp_message { + u8 header; /* type -cmd,evt,rsp- + instruction */ + u8 data[]; +} __packed; + +struct nci_hcp_packet { + u8 header; /* cbit+pipe */ + struct nci_hcp_message message; +} __packed; + +#define NCI_HCI_ANY_SET_PARAMETER 0x01 +#define NCI_HCI_ANY_GET_PARAMETER 0x02 +#define NCI_HCI_ANY_CLOSE_PIPE 0x04 + +#define NCI_HFP_NO_CHAINING 0x80 + +#define NCI_NFCEE_ID_HCI 0x80 + +#define NCI_EVT_HOT_PLUG 0x03 + +#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 + +/* HCP headers */ +#define NCI_HCI_HCP_PACKET_HEADER_LEN 1 +#define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 +#define NCI_HCI_HCP_HEADER_LEN 2 + +/* 
HCP types */ +#define NCI_HCI_HCP_COMMAND 0x00 +#define NCI_HCI_HCP_EVENT 0x01 +#define NCI_HCI_HCP_RESPONSE 0x02 + +#define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 +#define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 +#define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 + +#define NCI_HCI_FRAGMENT 0x7f +#define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ + ((instr) & 0x3f)) + +#define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) +#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) +#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) + +/* HCI core */ +static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; + hdev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} + +static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + if (ndev->hci_dev->pipes[i].host == host) { + ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + } +} + +/* Fragment HCI data over NCI packet. + * NFC Forum NCI 10.2.2 Data Exchange: + * The payload of the Data Packets sent on the Logical Connection SHALL be + * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL + * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be + * applied to Data Messages in either direction. The HCI fragmentation mechanism + * is used if required. 
+ */ +static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, + const u8 data_type, const u8 *data, + size_t data_len) +{ + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int len, i, r; + u8 cb = pipe; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + *skb_push(skb, 1) = data_type; + + i = 0; + len = conn_info->max_pkt_payload_len; + + do { + /* If last packet add NCI_HFP_NO_CHAINING */ + if (i + conn_info->max_pkt_payload_len - + (skb->len + 1) >= data_len) { + cb |= NCI_HFP_NO_CHAINING; + len = data_len - i; + } else { + len = conn_info->max_pkt_payload_len - skb->len - 1; + } + + *skb_push(skb, 1) = cb; + + if (len > 0) + memcpy(skb_put(skb, len), data + i, len); + + r = nci_send_data(ndev, conn_info->conn_id, skb); + if (r < 0) + return r; + + i += len; + if (i < data_len) { + skb_trim(skb, 0); + skb_pull(skb, len); + } + } while (i < data_len); + + return i; +} + +static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_data *data = (struct nci_data *)opt; + + nci_hci_send_data(ndev, data->pipe, data->cmd, + data->data, data->data_len); +} + +int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, + const u8 *param, size_t param_len) +{ + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + return nci_hci_send_data(ndev, pipe, + NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), + param, param_len); +} +EXPORT_SYMBOL(nci_hci_send_event); + +int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = 
ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); + data.data = param; + data.data_len = param_len; + + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_send_cmd); + +static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb) +{ + if (ndev->ops->hci_event_received) + ndev->ops->hci_event_received(ndev, pipe, event, skb); +} + +static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, + u8 cmd, struct sk_buff *skb) +{ + u8 gate = ndev->hci_dev->pipes[pipe].gate; + u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; + u8 dest_gate, new_pipe; + struct nci_hci_create_pipe_resp *create_info; + struct nci_hci_delete_pipe_noti *delete_info; + struct nci_hci_all_pipe_cleared_noti *cleared_info; + + pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + + switch (cmd) { + case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: + if (skb->len != 5) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + create_info = (struct nci_hci_create_pipe_resp *)skb->data; + dest_gate = create_info->dest_gate; + new_pipe = create_info->pipe; + + /* Save the new created pipe and bind with local gate, + * the description for skb->data[3] is destination gate id + * but since we received this cmd from host controller, we + * are the destination and it is our local gate + */ + ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; + ndev->hci_dev->pipes[new_pipe].gate = dest_gate; + ndev->hci_dev->pipes[new_pipe].host = + create_info->src_host; + break; + case NCI_HCI_ANY_OPEN_PIPE: + /* If the pipe is not created report an error */ + if (gate == NCI_HCI_INVALID_GATE) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + break; + case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = 
NCI_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + + ndev->hci_dev->pipes[delete_info->pipe].gate = + NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[delete_info->pipe].host = + NCI_HCI_INVALID_HOST; + break; + case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + + cleared_info = + (struct nci_hci_all_pipe_cleared_noti *)skb->data; + nci_hci_reset_pipes_per_host(ndev, cleared_info->host); + break; + default: + pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); + break; + } + + if (ndev->ops->hci_cmd_received) + ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); + +exit: + nci_hci_send_data(ndev, pipe, status, NULL, 0); + + kfree_skb(skb); +} + +static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, + u8 result, struct sk_buff *skb) +{ + struct nci_conn_info *conn_info; + u8 status = result; + + if (result != NCI_HCI_ANY_OK) + goto exit; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->rx_skb = skb; + +exit: + nci_req_complete(ndev, status); +} + +/* Receive hcp message for pipe, with type and cmd. + * skb contains optional message data only. 
+ */ +static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, + u8 type, u8 instruction, struct sk_buff *skb) +{ + switch (type) { + case NCI_HCI_HCP_RESPONSE: + nci_hci_resp_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_COMMAND: + nci_hci_cmd_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_EVENT: + nci_hci_event_received(ndev, pipe, instruction, skb); + break; + default: + pr_err("UNKNOWN MSG Type %d, instruction=%d\n", + type, instruction); + kfree_skb(skb); + break; + } + + nci_req_complete(ndev, 0); +} + +static void nci_hci_msg_rx_work(struct work_struct *work) +{ + struct nci_hci_dev *hdev = + container_of(work, struct nci_hci_dev, msg_rx_work); + struct sk_buff *skb; + struct nci_hcp_message *message; + u8 pipe, type, instruction; + + while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { + pipe = skb->data[0]; + skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + message = (struct nci_hcp_message *)skb->data; + type = NCI_HCP_MSG_GET_TYPE(message->header); + instruction = NCI_HCP_MSG_GET_CMD(message->header); + skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + nci_hci_hcp_message_rx(hdev->ndev, pipe, + type, instruction, skb); + } +} + +void nci_hci_data_received_cb(void *context, + struct sk_buff *skb, int err) +{ + struct nci_dev *ndev = (struct nci_dev *)context; + struct nci_hcp_packet *packet; + u8 pipe, type, instruction; + struct sk_buff *hcp_skb; + struct sk_buff *frag_skb; + int msg_len; + + pr_debug("\n"); + + if (err) { + nci_req_complete(ndev, err); + return; + } + + packet = (struct nci_hcp_packet *)skb->data; + if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + return; + } + + /* it's the last fragment. Does it need re-aggregation? 
*/ + if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { + pipe = packet->header & NCI_HCI_FRAGMENT; + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + + msg_len = 0; + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len += (frag_skb->len - + NCI_HCI_HCP_PACKET_HEADER_LEN); + } + + hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + + msg_len, GFP_KERNEL); + if (!hcp_skb) { + nci_req_complete(ndev, -ENOMEM); + return; + } + + *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; + + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); + } + + skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); + } else { + packet->header &= NCI_HCI_FRAGMENT; + hcp_skb = skb; + } + + /* if this is a response, dispatch immediately to + * unblock waiting cmd context. Otherwise, enqueue to dispatch + * in separate context where handler can also execute command. 
+ */ + packet = (struct nci_hcp_packet *)hcp_skb->data; + type = NCI_HCP_MSG_GET_TYPE(packet->message.header); + if (type == NCI_HCI_HCP_RESPONSE) { + pipe = packet->header; + instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + + NCI_HCI_HCP_MESSAGE_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + } else { + skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); + schedule_work(&ndev->hci_dev->msg_rx_work); + } +} + +int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) +{ + struct nci_data data; + struct nci_conn_info *conn_info; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_OPEN_PIPE); + data.data = NULL; + data.data_len = 0; + + return nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); +} +EXPORT_SYMBOL(nci_hci_open_pipe); + +int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, + const u8 *param, size_t param_len) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 *tmp; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + tmp = kmalloc(1 + param_len, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + *tmp = idx; + memcpy(tmp + 1, param, param_len); + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_SET_PARAMETER); + data.data = tmp; + data.data_len = param_len + 1; + + r = nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + kfree(tmp); + return r; +} +EXPORT_SYMBOL(nci_hci_set_param); + +int nci_hci_get_param(struct nci_dev *ndev, u8 
gate, u8 idx, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_GET_PARAMETER); + data.data = &idx; + data.data_len = 1; + + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_get_param); + +int nci_hci_connect_gate(struct nci_dev *ndev, + u8 dest_host, u8 dest_gate, u8 pipe) +{ + int r; + + if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) + return 0; + + if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) + return -EADDRINUSE; + + if (pipe != NCI_HCI_INVALID_PIPE) + goto open_pipe; + + switch (dest_gate) { + case NCI_HCI_LINK_MGMT_GATE: + pipe = NCI_HCI_LINK_MGMT_PIPE; + break; + case NCI_HCI_ADMIN_GATE: + pipe = NCI_HCI_ADMIN_PIPE; + break; + } + +open_pipe: + r = nci_hci_open_pipe(ndev, pipe); + if (r < 0) + return r; + + ndev->hci_dev->pipes[pipe].gate = dest_gate; + ndev->hci_dev->pipes[pipe].host = dest_host; + ndev->hci_dev->gate2pipe[dest_gate] = pipe; + + return 0; +} +EXPORT_SYMBOL(nci_hci_connect_gate); + +static int nci_hci_dev_connect_gates(struct nci_dev *ndev, + u8 gate_count, + struct nci_hci_gate *gates) +{ + int r; + + while (gate_count--) { + r = nci_hci_connect_gate(ndev, gates->dest_host, + gates->gate, gates->pipe); + if (r < 0) + return r; + gates++; + } + + return 0; +} + +int nci_hci_dev_session_init(struct nci_dev *ndev) +{ + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int r; + + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = 0; + + conn_info = 
ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + conn_info->data_exchange_cb = nci_hci_data_received_cb; + conn_info->data_exchange_cb_context = ndev; + + nci_hci_reset_pipes(ndev->hci_dev); + + if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) + return -EPROTO; + + r = nci_hci_connect_gate(ndev, + ndev->hci_dev->init_data.gates[0].dest_host, + ndev->hci_dev->init_data.gates[0].gate, + ndev->hci_dev->init_data.gates[0].pipe); + if (r < 0) + goto exit; + + r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); + if (r < 0) + goto exit; + + if (skb->len && + skb->len == strlen(ndev->hci_dev->init_data.session_id) && + memcmp(ndev->hci_dev->init_data.session_id, + skb->data, skb->len) == 0 && + ndev->ops->hci_load_session) { + /* Restore gate<->pipe table from some proprietary location. */ + r = ndev->ops->hci_load_session(ndev); + if (r < 0) + goto exit; + } else { + r = nci_hci_dev_connect_gates(ndev, + ndev->hci_dev->init_data.gate_count, + ndev->hci_dev->init_data.gates); + if (r < 0) + goto exit; + + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, + ndev->hci_dev->init_data.session_id, + strlen(ndev->hci_dev->init_data.session_id)); + } + if (r == 0) + goto exit; + +exit: + kfree_skb(skb); + + return r; +} +EXPORT_SYMBOL(nci_hci_dev_session_init); + +struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) +{ + struct nci_hci_dev *hdev; + + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return NULL; + + skb_queue_head_init(&hdev->rx_hcp_frags); + INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); + skb_queue_head_init(&hdev->msg_rx_queue); + hdev->ndev = ndev; + + return hdev; +} diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 22e453cb787d..3218071072ac 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -43,6 +43,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct 
nci_core_conn_credit_ntf *ntf = (void *) skb->data; + struct nci_conn_info *conn_info; int i; pr_debug("num_entries %d\n", ntf->num_entries); @@ -59,11 +60,13 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, i, ntf->conn_entries[i].conn_id, ntf->conn_entries[i].credits); - if (ntf->conn_entries[i].conn_id == NCI_STATIC_RF_CONN_ID) { - /* found static rf connection */ - atomic_add(ntf->conn_entries[i].credits, - &ndev->credits_cnt); - } + conn_info = nci_get_conn_info_by_conn_id(ndev, + ntf->conn_entries[i].conn_id); + if (!conn_info) + return; + + atomic_add(ntf->conn_entries[i].credits, + &conn_info->credits_cnt); } /* trigger the next tx */ @@ -96,7 +99,7 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO); } static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, @@ -513,6 +516,7 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_intf_activated_ntf ntf; __u8 *data = skb->data; int err = NCI_STATUS_OK; @@ -537,6 +541,13 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, pr_debug("rf_tech_specific_params_len %d\n", ntf.rf_tech_specific_params_len); + /* If this contains a value of 0x00 (NFCEE Direct RF + * Interface) then all following parameters SHALL contain a + * value of 0 and SHALL be ignored. 
+ */ + if (ntf.rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT) + goto listen; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -614,11 +625,16 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, exit: if (err == NCI_STATUS_OK) { - ndev->max_data_pkt_payload_size = ntf.max_data_pkt_payload_size; - ndev->initial_num_credits = ntf.initial_num_credits; + conn_info = ndev->rf_conn_info; + if (!conn_info) + return; + + conn_info->max_pkt_payload_len = ntf.max_data_pkt_payload_size; + conn_info->initial_num_credits = ntf.initial_num_credits; /* set the available credits to initial value */ - atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + atomic_set(&conn_info->credits_cnt, + conn_info->initial_num_credits); /* store general bytes to be reported later in dep_link_up */ if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { @@ -643,6 +659,7 @@ exit: nci_req_complete(ndev, err); } } else { +listen: /* Listen mode */ atomic_set(&ndev->state, NCI_LISTEN_ACTIVE); if (err == NCI_STATUS_OK && @@ -661,10 +678,15 @@ exit: static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); + conn_info = ndev->rf_conn_info; + if (!conn_info) + return; + /* drop tx data queue */ skb_queue_purge(&ndev->tx_q); @@ -676,7 +698,8 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + nci_data_exchange_complete(ndev, NULL, NCI_STATIC_RF_CONN_ID, + -EIO); switch (ntf->type) { case NCI_DEACTIVATE_TYPE_IDLE_MODE: @@ -696,6 +719,32 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, NCI_STATUS_OK); } +static void 
nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + u8 status = NCI_STATUS_OK; + struct nci_nfcee_discover_ntf *nfcee_ntf = + (struct nci_nfcee_discover_ntf *)skb->data; + + pr_debug("\n"); + + /* NFCForum NCI 9.2.1 HCI Network Specific Handling + * If the NFCC supports the HCI Network, it SHALL return one, + * and only one, NFCEE_DISCOVER_NTF with a Protocol type of + * “HCI Access”, even if the HCI Network contains multiple NFCEEs. + */ + ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id; + ndev->cur_id = nfcee_ntf->nfcee_id; + + nci_req_complete(ndev, status); +} + +static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + pr_debug("\n"); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -734,6 +783,14 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_ntf_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_NTF: + nci_nfcee_discover_ntf_packet(ndev, skb); + break; + + case NCI_OP_RF_NFCEE_ACTION_NTF: + nci_nfcee_action_ntf_packet(ndev, skb); + break; + default: pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); break; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 041de51ccdbe..02486bc2ceea 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -140,13 +140,31 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); - if (status == NCI_STATUS_OK) + if (status == NCI_STATUS_OK) { atomic_set(&ndev->state, NCI_DISCOVERY); + conn_info = ndev->rf_conn_info; + if (!conn_info) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct nci_conn_info), + GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + conn_info->conn_id = NCI_STATIC_RF_CONN_ID; + INIT_LIST_HEAD(&conn_info->list); + 
list_add(&conn_info->list, &ndev->conn_info_list); + ndev->rf_conn_info = conn_info; + } + } + +exit: nci_req_complete(ndev, status); } @@ -178,6 +196,90 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, } } +static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_nfcee_discover_rsp *discover_rsp; + + if (skb->len != 2) { + nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR); + return; + } + + discover_rsp = (struct nci_nfcee_discover_rsp *)skb->data; + + if (discover_rsp->status != NCI_STATUS_OK || + discover_rsp->num_nfcee == 0) + nci_req_complete(ndev, discover_rsp->status); +} + +static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + nci_req_complete(ndev, status); +} + +static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + struct nci_conn_info *conn_info; + struct nci_core_conn_create_rsp *rsp; + + pr_debug("status 0x%x\n", status); + + if (status == NCI_STATUS_OK) { + rsp = (struct nci_core_conn_create_rsp *)skb->data; + + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(*conn_info), GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->id = ndev->cur_id; + conn_info->conn_id = rsp->conn_id; + + /* Note: data_exchange_cb and data_exchange_cb_context need to + * be specify out of nci_core_conn_create_rsp_packet + */ + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + + if (ndev->cur_id == ndev->hci_dev->nfcee_id) + ndev->hci_dev->conn_info = conn_info; + + conn_info->conn_id = rsp->conn_id; + conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; + atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); + } + +exit: + nci_req_complete(ndev, status); +} + +static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, + 
struct sk_buff *skb) +{ + struct nci_conn_info *conn_info; + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + if (status == NCI_STATUS_OK) { + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id); + if (conn_info) { + list_del(&conn_info->list); + devm_kfree(&ndev->nfc_dev->dev, conn_info); + } + } + nci_req_complete(ndev, status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -207,6 +309,14 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_core_set_config_rsp_packet(ndev, skb); break; + case NCI_OP_CORE_CONN_CREATE_RSP: + nci_core_conn_create_rsp_packet(ndev, skb); + break; + + case NCI_OP_CORE_CONN_CLOSE_RSP: + nci_core_conn_close_rsp_packet(ndev, skb); + break; + case NCI_OP_RF_DISCOVER_MAP_RSP: nci_rf_disc_map_rsp_packet(ndev, skb); break; @@ -223,6 +333,14 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_rsp_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_RSP: + nci_nfcee_discover_rsp_packet(ndev, skb); + break; + + case NCI_OP_NFCEE_MODE_SET_RSP: + nci_nfcee_mode_set_rsp_packet(ndev, skb); + break; + default: pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); break; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 44989fc8cddf..14a2d11581da 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,7 +102,8 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -496,6 +497,53 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + 
NFC_EVENT_SE_TRANSACTION); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) || + nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len, + evt_transaction->aid) || + nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len, + evt_transaction->params)) + goto nla_put_failure; + + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, @@ -518,7 +566,8 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -908,7 +957,8 @@ static int nfc_genl_send_params(struct sk_buff *msg, nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: @@ -1247,8 +1297,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) - goto nla_put_failure; + genlmsg_end(msg, hdr); } return 0; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 88d60064890e..a8ce80b47720 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -100,6 +100,8 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); int 
nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction); struct nfc_dev *nfc_get_device(unsigned int idx); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 770064c83711..b491c1c296fe 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -185,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, - const __be32 *mpls_lse) +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) + +static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, + const __be32 *mpls_lse, const __be32 *mask) { __be32 *stack; + __be32 lse; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -196,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, return err; stack = (__be32 *)skb_mpls_header(skb); + lse = MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), *mpls_lse }; + __be32 diff[] = { ~(*stack), lse }; + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = *mpls_lse; - key->mpls.top_lse = *mpls_lse; + *stack = lse; + flow_key->mpls.top_lse = lse; return 0; } @@ -212,7 +219,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = 0; @@ -222,7 +229,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if 
(vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = vlan->vlan_tci; @@ -230,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } -static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ethernet *eth_key) +/* 'src' is already properly masked. */ +static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) +{ + u16 *dst = (u16 *)dst_; + const u16 *src = (const u16 *)src_; + const u16 *mask = (const u16 *)mask_; + + SET_MASKED(dst[0], src[0], mask[0]); + SET_MASKED(dst[1], src[1], mask[1]); + SET_MASKED(dst[2], src[2], mask[2]); +} + +static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ethernet *key, + const struct ovs_key_ethernet *mask) { int err; + err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, + mask->eth_src); + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, + mask->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(key->eth.src, eth_key->eth_src); - ether_addr_copy(key->eth.dst, eth_key->eth_dst); + ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); + ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); return 0; } @@ -304,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } } +static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], + const __be32 mask[4], __be32 masked[4]) +{ + masked[0] = MASKED(old[0], addr[0], mask[0]); + masked[1] = MASKED(old[1], addr[1], mask[1]); + masked[2] = MASKED(old[2], addr[2], mask[2]); + masked[3] = MASKED(old[3], addr[3], mask[3]); +} + static 
void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4], bool recalculate_csum) @@ -315,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) { - nh->priority = tc >> 4; - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); + /* Bits 21-24 are always unmasked, so this retains their values. */ + SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, + u8 mask) { - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; - nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; - nh->flow_lbl[2] = fl & 0x000000FF; -} + new_ttl = MASKED(nh->ttl, new_ttl, mask); -static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) -{ csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv4 *key, + const struct ovs_key_ipv4 *mask) { struct iphdr *nh; + __be32 new_addr; int err; err = skb_ensure_writable(skb, skb_network_offset(skb) + @@ -347,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, nh = ip_hdr(skb); - if (ipv4_key->ipv4_src != nh->saddr) { - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); - key->ipv4.addr.src = ipv4_key->ipv4_src; - } + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually 
changed. + */ + if (mask->ipv4_src) { + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); - if (ipv4_key->ipv4_dst != nh->daddr) { - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); - key->ipv4.addr.dst = ipv4_key->ipv4_dst; + if (unlikely(new_addr != nh->saddr)) { + set_ip_addr(skb, nh, &nh->saddr, new_addr); + flow_key->ipv4.addr.src = new_addr; + } } + if (mask->ipv4_dst) { + new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); - if (ipv4_key->ipv4_tos != nh->tos) { - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); - key->ip.tos = nh->tos; + if (unlikely(new_addr != nh->daddr)) { + set_ip_addr(skb, nh, &nh->daddr, new_addr); + flow_key->ipv4.addr.dst = new_addr; + } } - - if (ipv4_key->ipv4_ttl != nh->ttl) { - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); - key->ip.ttl = ipv4_key->ipv4_ttl; + if (mask->ipv4_tos) { + ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos); + flow_key->ip.tos = nh->tos; + } + if (mask->ipv4_ttl) { + set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl); + flow_key->ip.ttl = nh->ttl; } return 0; } -static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv6 *ipv6_key) +static bool is_ipv6_mask_nonzero(const __be32 addr[4]) +{ + return !!(addr[0] | addr[1] | addr[2] | addr[3]); +} + +static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv6 *key, + const struct ovs_key_ipv6 *mask) { struct ipv6hdr *nh; int err; - __be32 *saddr; - __be32 *daddr; err = skb_ensure_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); @@ -384,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, return err; nh = ipv6_hdr(skb); - saddr = (__be32 *)&nh->saddr; - daddr = (__be32 *)&nh->daddr; - - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, - ipv6_key->ipv6_src, true); - memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, - sizeof(ipv6_key->ipv6_src)); - } - if 
(memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (is_ipv6_mask_nonzero(mask->ipv6_src)) { + __be32 *saddr = (__be32 *)&nh->saddr; + __be32 masked[4]; + + mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); + + if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { + set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + true); + memcpy(&flow_key->ipv6.addr.src, masked, + sizeof(flow_key->ipv6.addr.src)); + } + } + if (is_ipv6_mask_nonzero(mask->ipv6_dst)) { unsigned int offset = 0; int flags = IP6_FH_F_SKIP_RH; bool recalc_csum = true; - - if (ipv6_ext_hdr(nh->nexthdr)) - recalc_csum = ipv6_find_hdr(skb, &offset, - NEXTHDR_ROUTING, NULL, - &flags) != NEXTHDR_ROUTING; - - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, - ipv6_key->ipv6_dst, recalc_csum); - memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, - sizeof(ipv6_key->ipv6_dst)); + __be32 *daddr = (__be32 *)&nh->daddr; + __be32 masked[4]; + + mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked); + + if (unlikely(memcmp(daddr, masked, sizeof(masked)))) { + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = (ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, + NULL, &flags) + != NEXTHDR_ROUTING); + + set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + recalc_csum); + memcpy(&flow_key->ipv6.addr.dst, masked, + sizeof(flow_key->ipv6.addr.dst)); + } + } + if (mask->ipv6_tclass) { + ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + flow_key->ip.tos = ipv6_get_dsfield(nh); + } + if (mask->ipv6_label) { + set_ipv6_fl(nh, ntohl(key->ipv6_label), + ntohl(mask->ipv6_label)); + flow_key->ipv6.label = + *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + } + if (mask->ipv6_hlimit) { + SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + flow_key->ip.ttl = nh->hop_limit; } - - 
set_ipv6_tc(nh, ipv6_key->ipv6_tclass); - key->ip.tos = ipv6_get_dsfield(nh); - - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); - - nh->hop_limit = ipv6_key->ipv6_hlimit; - key->ip.ttl = ipv6_key->ipv6_hlimit; return 0; } /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, - __be16 new_port, __sum16 *check) + __be16 new_port, __sum16 *check) { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb_clear_hash(skb); -} - -static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) -{ - struct udphdr *uh = udp_hdr(skb); - - if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { - set_tp_port(skb, port, new_port, &uh->check); - - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } else { - *port = new_port; - skb_clear_hash(skb); - } } -static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_udp *key, + const struct ovs_key_udp *mask) { struct udphdr *uh; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -457,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) { - set_udp_port(skb, &uh->source, udp_port_key->udp_src); - key->tp.src = udp_port_key->udp_src; - } + /* Either of the masks is non-zero, so do not bother checking them. 
*/ + src = MASKED(uh->source, key->udp_src, mask->udp_src); + dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); - if (udp_port_key->udp_dst != uh->dest) { - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); - key->tp.dst = udp_port_key->udp_dst; + if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { + if (likely(src != uh->source)) { + set_tp_port(skb, &uh->source, src, &uh->check); + flow_key->tp.src = src; + } + if (likely(dst != uh->dest)) { + set_tp_port(skb, &uh->dest, dst, &uh->check); + flow_key->tp.dst = dst; + } + + if (unlikely(!uh->check)) + uh->check = CSUM_MANGLED_0; + } else { + uh->source = src; + uh->dest = dst; + flow_key->tp.src = src; + flow_key->tp.dst = dst; } + skb_clear_hash(skb); + return 0; } -static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_tcp *key, + const struct ovs_key_tcp *mask) { struct tcphdr *th; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -482,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) { - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); - key->tp.src = tcp_port_key->tcp_src; + src = MASKED(th->source, key->tcp_src, mask->tcp_src); + if (likely(src != th->source)) { + set_tp_port(skb, &th->source, src, &th->check); + flow_key->tp.src = src; } - - if (tcp_port_key->tcp_dst != th->dest) { - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); - key->tp.dst = tcp_port_key->tcp_dst; + dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + if (likely(dst != th->dest)) { + set_tp_port(skb, &th->dest, dst, &th->check); + flow_key->tp.dst = dst; } + skb_clear_hash(skb); return 0; } -static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_sctp *sctp_port_key) +static 
int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_sctp *key, + const struct ovs_key_sctp *mask) { + unsigned int sctphoff = skb_transport_offset(skb); struct sctphdr *sh; + __le32 old_correct_csum, new_csum, old_csum; int err; - unsigned int sctphoff = skb_transport_offset(skb); err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); - if (sctp_port_key->sctp_src != sh->source || - sctp_port_key->sctp_dst != sh->dest) { - __le32 old_correct_csum, new_csum, old_csum; + old_csum = sh->checksum; + old_correct_csum = sctp_compute_cksum(skb, sctphoff); - old_csum = sh->checksum; - old_correct_csum = sctp_compute_cksum(skb, sctphoff); + sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); - sh->source = sctp_port_key->sctp_src; - sh->dest = sctp_port_key->sctp_dst; + new_csum = sctp_compute_cksum(skb, sctphoff); - new_csum = sctp_compute_cksum(skb, sctphoff); + /* Carry any checksum errors through. */ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - /* Carry any checksum errors through. */ - sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - - skb_clear_hash(skb); - key->tp.src = sctp_port_key->sctp_src; - key->tp.dst = sctp_port_key->sctp_dst; - } + skb_clear_hash(skb); + flow_key->tp.src = sh->source; + flow_key->tp.dst = sh->dest; return 0; } @@ -653,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } -static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *nested_attr) +static int execute_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + /* Only tunnel set execution is supported without a mask. 
*/ + if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { + OVS_CB(skb)->egress_tun_info = nla_data(a); + return 0; + } + + return -EINVAL; +} + +/* Mask is at the midpoint of the data. */ +#define get_mask(a, type) ((const type)nla_data(a) + 1) + +static int execute_masked_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) { int err = 0; - switch (nla_type(nested_attr)) { + switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - skb->priority = nla_get_u32(nested_attr); - key->phy.priority = skb->priority; + SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - skb->mark = nla_get_u32(nested_attr); - key->phy.skb_mark = skb->mark; + SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.skb_mark = skb->mark; break; case OVS_KEY_ATTR_TUNNEL_INFO: - OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); + /* Masked data not supported for tunnel. */ + err = -EINVAL; break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, key, nla_data(nested_attr)); + err = set_eth_addr(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ethernet *)); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, key, nla_data(nested_attr)); + err = set_ipv4(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv4 *)); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, key, nla_data(nested_attr)); + err = set_ipv6(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv6 *)); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, key, nla_data(nested_attr)); + err = set_tcp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_tcp *)); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, key, nla_data(nested_attr)); + err = set_udp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_udp *)); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, key, nla_data(nested_attr)); + err = set_sctp(skb, flow_key, nla_data(a), + get_mask(a, 
struct ovs_key_sctp *)); break; case OVS_KEY_ATTR_MPLS: - err = set_mpls(skb, key, nla_data(nested_attr)); + err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, + __be32 *)); break; } @@ -818,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_set_action(skb, key, nla_data(a)); break; + case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = execute_masked_set_action(skb, key, nla_data(a)); + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b07349e82d78..ae5e77cdc0ca 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family dp_datapath_genl_family; +static const struct nla_policy flow_policy[]; + static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; @@ -419,7 +421,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (!dp_ifindex) return -ENODEV; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; @@ -461,10 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 0, upcall_info->cmd); upcall->dp_ifindex = dp_ifindex; - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(key, key, user_skb); + err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); BUG_ON(err); - nla_nest_end(user_skb, nla); if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -664,46 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, } } -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) 
{ - return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ - + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ - + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ - + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ + return ovs_identifier_is_ufid(sfid) && + !(ufid_flags & OVS_UFID_F_OMIT_KEY); } -/* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, - struct sk_buff *skb) +static bool should_fill_mask(uint32_t ufid_flags) { - struct nlattr *nla; - int err; + return !(ufid_flags & OVS_UFID_F_OMIT_MASK); +} - /* Fill flow key. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); - if (!nla) - return -EMSGSIZE; +static bool should_fill_actions(uint32_t ufid_flags) +{ + return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); +} - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); - if (err) - return err; +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, + uint32_t ufid_flags) +{ + size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - nla_nest_end(skb, nla); + /* OVS_FLOW_ATTR_UFID */ + if (sfid && ovs_identifier_is_ufid(sfid)) + len += nla_total_size(sfid->ufid_len); - /* Fill flow mask. 
*/ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); - if (!nla) - return -EMSGSIZE; + /* OVS_FLOW_ATTR_KEY */ + if (!sfid || should_fill_key(sfid, ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); - if (err) - return err; + /* OVS_FLOW_ATTR_MASK */ + if (should_fill_mask(ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - nla_nest_end(skb, nla); - return 0; + /* OVS_FLOW_ATTR_ACTIONS */ + if (should_fill_actions(ufid_flags)) + len += nla_total_size(acts->actions_len); + + return len + + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -774,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -787,19 +789,34 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_flow_cmd_fill_match(flow, skb); + err = ovs_nla_put_identifier(flow, skb); if (err) goto error; + if (should_fill_key(&flow->id, ufid_flags)) { + err = ovs_nla_put_masked_key(flow, skb); + if (err) + goto error; + } + + if (should_fill_mask(ufid_flags)) { + err = ovs_nla_put_mask(flow, skb); + if (err) + goto error; + } + err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); - if (err) - goto error; + if (should_fill_actions(ufid_flags)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); 
+ return 0; error: genlmsg_cancel(skb, ovs_header); @@ -808,15 +825,19 @@ error: /* May not be called with RCU read lock. */ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, struct genl_info *info, - bool always) + bool always, + uint32_t ufid_flags) { struct sk_buff *skb; + size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; - skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -827,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), + &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, ufid_flags); BUG_ON(retval < 0); return skb; } @@ -848,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow *flow, *new_flow; + struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; @@ -878,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. 
*/ - ovs_match_init(&match, &new_flow->unmasked_key, &mask); + ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], + &key, log); + if (error) + goto err_kfree_flow; /* Validate actions. */ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, @@ -894,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; } - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -906,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -923,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -941,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - /* Look for any overlapping flow. 
*/ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + /* The flow identifier has to be the same for flow updates. + * Look for any overlapping flow. + */ + if (unlikely(!ovs_flow_cmp(flow, &match))) { + if (ovs_identifier_is_key(&flow->id)) + flow = ovs_flow_tbl_lookup_exact(&dp->table, + &match); + else /* UFID matches but key is different */ + flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; @@ -959,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -1015,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; /* Extract key. */ error = -EINVAL; @@ -1025,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); @@ -1041,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } /* Can allocate before locking if have acts. */ - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -1055,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. 
*/ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; @@ -1071,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + ufid_flags); + if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1114,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; - int err; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); + int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (!a[OVS_FLOW_ATTR_KEY]) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); + } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); - return -EINVAL; + err = -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1135,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = 
ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1161,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); @@ -1182,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; @@ -1197,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, - info, false); + &flow->id, info, false, ufid_flags); if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /*To keep RCU checker happy. 
*/ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, + ufid_flags); rcu_read_unlock(); BUG_ON(err < 0); @@ -1223,9 +1291,18 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; + u32 ufid_flags; + int err; + + err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy); + if (err) + return err; + ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1248,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1264,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_ops dp_flow_genl_ops[] = { @@ -1349,7 +1428,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); @@ -1723,7 +1803,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (err == -EMSGSIZE) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + 
return 0; nla_put_failure: err = -EMSGSIZE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index da2fae0873a5..e2c348b8baca 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,7 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); - int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -472,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) */ key->eth.tci = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) @@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(GENEVE_OPTS(key, tun_info->options_len), + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), tun_info->options, tun_info->options_len); key->tun_opts_len = tun_info->options_len; } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a8b30f334388..a076e445ccc2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - const struct geneve_opt *options; + const void *options; u8 options_len; }; @@ -61,10 +61,10 @@ struct ovs_tunnel_info { * maximum size. This allows us to get the benefits of variable length * matching for small options. 
*/ -#define GENEVE_OPTS(flow_key, opt_len) \ - ((struct geneve_opt *)((flow_key)->tun_opts + \ - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ - opt_len)) +#define TUN_METADATA_OFFSET(opt_len) \ + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) +#define TUN_METADATA_OPTS(flow_key, opt_len) \ + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be32 saddr, __be32 daddr, @@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { tun_info->tunnel.tun_id = tun_id; @@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, @@ -197,6 +197,16 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +#define MAX_UFID_LENGTH 16 /* 128 bits */ + +struct sw_flow_id { + u32 ufid_len; + union { + u32 ufid[MAX_UFID_LENGTH / 4]; + struct sw_flow_key *unmasked_key; + }; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -213,13 +223,15 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_table, ufid_table; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; - struct sw_flow_key unmasked_key; + struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each NUMA node. 
First one @@ -243,6 +255,16 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) +{ + return sfid->ufid_len; +} + +static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid) +{ + return !ovs_identifier_is_ufid(sfid); +} + void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1eecf707613..993281e6278d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,14 @@ #include <net/mpls.h> #include "flow_netlink.h" +#include "vport-vxlan.h" + +struct ovs_len_tbl { + int len; + const struct ovs_len_tbl *next; +}; + +#define OVS_ATTR_NESTED -1 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 
+ */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ -static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), - [OVS_KEY_ATTR_TUNNEL] = -1, - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), +static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) 
}, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, + .next = ovs_tunnel_key_lens, }, + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { + expected_len = ovs_key_lens[type].len; + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); } - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, - nla_len(a)); + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), nla_len(a), is_mask); return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, 
true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); int err; - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, - }; - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { OVS_NLERR(log, "Tunnel attr %d out of range max %d", type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a) && - ovs_tunnel_key_lens[type] != -1) { + if (ovs_tunnel_key_lens[type].len != nla_len(a) && + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", - type, nla_len(a), ovs_tunnel_key_lens[type]); + type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; } @@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case 
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } static int __ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) @@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { struct nlattr *nla; int err; @@ -675,7 +762,7 
@@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) +static void nlattr_set(struct nlattr *attr, u8 val, + const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - /* We assume that ovs_key_lens[type] == -1 means that type is a - * nested attribute - */ - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) - nlattr_set(nla, val, false); + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next); else memset(nla_data(nla), val, nla_len(nla)); } @@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void mask_set_nlattr(struct nlattr *attr, u8 val) { - nlattr_set(attr, val, true); + nlattr_set(attr, val, ovs_key_lens); } /** @@ -1095,6 +1180,59 @@ free_newmask: return err; } +static size_t get_ufid_len(const struct nlattr *attr, bool log) +{ + size_t len; + + if (!attr) + return 0; + + len = nla_len(attr); + if (len < 1 || len > MAX_UFID_LENGTH) { + OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", + nla_len(attr), MAX_UFID_LENGTH); + return 0; + } + + return len; +} + +/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, + * or false otherwise. 
+ */ +bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, + bool log) +{ + sfid->ufid_len = get_ufid_len(attr, log); + if (sfid->ufid_len) + memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); + + return sfid->ufid_len; +} + +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log) +{ + struct sw_flow_key *new_key; + + if (ovs_nla_get_ufid(sfid, ufid, log)) + return 0; + + /* If UFID was not provided, use unmasked key. */ + new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); + if (!new_key) + return -ENOMEM; + memcpy(new_key, key, sizeof(*key)); + sfid->unmasked_key = new_key; + + return 0; +} + +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) +{ + return attr ? nla_get_u32(attr) : 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. @@ -1131,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, return metadata_from_nlattrs(&match, &attrs, a, false, log); } -int ovs_nla_put_flow(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) +static int __ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, bool is_mask, + struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla, *encap; - bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1148,10 +1286,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask)) { - const struct geneve_opt *opts = NULL; + const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) - opts = GENEVE_OPTS(output, swkey->tun_opts_len); + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len)) @@ -1346,6 +1484,49 @@ 
nla_put_failure: return -EMSGSIZE; } +int ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, int attr, bool is_mask, + struct sk_buff *skb) +{ + int err; + struct nlattr *nla; + + nla = nla_nest_start(skb, attr); + if (!nla) + return -EMSGSIZE; + err = __ovs_nla_put_key(swkey, output, is_mask, skb); + if (err) + return err; + nla_nest_end(skb, nla); + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, + flow->id.ufid); + + return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->mask->key, &flow->key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. 
*/ +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->key, &flow->mask->key, + OVS_FLOW_ATTR_MASK, true, skb); +} + #define MAX_ACTIONS_BUFSIZE (32 * 1024) static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) @@ -1514,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key, - __be16 eth_type) -{ - if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && - (flow_key->tp.src || flow_key->tp.dst)) - return 0; - - return -EINVAL; -} - void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) @@ -1540,6 +1711,34 @@ void ovs_match_init(struct sw_flow_match *match, } } +static int validate_geneve_opts(struct sw_flow_key *key) +{ + struct geneve_opt *option; + int opts_len = key->tun_opts_len; + bool crit_opt = false; + + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key->tun_key.tun_flags |= crit_opt ? 
TUNNEL_CRIT_OPT : 0; + + return 0; +} + static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { @@ -1547,36 +1746,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - struct geneve_opt *option = GENEVE_OPTS(&key, - key.tun_opts_len); - int opts_len = key.tun_opts_len; - bool crit_opt = false; - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*option)) - return -EINVAL; - - len = sizeof(*option) + option->length * 4; - if (len > opts_len) - return -EINVAL; - - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); - - option = (struct geneve_opt *)((u8 *)option + len); - opts_len -= len; - }; - - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); @@ -1597,9 +1783,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, * everything else will go away after flow setup. We can append * it to tun_info and then point there. 
*/ - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); - tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy((tun_info + 1), + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); + tun_info->options = (tun_info + 1); } else { tun_info->options = NULL; } @@ -1609,21 +1795,43 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +/* Return false if there are any non-masked bits set. + * Mask follows data immediately, before any netlink padding. + */ +static bool validate_masked(u8 *data, int len) +{ + u8 *mask = data + len; + + while (len--) + if (*data++ & ~*mask++) + return false; + + return true; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type, bool log) + bool *skip_copy, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); + size_t key_len; /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; + key_len = nla_len(ovs_key); + if (masked) + key_len /= 2; + if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) + (ovs_key_lens[key_type].len != key_len && + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) + return -EINVAL; + + if (masked && !validate_masked(nla_data(ovs_key), key_len)) return -EINVAL; switch (key_type) { @@ -1640,7 +1848,10 @@ static int validate_set(const struct nlattr *a, if (eth_p_mpls(eth_type)) return -EINVAL; - *set_tun = true; + if (masked) + return -EINVAL; /* Masked tunnel set not supported. 
*/ + + *skip_copy = true; err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; @@ -1650,48 +1861,66 @@ static int validate_set(const struct nlattr *a, if (eth_type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv4 *mask = ipv4_key + 1; + + /* Non-writeable fields. */ + if (mask->ipv4_proto || mask->ipv4_frag) + return -EINVAL; + } else { + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + } break; case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv6 *mask = ipv6_key + 1; + /* Non-writeable fields. */ + if (mask->ipv6_proto || mask->ipv6_frag) + return -EINVAL; + + /* Invalid bits in the flow label mask? 
*/ + if (ntohl(mask->ipv6_label) & 0xFFF00000) + return -EINVAL; + } else { + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + } if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; break; case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) @@ -1699,15 +1928,45 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; default: return -EINVAL; } + /* Convert non-masked non-tunnel set actions to masked set actions. */ + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { + int start, len = key_len * 2; + struct nlattr *at; + + *skip_copy = true; + + start = add_nested_action_start(sfa, + OVS_ACTION_ATTR_SET_TO_MASKED, + log); + if (start < 0) + return start; + + at = __add_action(sfa, key_type, NULL, len, log); + if (IS_ERR(at)) + return PTR_ERR(at); + + memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ + memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ + /* Clear non-writeable bits from otherwise writeable fields. 
*/ + if (key_type == OVS_KEY_ATTR_IPV6) { + struct ovs_key_ipv6 *mask = nla_data(at) + key_len; + + mask->ipv6_label &= htonl(0x000FFFFF); + } + add_nested_action_end(*sfa, start); + } + return 0; } @@ -1769,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) }; @@ -1864,7 +2124,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, log); + &skip_copy, eth_type, false, log); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SET_MASKED: + err = validate_set(a, key, sfa, + &skip_copy, eth_type, true, log); if (err) return err; break; @@ -1894,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +/* 'key' must be the masked key. */ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) @@ -1981,6 +2249,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int masked_set_action_to_set_action_attr(const struct nlattr *a, + struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + size_t key_len = nla_len(ovs_key) / 2; + + /* Revert the conversion we did from a non-masked set action to + * masked set action. 
+ */ + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + return -EMSGSIZE; + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -1996,6 +2279,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = masked_set_action_to_set_action_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 577f12be3459..5c3d75bff310 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); -int ovs_nla_put_flow(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, + int attr, bool is_mask, struct sk_buff *); int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); + int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log); +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); + int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, 
bool log); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5899bf161c61..4613df8c8290 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -85,6 +85,8 @@ struct sw_flow *ovs_flow_alloc(void) flow->sf_acts = NULL; flow->mask = NULL; + flow->id.unmasked_key = NULL; + flow->id.ufid_len = 0; flow->stats_last_writer = NUMA_NO_NODE; /* Initialize the default stat node. */ @@ -139,6 +141,8 @@ static void flow_free(struct sw_flow *flow) { int node; + if (ovs_identifier_is_key(&flow->id)) + kfree(flow->id.unmasked_key); kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) @@ -200,18 +204,28 @@ static struct table_instance *table_instance_alloc(int new_size) int ovs_flow_tbl_init(struct flow_table *table) { - struct table_instance *ti; + struct table_instance *ti, *ufid_ti; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) return -ENOMEM; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ufid_ti) + goto free_ti; + rcu_assign_pointer(table->ti, ti); + rcu_assign_pointer(table->ufid_ti, ufid_ti); INIT_LIST_HEAD(&table->mask_list); table->last_rehash = jiffies; table->count = 0; + table->ufid_count = 0; return 0; + +free_ti: + __table_instance_destroy(ti); + return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) @@ -221,13 +235,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti, + bool deferred) { int i; if (!ti) return; + BUG_ON(!ufid_ti); if (ti->keep_flows) goto skip_flows; @@ -236,18 +253,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; int ver = ti->node_ver; + int ufid_ver = ufid_ti->node_ver; - 
hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); + hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { + hlist_del_rcu(&flow->flow_table.node[ver]); + if (ovs_identifier_is_ufid(&flow->id)) + hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); ovs_flow_free(flow, deferred); } } skip_flows: - if (deferred) + if (deferred) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - else + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); + } else { __table_instance_destroy(ti); + __table_instance_destroy(ufid_ti); + } } /* No need for locking this function is called from RCU callback or @@ -256,8 +279,9 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); + struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -272,7 +296,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; @@ -294,16 +318,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) (hash & (ti->n_buckets - 1))); } -static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->flow_table.hash); + hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); +} + +static void ufid_table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) { struct hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], 
head); + head = find_bucket(ti, flow->ufid_table.hash); + hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool ufid) { int old_ver; int i; @@ -318,15 +352,21 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (ufid) + hlist_for_each_entry(flow, head, + ufid_table.node[old_ver]) + ufid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, + flow_table.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) + int n_buckets, bool ufid) { struct table_instance *new_ti; @@ -334,32 +374,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, if (!new_ti) return NULL; - flow_table_copy_flows(ti, new_ti); + flow_table_copy_flows(ti, new_ti, ufid); return new_ti; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti; + struct table_instance *old_ufid_ti, *new_ufid_ti; - old_ti = ovsl_dereference(flow_table->ti); new_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!new_ti) return -ENOMEM; + new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ufid_ti) + goto err_free_ti; + + old_ti = ovsl_dereference(flow_table->ti); + old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); flow_table->last_rehash = jiffies; flow_table->count = 0; + flow_table->ufid_count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, old_ufid_ti, true); return 0; + +err_free_ti: + 
__table_instance_destroy(new_ti); + return -ENOMEM; } -static u32 flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) +static u32 flow_hash(const struct sw_flow_key *key, + const struct sw_flow_key_range *range) { + int key_start = range->start; + int key_end = range->end; const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; @@ -395,19 +448,20 @@ static bool cmp_key(const struct sw_flow_key *key1, static bool flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, - int key_start, int key_end) + const struct sw_flow_key_range *range) { - return cmp_key(&flow->key, key, key_start, key_end); + return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); int key_end = match->range.end; - return cmp_key(&flow->unmasked_key, key, key_start, key_end); + BUG_ON(ovs_identifier_is_ufid(&flow->id)); + return cmp_key(flow->id.unmasked_key, key, key_start, key_end); } static struct sw_flow *masked_flow_lookup(struct table_instance *ti, @@ -416,18 +470,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, { struct sw_flow *flow; struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_mask_key(&masked_key, unmasked, mask); - hash = flow_hash(&masked_key, key_start, key_end); + hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && - flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + 
if (flow->mask == mask && flow->flow_table.hash == hash && + flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } return NULL; @@ -469,7 +520,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { flow = masked_flow_lookup(ti, match->key, mask); - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + if (flow && ovs_identifier_is_key(&flow->id) && + ovs_flow_cmp_unmasked_key(flow, match)) + return flow; + } + return NULL; +} + +static u32 ufid_hash(const struct sw_flow_id *sfid) +{ + return jhash(sfid->ufid, sfid->ufid_len, 0); +} + +static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->id.ufid_len != sfid->ufid_len) + return false; + + return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); +} + +bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return flow_cmp_masked_key(flow, match->key, &match->range); + + return ovs_flow_cmp_unmasked_key(flow, match); +} + +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, + const struct sw_flow_id *ufid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + hash = ufid_hash(ufid); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + if (flow->ufid_table.hash == hash && + ovs_flow_cmp_ufid(flow, ufid)) return flow; } return NULL; @@ -486,9 +578,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return num; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static struct table_instance *table_instance_expand(struct table_instance *ti, + bool ufid) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } /* Remove 
'mask' from the mask list, if it is not needed any more. */ @@ -513,10 +606,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + table->ufid_count--; + } /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be * accessible as long as the RCU read lock is held. @@ -585,34 +683,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, } /* Must be called with OVS mutex held. */ -int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - const struct sw_flow_mask *mask) +static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) { struct table_instance *new_ti = NULL; struct table_instance *ti; - int err; - - err = flow_mask_insert(table, flow, mask); - if (err) - return err; - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); + flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; /* Expand table, if necessary, to make room. 
*/ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + new_ti = table_instance_expand(ti, false); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + new_ti = table_instance_rehash(ti, ti->n_buckets, false); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); table->last_rehash = jiffies; } +} + +/* Must be called with OVS mutex held. */ +static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti; + + flow->ufid_table.hash = ufid_hash(&flow->id); + ti = ovsl_dereference(table->ufid_ti); + ufid_table_instance_insert(ti, flow); + table->ufid_count++; + + /* Expand table, if necessary, to make room. */ + if (table->ufid_count > ti->n_buckets) { + struct table_instance *new_ti; + + new_ti = table_instance_expand(ti, true); + if (new_ti) { + rcu_assign_pointer(table->ufid_ti, new_ti); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + } + } +} + +/* Must be called with OVS mutex held. 
*/ +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + const struct sw_flow_mask *mask) +{ + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + flow_key_insert(table, flow); + if (ovs_identifier_is_ufid(&flow->id)) + flow_ufid_insert(table, flow); + return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 309fa6415689..616eda10d955 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -47,9 +47,11 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; + struct table_instance __rcu *ufid_ti; struct list_head mask_list; unsigned long last_rehash; unsigned int count; + unsigned int ufid_count; }; extern struct kmem_cache *flow_stats_cache; @@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match); -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match); +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, + const struct sw_flow_id *); + +bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 484864dd0e68..bf02fd5808c9 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -9,8 +9,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/version.h> - #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> @@ -90,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? 
TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -172,7 +170,7 @@ error: static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) { - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct ovs_tunnel_info *tun_info; struct net *net = ovs_dp_get_net(vport->dp); struct geneve_port *geneve_port = geneve_vport(vport); @@ -180,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -191,16 +189,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &tun_info->tunnel; - - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -211,12 +200,19 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, - false); + tun_key->tun_flags, vni, opts_len, opts, + !!(tun_key->tun_flags & TUNNEL_CSUM), false); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index d4168c442db5..f17ac9642f4e 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, 
u32 info, static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct flowi4 fl; struct rtable *rt; int min_headroom; @@ -148,15 +148,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto err_free_skb; @@ -166,7 +158,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index d7c46b301024..3277a7520e31 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in <net/vxlan.h> */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. 
*/ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0); + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP && md->gbp) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + 
vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -140,17 +203,34 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; __be16 df; int err; + u32 vxflags; if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; @@ -158,15 +238,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; 
- - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -178,13 +250,15 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); + vxflags = vxlan_port->exts | + (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), - false); + &md, false, vxflags); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include <linux/kernel.h> +#include <linux/types.h> + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 2034c6d9cb5a..ec2954ffc690 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + stats->rx_bytes += skb->len + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; @@ -594,14 +595,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, * The process may need to be changed if the corresponding process * in vports ops changed. 
*/ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb_mark; - fl.flowi4_proto = ipproto; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 99c8e71d9e6c..f8ae295fb001 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -236,4 +236,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, + const struct ovs_key_ipv4_tunnel *key, + u32 mark, + struct flowi4 *fl, + u8 protocol) +{ + struct rtable *rt; + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->ipv4_dst; + fl->saddr = key->ipv4_src; + fl->flowi4_tos = RT_TOS(key->ipv4_tos); + fl->flowi4_mark = mark; + fl->flowi4_proto = protocol; + + rt = ip_route_output_key(net, fl); + return rt; +} #endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9cfe2e1dd8b5..9c28cec1a083 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -986,8 +986,8 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - if (vlan_tx_tag_present(pkc->skb)) { - ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + if (skb_vlan_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -2000,8 +2000,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - if (vlan_tx_tag_present(skb)) { - 
h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -2102,7 +2102,7 @@ static bool ll_header_truncated(const struct net_device *dev, int len) { /* net device doesn't like empty head */ if (unlikely(len <= dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", + net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n", current->comm, len, dev->hard_header_len); return true; } @@ -3010,8 +3010,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - if (vlan_tx_tag_present(skb)) { - aux.tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { diff --git a/net/packet/diag.c b/net/packet/diag.c index 92f2c7107eec..0ed68f0238bf 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,7 +177,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, PACKET_DIAG_FILTER)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b64151ade6b3..bc5ee5fbe6ae 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -121,7 +121,8 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_index = dev->ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -190,7 +191,8 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, if (nla_put_u8(skb, RTA_DST, 
dst) || nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -270,27 +272,23 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + u8 addr; rcu_read_lock(); - for (addr = 0; addr < 64; addr++) { - struct net_device *dev; + for (addr = cb->args[0]; addr < 64; addr++) { + struct net_device *dev = phonet_route_get_rcu(net, addr << 2); - dev = phonet_route_get_rcu(net, addr << 2); if (!dev) continue; - if (addr_idx++ < addr_start_idx) - continue; if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE)) + cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0) goto out; } out: rcu_read_unlock(); - cb->args[0] = addr_idx; - cb->args[1] = 0; + cb->args[0] = addr; return skb->len; } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 1dde91e3dc70..bd3825d38abc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -409,7 +409,7 @@ try_again: posted = IB_GET_POST_CREDITS(oldval); avail = IB_GET_SEND_CREDITS(oldval); - rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n", + rdsdebug("wanted=%u credits=%u posted=%u\n", wanted, avail, posted); /* The last credit must be used to send a credit update. */ @@ -453,7 +453,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits) if (credits == 0) return; - rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n", + rdsdebug("credits=%u current=%u%s\n", credits, IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? 
", ll_send_full" : ""); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a91e1db62ee6..a6c2bea9f8f9 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -590,8 +590,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) /* Actually this may happen quite frequently, when * an outgoing connect raced with an incoming connect. */ - rdsdebug("rds_iw_conn_shutdown: failed to disconnect," - " cm: %p err %d\n", ic->i_cm_id, err); + rdsdebug("failed to disconnect, cm: %p err %d\n", + ic->i_cm_id, err); } if (ic->i_cm_id->qp) { diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 9105ea03aec5..13834780a308 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -361,7 +361,7 @@ try_again: posted = IB_GET_POST_CREDITS(oldval); avail = IB_GET_SEND_CREDITS(oldval); - rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n", + rdsdebug("wanted=%u credits=%u posted=%u\n", wanted, avail, posted); /* The last credit must be used to send a credit update. */ @@ -405,7 +405,7 @@ void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits) if (credits == 0) return; - rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n", + rdsdebug("credits=%u current=%u%s\n", credits, IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? 
", ll_send_full" : ""); diff --git a/net/rds/message.c b/net/rds/message.c index 5a21e6f5986f..756c73729126 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -266,7 +266,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) { - unsigned long to_copy; + unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; @@ -293,9 +293,9 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) sg->length - sg_off); rds_stats_add(s_copy_from_user, to_copy); - ret = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, - to_copy, from); - if (ret != to_copy) + nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, + to_copy, from); + if (nbytes != to_copy) return -EFAULT; sg_off += to_copy; diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 3f4a0bbeed3d..d978f2f46ff3 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -170,6 +170,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = { { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E40", RFKILL_TYPE_BLUETOOTH }, { "BCM2E64", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, { "LNV4752", RFKILL_TYPE_GPS }, diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 74c0fcd36838..5394b6be46ec 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + if (!skb->len) { + _leave("UDP empty message"); + kfree_skb(skb); + return; + } rxrpc_new_skb(skb); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index e1a9373e5979..8331c95e1522 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -232,10 +232,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != 
RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - mm_segment_t oldfs = get_fs(); - set_fs(KERNEL_DS); ret = rxrpc_send_data(NULL, call->socket, call, msg, len); - set_fs(oldfs); } release_sock(&call->socket->sk); @@ -529,13 +526,11 @@ static int rxrpc_send_data(struct kiocb *iocb, struct msghdr *msg, size_t len) { struct rxrpc_skb_priv *sp; - unsigned char __user *from; struct sk_buff *skb; - const struct iovec *iov; struct sock *sk = &rx->sk; long timeo; bool more; - int ret, ioc, segment, copied; + int ret, copied; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -545,25 +540,17 @@ static int rxrpc_send_data(struct kiocb *iocb, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; - iov = msg->msg_iter.iov; - ioc = msg->msg_iter.nr_segs - 1; - from = iov->iov_base; - segment = iov->iov_len; - iov++; more = msg->msg_flags & MSG_MORE; skb = call->tx_pending; call->tx_pending = NULL; copied = 0; - do { + if (len > iov_iter_count(&msg->msg_iter)) + len = iov_iter_count(&msg->msg_iter); + while (len) { int copy; - if (segment > len) - segment = len; - - _debug("SEGMENT %d @%p", segment, from); - if (!skb) { size_t size, chunk, max, space; @@ -631,13 +618,13 @@ static int rxrpc_send_data(struct kiocb *iocb, /* append next segment of data to the current buffer */ copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > segment) - copy = segment; + if (copy > len) + copy = len; if (copy > sp->remain) copy = sp->remain; _debug("add"); - ret = skb_add_data(skb, from, copy); + ret = skb_add_data(skb, &msg->msg_iter, copy); _debug("added"); if (ret < 0) goto efault; @@ -646,18 +633,6 @@ static int rxrpc_send_data(struct kiocb *iocb, copied += copy; len -= copy; - segment -= copy; - from += copy; - while (segment == 0 && ioc > 0) { - from = iov->iov_base; - segment = iov->iov_len; - iov++; - ioc--; - } - if (len == 0) { - segment = 0; - ioc = 0; - } /* check for the far side aborting the 
call or a network error * occurring */ @@ -665,7 +640,7 @@ static int rxrpc_send_data(struct kiocb *iocb, goto call_aborted; /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || (segment == 0 && !more)) { + if (sp->remain <= 0 || (!len && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -711,11 +686,10 @@ static int rxrpc_send_data(struct kiocb *iocb, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, segment == 0 && !more); + rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more); skb = NULL; } - - } while (segment > 0); + } success: ret = copied; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c54c9d9d1ffb..899d0319f2b2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -698,6 +698,30 @@ config NET_ACT_VLAN To compile this code as a module, choose M here: the module will be called act_vlan. +config NET_ACT_BPF + tristate "BPF based action" + depends on NET_CLS_ACT + ---help--- + Say Y here to execute BPF code on packets. The BPF code will decide + if the packet should be dropped or not. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_bpf. + +config NET_ACT_CONNMARK + tristate "Netfilter Connection Mark Retriever" + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NF_CONNTRACK && NF_CONNTRACK_MARK + ---help--- + Say Y here to allow retrieving of conn mark + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_connmark. 
+ config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 679f24ae7f93..7ca7f4c1b8c2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o +obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c new file mode 100644 index 000000000000..82c5d7fc1988 --- /dev/null +++ b/net/sched/act_bpf.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/filter.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_bpf.h> +#include <net/tc_act/tc_bpf.h> + +#define BPF_TAB_MASK 15 + +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_bpf *b = a->priv; + int action; + int filter_res; + + spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; + bstats_update(&b->tcf_bstats, skb); + action = b->tcf_action; + + filter_res = BPF_PROG_RUN(b->filter, skb); + if (filter_res == 0) { + /* Return code 0 from the BPF program + * is being interpreted as a drop here. 
+ */ + action = TC_ACT_SHOT; + b->tcf_qstats.drops++; + } + + spin_unlock(&b->tcf_lock); + return action; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *tp = skb_tail_pointer(skb); + struct tcf_bpf *b = a->priv; + struct tc_act_bpf opt = { + .index = b->tcf_index, + .refcnt = b->tcf_refcnt - ref, + .bindcnt = b->tcf_bindcnt - bind, + .action = b->tcf_action, + }; + struct tcf_t t; + struct nlattr *nla; + + if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * + sizeof(struct sock_filter)); + if (!nla) + goto nla_put_failure; + + memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + + t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(b->tcf_tm.expires); + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, tp); + return -1; +} + +static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { + [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *b; + u16 bpf_size, bpf_num_ops; + struct sock_filter *bpf_ops; + struct sock_fprog_kern tmp; + struct bpf_prog *fp; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + if (!tb[TCA_ACT_BPF_PARMS] || + !tb[TCA_ACT_BPF_OPS_LEN] || 
!tb[TCA_ACT_BPF_OPS]) + return -EINVAL; + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) + return -EINVAL; + + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (!bpf_ops) + return -ENOMEM; + + memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); + + tmp.len = bpf_num_ops; + tmp.filter = bpf_ops; + + ret = bpf_prog_create(&fp, &tmp); + if (ret) + goto free_bpf_ops; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); + if (ret) + goto destroy_fp; + + ret = ACT_P_CREATED; + } else { + if (bind) + goto destroy_fp; + tcf_hash_release(a, bind); + if (!ovr) { + ret = -EEXIST; + goto destroy_fp; + } + } + + b = to_bpf(a); + spin_lock_bh(&b->tcf_lock); + b->tcf_action = parm->action; + b->bpf_num_ops = bpf_num_ops; + b->bpf_ops = bpf_ops; + b->filter = fp; + spin_unlock_bh(&b->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; + +destroy_fp: + bpf_prog_destroy(fp); +free_bpf_ops: + kfree(bpf_ops); + return ret; +} + +static void tcf_bpf_cleanup(struct tc_action *a, int bind) +{ + struct tcf_bpf *b = a->priv; + + bpf_prog_destroy(b->filter); +} + +static struct tc_action_ops act_bpf_ops = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, +}; + +static int __init bpf_init_module(void) +{ + return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); +} + +static void __exit bpf_cleanup_module(void) +{ + tcf_unregister_action(&act_bpf_ops); +} + +module_init(bpf_init_module); +module_exit(bpf_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("TC BPF based action"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_connmark.c 
b/net/sched/act_connmark.c new file mode 100644 index 000000000000..8e472518f9f6 --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,192 @@ +/* + * net/sched/act_connmark.c netfilter connmark retriever action + * skb mark is over-written + * + * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/pkt_cls.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/act_api.h> +#include <uapi/linux/tc_act/tc_connmark.h> +#include <net/tc_act/tc_connmark.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> + +#define CONNMARK_TAB_MASK 3 + +static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + const struct nf_conntrack_tuple_hash *thash; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct tcf_connmark_info *ca = a->priv; + struct nf_conn *c; + int proto; + + spin_lock(&ca->tcf_lock); + ca->tcf_tm.lastuse = jiffies; + bstats_update(&ca->tcf_bstats, skb); + + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; + } else { + goto out; + } + + c = nf_ct_get(skb, &ctinfo); + if (c) { + skb->mark = c->mark; + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + nf_ct_put(c); + goto out; + } + + if 
(!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + proto, &tuple)) + goto out; + + thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + if (!thash) + goto out; + + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + skb->mark = c->mark; + nf_ct_put(c); + +out: + skb->nfct = NULL; + spin_unlock(&ca->tcf_lock); + return ca->tcf_action; +} + +static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { + [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, +}; + +static int tcf_connmark_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_connmark_info *ci; + struct tc_connmark *parm; + int ret = 0; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + if (ret < 0) + return ret; + + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + if (ret) + return ret; + + ci = to_connmark(a); + ci->tcf_action = parm->action; + ci->zone = parm->zone; + + tcf_hash_insert(a); + ret = ACT_P_CREATED; + } else { + ci = to_connmark(a); + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + /* replacing action and zone */ + ci->tcf_action = parm->action; + ci->zone = parm->zone; + } + + return ret; +} + +static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_connmark_info *ci = a->priv; + + struct tc_connmark opt = { + .index = ci->tcf_index, + .refcnt = ci->tcf_refcnt - ref, + .bindcnt = ci->tcf_bindcnt - bind, + .action = ci->tcf_action, + .zone = ci->zone, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + 
t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_connmark_ops = { + .kind = "connmark", + .type = TCA_ACT_CONNMARK, + .owner = THIS_MODULE, + .act = tcf_connmark, + .dump = tcf_connmark_dump, + .init = tcf_connmark_init, +}; + +static int __init connmark_init_module(void) +{ + return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); +} + +static void __exit connmark_cleanup_module(void) +{ + tcf_unregister_action(&act_connmark_ops); +} + +module_init(connmark_init_module); +module_exit(connmark_cleanup_module); +MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); +MODULE_DESCRIPTION("Connection tracking mark restoring"); +MODULE_LICENSE("GPL"); + diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index edbf40dac709..4cd5cf1aedf8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb, if (unlikely(action == TC_ACT_SHOT)) goto drop; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5aed341406c2..fc399db86f11 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) if (head == NULL) return 0UL; - list_for_each_entry(f, &head->flist, link) - if (f->handle == handle) + list_for_each_entry(f, &head->flist, link) { + if (f->handle == handle) { l = (unsigned long) f; + break; + } + } return l; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index f59adf8a4cd7..5f3ee9e4b5bf 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ 
-37,7 +37,7 @@ struct cls_bpf_prog { struct tcf_result res; struct list_head link; u32 handle; - u16 bpf_len; + u16 bpf_num_ops; struct tcf_proto *tp; struct rcu_head rcu; }; @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct tcf_exts exts; struct sock_fprog_kern tmp; struct bpf_prog *fp; - u16 bpf_size, bpf_len; + u16 bpf_size, bpf_num_ops; u32 classid; int ret; @@ -173,13 +173,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return ret; classid = nla_get_u32(tb[TCA_BPF_CLASSID]); - bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { + bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { ret = -EINVAL; goto errout; } - bpf_size = bpf_len * sizeof(*bpf_ops); + bpf_size = bpf_num_ops * sizeof(*bpf_ops); if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { ret = -EINVAL; goto errout; @@ -193,14 +193,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_len; + tmp.len = bpf_num_ops; tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; - prog->bpf_len = bpf_len; + prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; prog->filter = fp; prog->res.classid = classid; @@ -314,10 +314,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 15d68f24a521..461410394d08 100644 --- a/net/sched/cls_flow.c +++ 
b/net/sched/cls_flow.c @@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->dst) return ntohl(flow->dst); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) @@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys if (flow->ports) return ntohs(flow->port16[1]); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -156,7 +156,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 5b4a4efe468c..a3d79c8bf3b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct net_device *dev, *indev = NULL; int ret, network_offset; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index c8f8c399b99a..b5294ce20cd4 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = 
vlan_tx_tag_get(skb); + tag = skb_vlan_tag_get(skb); if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else @@ -197,7 +197,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = skb->protocol; + dst->value = tc_skb_protocol(skb); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 76f402e05bd6..243b7d169d61 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1807,7 +1807,7 @@ done: int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - __be16 protocol = skb->protocol; + __be16 protocol = tc_skb_protocol(skb); int err; for (; tp; tp = rcu_dereference_bh(tp->next)) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 227114f27f94..66700a6116aa 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); @@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mask[index] != 0xff || p->value[index]) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb->protocol)); + __func__, ntohs(tc_skb_protocol(skb))); break; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 333cd94ba381..dfcea20e3171 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1,7 +1,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet 
Scheduler (per flow pacing) * - * Copyright (C) 2013 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -52,6 +52,7 @@ #include <net/pkt_sched.h> #include <net/sock.h> #include <net/tcp_states.h> +#include <net/tcp.h> /* * Per flow structure, dynamically allocated @@ -92,6 +93,7 @@ struct fq_sched_data { u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ + u32 orphan_mask; /* mask for orphaned skb */ struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -222,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; - if (unlikely(!sk)) { + /* SYNACK messages are attached to a listener socket. + * 1) They are not part of a 'flow' yet + * 2) We do not want to rate limit them (eg SYNFLOOD attack), + * especially if the listener set SO_MAX_PACING_RATE + * 3) We pretend they are orphaned + */ + if (!sk || sk->sk_state == TCP_LISTEN) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_hash(skb) | 1L); + sk = (struct sock *)((hash << 1) | 1UL); + skb_orphan(skb); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -445,7 +456,9 @@ begin: goto begin; } - if (unlikely(f->head && now < f->time_next_packet)) { + skb = f->head; + if (unlikely(skb && now < f->time_next_packet && + !skb_is_tcp_pure_ack(skb))) { head->first = f->next; fq_flow_set_throttled(q, f); goto begin; @@ -464,14 +477,17 @@ begin: goto begin; } prefetch(&skb->end); - f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); if (f->credit > 0 || !q->rate_enable) 
goto out; + /* Do not pace locally generated ack packets */ + if (skb_is_tcp_pure_ack(skb)) + goto out; + rate = q->flow_max_rate; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); if (rate != ~0U) { @@ -704,6 +720,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (tb[TCA_FQ_ORPHAN_MASK]) + q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -749,6 +768,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); + q->orphan_mask = 1024 - 1; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -778,6 +798,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || + nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6ada42396a24..e02687185a59 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch) return NULL; } -static inline void -teql_neigh_release(struct neighbour *n) -{ - if (n) - neigh_release(n); -} - static void teql_reset(struct Qdisc *sch) { @@ -249,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, - NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + haddr, NULL, skb->len); if (err < 0) err = -EINVAL; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 26d06dbcc1c8..197c3f59ecbf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -391,8 
+391,7 @@ void sctp_association_free(struct sctp_association *asoc) sctp_asconf_queue_teardown(asoc); /* Free pending address space being deleted */ - if (asoc->asconf_addr_del_pending != NULL) - kfree(asoc->asconf_addr_del_pending); + kfree(asoc->asconf_addr_del_pending); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); diff --git a/net/socket.c b/net/socket.c index 418795caa897..bbedbfcb42c2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -113,10 +113,8 @@ unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to); +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -142,8 +140,10 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, .llseek = no_llseek, - .aio_read = sock_aio_read, - .aio_write = sock_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = sock_read_iter, + .write_iter = sock_write_iter, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -613,13 +613,6 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - return sock->ops->sendmsg(iocb, sock, msg, size); } @@ -635,11 +628,9 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t 
size, bool nosec) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); if (-EIOCBQUEUED == ret) @@ -756,14 +747,6 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } @@ -779,11 +762,9 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -795,11 +776,9 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -866,89 +845,47 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - -static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += 
iov[i].iov_len; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - - return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); -} + struct msghdr msg = {.msg_iter = *to}; + ssize_t res; -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + res = __sock_recvmsg(iocb, sock, &msg, + iocb->ki_nbytes, msg.msg_flags); + *to = msg.msg_iter; + return res; } -static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; - if (sock->type == SOCK_SEQPACKET) - msg->msg_flags |= MSG_EOR; - - return __sock_sendmsg(iocb, sock, msg, size); -} + struct msghdr msg = {.msg_iter = *from}; + ssize_t res; -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; - - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; + + if (sock->type == SOCK_SEQPACKET) + msg.msg_flags |= MSG_EOR; - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + *from = msg.msg_iter; + return res; } /* diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index d162b21b14bd..8c1e558db118 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -11,6 +11,8 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/init.h> +#include <linux/mutex.h> +#include <linux/notifier.h> #include <linux/netdevice.h> #include <net/switchdev.h> @@ -50,3 +52,176 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) return ops->ndo_switch_port_stp_update(dev, state); } EXPORT_SYMBOL(netdev_switch_port_stp_update); + +static DEFINE_MUTEX(netdev_switch_mutex); +static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); + +/** + * register_netdev_switch_notifier - Register notifier + * @nb: notifier_block + * + * Register switch device notifier. This should be used by code + * which needs to monitor events happening in a particular device. + * Return values are same as for raw_notifier_chain_register(). 
+ */ +int register_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(register_netdev_switch_notifier); + +/** + * unregister_netdev_switch_notifier - Unregister notifier + * @nb: notifier_block + * + * Unregister switch device notifier. + * Return values are same as for raw_notifier_chain_unregister(). + */ +int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(unregister_netdev_switch_notifier); + +/** + * call_netdev_switch_notifiers - Call notifiers + * @val: value passed unmodified to notifier function + * @dev: port device + * @info: notifier information data + * + * Call all network notifier blocks. This should be called by a driver + * when it needs to propagate a hardware event. + * Return values are same as for raw_notifier_call_chain(). 
+ */ +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info) +{ + int err; + + info->dev = dev; + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(call_netdev_switch_notifiers); + +/** + * netdev_switch_port_bridge_setlink - Notify switch device port of bridge + * port attributes + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attributes + */ +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_setlink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_setlink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); + +/** + * netdev_switch_port_bridge_dellink - Notify switch device port of bridge + * port attribute delete + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge dellink flags + * + * Notify switch device port of bridge port attribute delete + */ +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_dellink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_dellink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); + +/** + * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify master device slaves of bridge port attributes + */ +int 
ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); + +/** + * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge dellink flags + * + * Notify master device slaves of bridge port attribute deletes + */ +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c890848f9d56..91c8a8e031db 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -20,18 +20,6 @@ menuconfig TIPC If in doubt, say N. -config TIPC_PORTS - int "Maximum number of ports in a node" - depends on TIPC - range 127 65535 - default "8191" - help - Specifies how many ports can be supported by a node. - Can range from 127 to 65535 ports; default is 8191. - - Setting this to a smaller value saves some memory, - setting it to higher allows for more ports. 
- config TIPC_MEDIA_IB bool "InfiniBand media type support" depends on TIPC && INFINIBAND_IPOIB diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 333e4592772c..599b1a540d2b 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -4,11 +4,11 @@ obj-$(CONFIG_TIPC) := tipc.o -tipc-y += addr.o bcast.o bearer.o config.o \ +tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ - netlink.o node.o socket.o log.o eth_media.o \ - server.o + netlink.o netlink_compat.o node.o socket.o eth_media.o \ + server.o socket.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 357b74b26f9e..48fd3b5a73fb 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -34,8 +34,51 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" +#include <linux/kernel.h> #include "addr.h" +#include "core.h" + +/** + * in_own_cluster - test for cluster inclusion; <0.0.0> always matches + */ +int in_own_cluster(struct net *net, u32 addr) +{ + return in_own_cluster_exact(net, addr) || !addr; +} + +int in_own_cluster_exact(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return !((addr ^ tn->own_addr) >> 12); +} + +/** + * in_own_node - test for node inclusion; <0.0.0> always matches + */ +int in_own_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return (addr == tn->own_addr) || !addr; +} + +/** + * addr_domain - convert 2-bit scope value to equivalent message lookup domain + * + * Needed when address of a named message must be looked up a second time + * after a network hop. 
+ */ +u32 addr_domain(struct net *net, u32 sc) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (likely(sc == TIPC_NODE_SCOPE)) + return tn->own_addr; + if (sc == TIPC_CLUSTER_SCOPE) + return tipc_cluster_mask(tn->own_addr); + return tipc_zone_mask(tn->own_addr); +} /** * tipc_addr_domain_valid - validates a network domain address diff --git a/net/tipc/addr.h b/net/tipc/addr.h index a74acf9ee804..c700c2d28e09 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,7 +37,10 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H -#include "core.h" +#include <linux/types.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u @@ -52,42 +55,10 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } -static inline int in_own_cluster_exact(u32 addr) -{ - return !((addr ^ tipc_own_addr) >> 12); -} - -/** - * in_own_node - test for node inclusion; <0.0.0> always matches - */ -static inline int in_own_node(u32 addr) -{ - return (addr == tipc_own_addr) || !addr; -} - -/** - * in_own_cluster - test for cluster inclusion; <0.0.0> always matches - */ -static inline int in_own_cluster(u32 addr) -{ - return in_own_cluster_exact(addr) || !addr; -} - -/** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. 
- */ -static inline u32 addr_domain(u32 sc) -{ - if (likely(sc == TIPC_NODE_SCOPE)) - return tipc_own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tipc_own_addr); - return tipc_zone_mask(tipc_own_addr); -} - +int in_own_cluster(struct net *net, u32 addr); +int in_own_cluster_exact(struct net *net, u32 addr); +int in_own_node(struct net *net, u32 addr); +u32 addr_domain(struct net *net, u32 sc); int tipc_addr_domain_valid(u32); int tipc_addr_node_valid(u32 addr); int tipc_in_scope(u32 domain, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a9e174fc0f91..3e41704832de 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014, Ericsson AB + * Copyright (c) 2004-2006, 2014-2015, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -35,77 +35,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" -#include "link.h" #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" +#include "core.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. 
- */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". - */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; - -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. 
- */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - unsigned int flags; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; - -static struct tipc_bcbearer *bcbearer; -static struct tipc_bclink *bclink; -static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; @@ -115,38 +52,50 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_bclink_lock(void) +static void tipc_bclink_lock(struct net *net) { - spin_lock_bh(&bclink->lock); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->bclink->lock); } -static void tipc_bclink_unlock(void) +static void tipc_bclink_unlock(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node = NULL; - if (likely(!bclink->flags)) { - spin_unlock_bh(&bclink->lock); + if (likely(!tn->bclink->flags)) { + spin_unlock_bh(&tn->bclink->lock); return; } - if (bclink->flags & TIPC_BCLINK_RESET) { - bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(); + if (tn->bclink->flags & TIPC_BCLINK_RESET) { + tn->bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(net); } - spin_unlock_bh(&bclink->lock); + spin_unlock_bh(&tn->bclink->lock); if (node) tipc_link_reset_all(node); } +void tipc_bclink_input(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); +} + uint tipc_bclink_get_mtu(void) { return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(unsigned int flags) +void tipc_bclink_set_flags(struct net *net, unsigned int flags) { - bclink->flags |= flags; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tn->bclink->flags |= flags; } static u32 bcbuf_acks(struct sk_buff *buf) @@ -164,31 +113,40 @@ static 
void bcbuf_decr_acks(struct sk_buff *buf) bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); } -void tipc_bclink_add_node(u32 addr) +void tipc_bclink_add_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_add(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_add(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -void tipc_bclink_remove_node(u32 addr) +void tipc_bclink_remove_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_remove(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -static void bclink_set_last_sent(void) +static void bclink_set_last_sent(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (bcl->next_out) bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } -u32 tipc_bclink_get_last_sent(void) +u32 tipc_bclink_get_last_sent(struct net *net) { - return bcl->fsm_msg_cnt; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bcl->fsm_msg_cnt; } static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) @@ -203,9 +161,11 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) * * Called with bclink_lock locked */ -struct tipc_node *tipc_bclink_retransmit_to(void) +struct tipc_node *tipc_bclink_retransmit_to(struct net *net) { - return bclink->retransmit_to; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bclink->retransmit_to; } /** @@ -215,9 +175,10 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * * Called with bclink_lock locked */ -static void bclink_retransmit_pkt(u32 after, u32 to) +static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) { struct 
sk_buff *skb; + struct tipc_link *bcl = tn->bcl; skb_queue_walk(&bcl->outqueue, skb) { if (more(buf_seqno(skb), after)) { @@ -232,13 +193,11 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * * Called with no locks taken */ -void tipc_bclink_wakeup_users(void) +void tipc_bclink_wakeup_users(struct net *net) { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&bclink->link.waiting_sks))) - tipc_sk_rcv(skb); + struct tipc_net *tn = net_generic(net, tipc_net_id); + tipc_sk_rcv(net, &tn->bclink->link.wakeupq); } /** @@ -253,10 +212,12 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *skb, *tmp; struct sk_buff *next; unsigned int released = 0; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); + tipc_bclink_lock(net); /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&bcl->outqueue); + skb = skb_peek(&tn->bcl->outqueue); if (!skb) goto exit; @@ -267,43 +228,43 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) * acknowledge sent messages only (if other nodes still exist) * or both sent and unsent messages (otherwise) */ - if (bclink->bcast_nodes.count) - acked = bcl->fsm_msg_cnt; + if (tn->bclink->bcast_nodes.count) + acked = tn->bcl->fsm_msg_cnt; else - acked = bcl->next_out_no; + acked = tn->bcl->next_out_no; } else { /* * Bail out if specified sequence number does not correspond * to a message that has been sent and not yet acknowledged */ if (less(acked, buf_seqno(skb)) || - less(bcl->fsm_msg_cnt, acked) || + less(tn->bcl->fsm_msg_cnt, acked) || less_eq(acked, n_ptr->bclink.acked)) goto exit; } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->outqueue, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&bcl->outqueue, skb, tmp) { + 
skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - next = tipc_skb_queue_next(&bcl->outqueue, skb); - if (skb != bcl->next_out) { + next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); + if (skb != tn->bcl->next_out) { bcbuf_decr_acks(skb); } else { bcbuf_set_acks(skb, 0); - bcl->next_out = next; - bclink_set_last_sent(); + tn->bcl->next_out = next; + bclink_set_last_sent(net); } if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &bcl->outqueue); + __skb_unlink(skb, &tn->bcl->outqueue); kfree_skb(skb); released = 1; } @@ -311,15 +272,14 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(bcl->next_out)) { - tipc_link_push_packets(bcl); - bclink_set_last_sent(); + if (unlikely(tn->bcl->next_out)) { + tipc_link_push_packets(tn->bcl); + bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; - exit: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } /** @@ -327,9 +287,12 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) +void tipc_bclink_update_link_state(struct tipc_node *n_ptr, + u32 last_sent) { struct sk_buff *buf; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ if (less_eq(last_sent, n_ptr->bclink.last_in)) @@ -359,18 +322,18 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? 
buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); msg_set_bcast_ack(msg, n_ptr->bclink.last_in); msg_set_bcgap_after(msg, n_ptr->bclink.last_in); msg_set_bcgap_to(msg, to); - tipc_bclink_lock(); - tipc_bearer_send(MAX_BEARERS, buf, NULL); - bcl->stats.sent_nacks++; - tipc_bclink_unlock(); + tipc_bclink_lock(net); + tipc_bearer_send(net, MAX_BEARERS, buf, NULL); + tn->bcl->stats.sent_nacks++; + tipc_bclink_unlock(net); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -383,9 +346,9 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. */ -static void bclink_peek_nack(struct tipc_msg *msg) +static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) { - struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); + struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); if (unlikely(!n_ptr)) return; @@ -400,17 +363,23 @@ static void bclink_peek_nack(struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster +/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets + * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct sk_buff_head *list) +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_bclink *bclink = tn->bclink; int rc = 0; int bc = 0; struct sk_buff 
*skb; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; /* Prepare clone of message for local node */ skb = tipc_msg_reassemble(list); @@ -419,32 +388,35 @@ int tipc_bclink_xmit(struct sk_buff_head *list) return -EHOSTUNREACH; } - /* Broadcast to all other nodes */ + /* Broadcast to all nodes */ if (likely(bclink)) { - tipc_bclink_lock(); + tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(bcl, list); + rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); - bclink_set_last_sent(); + bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += len; } bc = 1; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } if (unlikely(!bc)) __skb_queue_purge(list); - /* Deliver message clone */ - if (likely(!rc)) - tipc_sk_mcast_rcv(skb); - else + if (unlikely(rc)) { kfree_skb(skb); - + return rc; + } + /* Deliver message clone */ + __skb_queue_head_init(&arrvq); + skb_queue_head_init(&inputq); + __skb_queue_tail(&arrvq, skb); + tipc_sk_mcast_rcv(net, &arrvq, &inputq); return rc; } @@ -455,19 +427,21 @@ int tipc_bclink_xmit(struct sk_buff_head *list) */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { + struct tipc_net *tn = net_generic(node->net, tipc_net_id); + bclink_update_last_sent(node, seqno); node->bclink.last_in = seqno; node->bclink.oos_state = 0; - bcl->stats.recv_info++; + tn->bcl->stats.recv_info++; /* * Unicast an ACK periodically, ensuring that * all nodes in the cluster don't ACK at the same time */ - if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { + if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); - bcl->stats.sent_acks++; + tn->bcl->stats.sent_acks++; } } @@ -476,19 +450,24 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct sk_buff *buf) 
+void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; u32 seqno; int deferred = 0; + int pos = 0; + struct sk_buff *iskb; + struct sk_buff_head *arrvq, *inputq; /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tipc_net_id) + if (msg_mc_netid(msg) != tn->net_id) goto exit; - node = tipc_node_find(msg_prevnode(msg)); + node = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!node)) goto exit; @@ -500,18 +479,18 @@ void tipc_bclink_rcv(struct sk_buff *buf) if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { if (msg_type(msg) != STATE_MSG) goto unlock; - if (msg_destnode(msg) == tipc_own_addr) { + if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - tipc_bclink_lock(); + tipc_bclink_lock(net); bcl->stats.recv_nacks++; - bclink->retransmit_to = node; - bclink_retransmit_pkt(msg_bcgap_after(msg), + tn->bclink->retransmit_to = node; + bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } else { tipc_node_unlock(node); - bclink_peek_nack(msg); + bclink_peek_nack(net, msg); } goto exit; } @@ -519,52 +498,54 @@ void tipc_bclink_rcv(struct sk_buff *buf) /* Handle in-sequence broadcast message */ seqno = msg_seqno(msg); next_in = mod(node->bclink.last_in + 1); + arrvq = &tn->bclink->arrvq; + inputq = &tn->bclink->inputq; if (likely(seqno == next_in)) { receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, buf); + spin_unlock_bh(&inputq->lock); + node->action_flags |= TIPC_BCAST_MSG_EVT; + tipc_bclink_unlock(net); tipc_node_unlock(node); - if (likely(msg_mcast(msg))) - 
tipc_sk_mcast_rcv(buf); - else - kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - tipc_bclink_unlock(); + pos = 0; + while (tipc_msg_extract(buf, &iskb, &pos)) { + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, iskb); + spin_unlock_bh(&inputq->lock); + } + node->action_flags |= TIPC_BCAST_MSG_EVT; + tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; msg = buf_msg(buf); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); goto receive; } - tipc_bclink_unlock(); - tipc_node_unlock(node); - } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_named_rcv(buf); } else { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); kfree_skb(buf); } @@ -602,14 +583,14 @@ receive: buf = NULL; } - tipc_bclink_lock(); + tipc_bclink_lock(net); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); unlock: tipc_node_unlock(node); @@ -620,7 +601,7 @@ exit: u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) { return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); + (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); } @@ -633,11 +614,15 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all 
circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, + struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; struct tipc_msg *msg = buf_msg(buf); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bclink *bclink = tn->bclink; /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -647,8 +632,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (likely(!msg_non_seq(buf_msg(buf)))) { bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); - bcl->stats.sent_info++; + msg_set_mc_netid(msg, tn->net_id); + tn->bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); @@ -677,13 +662,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b->identity, buf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, tbuf, + &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } if (bcbearer->remains_new.count == 0) @@ -698,15 +684,18 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + 
struct tipc_bcbearer *bcbearer = tn->bcbearer; struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; int b_index; int pri; - tipc_bclink_lock(); + tipc_bclink_lock(net); if (action) tipc_nmap_add(nm_ptr, node); @@ -718,7 +707,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(bearer_list[b_index]); + b = rcu_dereference_rtnl(tn->bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -753,7 +742,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) bp_curr++; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -807,19 +796,21 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; @@ -852,7 +843,7 @@ int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -863,79 +854,49 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } -int tipc_bclink_stats(char *buf, const u32 buf_size) +int tipc_bclink_reset_stats(struct net *net) { - int ret; - struct tipc_stats *s; + struct tipc_net *tn = net_generic(net, 
tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) - return 0; - - tipc_bclink_lock(); - - s = &bcl->stats; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " Window:%u packets\n", - bcl->name, bcl->queue_limit[0]); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX naks:%u defs:%u dups:%u\n", - s->recv_nacks, s->deferred_recv, s->duplicates); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX naks:%u acks:%u dups:%u\n", - s->sent_nacks, s->sent_acks, s->retransmitted); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue max:%u avg:%u\n", - s->link_congs, s->max_queue_sz, - s->queue_sz_counts ? 
- (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_bclink_unlock(); - return ret; -} - -int tipc_bclink_reset_stats(void) -{ - if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(); + tipc_bclink_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(u32 limit) +int tipc_bclink_set_queue_limits(struct net *net, u32 limit) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_init(void) +int tipc_bclink_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) return -ENOMEM; @@ -954,30 +915,39 @@ int tipc_bclink_init(void) spin_lock_init(&bclink->lock); __skb_queue_head_init(&bcl->outqueue); __skb_queue_head_init(&bcl->deferred_queue); - skb_queue_head_init(&bcl->waiting_sks); + skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->node.waiting_sks); + __skb_queue_head_init(&bclink->arrvq); + skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; + bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); + rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; + bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; + msg_set_prevnode(bcl->pmsg, tn->own_addr); strlcpy(bcl->name, tipc_bclink_name, 
TIPC_MAX_LINK_NAME); + tn->bcbearer = bcbearer; + tn->bclink = bclink; + tn->bcl = bcl; return 0; } -void tipc_bclink_stop(void) +void tipc_bclink_stop(struct net *net) { - tipc_bclink_lock(); - tipc_link_purge_queues(bcl); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); - RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + tipc_bclink_lock(net); + tipc_link_purge_queues(tn->bcl); + tipc_bclink_unlock(net); + + RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); - kfree(bcbearer); - kfree(bclink); + kfree(tn->bcbearer); + kfree(tn->bclink); } /** @@ -1037,50 +1007,3 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, } } } - -/** - * tipc_port_list_add - add a port to a port list, ensuring no duplicates - */ -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port) -{ - struct tipc_port_list *item = pl_ptr; - int i; - int item_sz = PLSIZE; - int cnt = pl_ptr->count; - - for (; ; cnt -= item_sz, item = item->next) { - if (cnt < PLSIZE) - item_sz = cnt; - for (i = 0; i < item_sz; i++) - if (item->ports[i] == port) - return; - if (i < PLSIZE) { - item->ports[i] = port; - pl_ptr->count++; - return; - } - if (!item->next) { - item->next = kmalloc(sizeof(*item), GFP_ATOMIC); - if (!item->next) { - pr_warn("Incomplete multicast delivery, no memory\n"); - return; - } - item->next->next = NULL; - } - } -} - -/** - * tipc_port_list_free - free dynamically created entries in port_list chain - * - */ -void tipc_port_list_free(struct tipc_port_list *pl_ptr) -{ - struct tipc_port_list *item; - struct tipc_port_list *next; - - for (item = pl_ptr->next; item; item = next) { - next = item->next; - kfree(item); - } -} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 644d79129fba..43f397fbac55 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -1,7 +1,7 @@ /* * net/tipc/bcast.h: Include file for TIPC broadcast code * - * Copyright (c) 2003-2006, 2014, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB 
* Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,39 +37,73 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include "netlink.h" - -#define MAX_NODES 4096 -#define WSIZE 32 -#define TIPC_BCLINK_RESET 1 +#include <linux/tipc_config.h> +#include "link.h" +#include "node.h" /** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set + * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link + * @primary: pointer to primary bearer + * @secondary: pointer to secondary bearer + * + * Bearers must have same priority and same set of reachable destinations + * to be paired. */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; + +struct tipc_bcbearer_pair { + struct tipc_bearer *primary; + struct tipc_bearer *secondary; }; -#define PLSIZE 32 +#define TIPC_BCLINK_RESET 1 +#define BCBEARER MAX_BEARERS /** - * struct tipc_port_list - set of node local destination ports - * @count: # of ports in set (only valid for first entry in list) - * @next: pointer to next entry in list - * @ports: array of port references + * struct tipc_bcbearer - bearer used by broadcast link + * @bearer: (non-standard) broadcast bearer structure + * @media: (non-standard) broadcast media structure + * @bpairs: array of bearer pairs + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bclink_lock". 
*/ -struct tipc_port_list { - int count; - struct tipc_port_list *next; - u32 ports[PLSIZE]; +struct tipc_bcbearer { + struct tipc_bearer bearer; + struct tipc_media media; + struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; + struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct tipc_node_map remains; + struct tipc_node_map remains_new; }; +/** + * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure + * @link: (non-standard) broadcast link structure + * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states + * @bcast_nodes: map of broadcast-capable nodes + * @retransmit_to: node that most recently requested a retransmit + * + * Handles sequence numbering, fragmentation, bundling, etc. + */ +struct tipc_bclink { + spinlock_t lock; + struct tipc_link link; + struct tipc_node node; + unsigned int flags; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; + struct tipc_node_map bcast_nodes; + struct tipc_node *retransmit_to; +}; struct tipc_node; - extern const char tipc_bclink_name[]; /** @@ -81,27 +115,26 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); -void tipc_port_list_free(struct tipc_port_list *pl_ptr); - -int tipc_bclink_init(void); -void tipc_bclink_stop(void); -void tipc_bclink_set_flags(unsigned int flags); -void tipc_bclink_add_node(u32 addr); -void tipc_bclink_remove_node(u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(void); +int tipc_bclink_init(struct net *net); +void tipc_bclink_stop(struct net *net); +void tipc_bclink_set_flags(struct net *tn, unsigned int flags); +void tipc_bclink_add_node(struct net *net, u32 addr); +void tipc_bclink_remove_node(struct net *net, u32 addr); +struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, 
u32 acked); -void tipc_bclink_rcv(struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(void); +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); +u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); -int tipc_bclink_stats(char *stats_buf, const u32 buf_size); -int tipc_bclink_reset_stats(void); -int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); +void tipc_bclink_update_link_state(struct tipc_node *node, + u32 last_sent); +int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_set_queue_limits(struct net *net, u32 limit); +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action); uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct sk_buff_head *list); -void tipc_bclink_wakeup_users(void); -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg); +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); +void tipc_bclink_wakeup_users(struct net *net); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +void tipc_bclink_input(struct net *net); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 463db5b15b8b..48852c2dcc03 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -34,11 +34,12 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <net/sock.h> #include "core.h" -#include "config.h" #include "bearer.h" #include "link.h" #include "discover.h" +#include "bcast.h" #define MAX_ADDR_STR 60 @@ -67,9 +68,8 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; - -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down); /** * tipc_media_find - locates specified media object by name @@ -111,38 +111,18 @@ void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) m_ptr = media_find_id(a->media_id); if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = tipc_snprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); else { u32 i; - ret = tipc_snprintf(buf, len, "UNKNOWN(%u)", a->media_id); + ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) - ret += tipc_snprintf(buf - ret, len + ret, + ret += scnprintf(buf - ret, len + ret, "-%02x", a->value[i]); } } /** - * tipc_media_get_names - record names of registered media in buffer - */ -struct sk_buff *tipc_media_get_names(void) -{ - struct sk_buff *buf; - int i; - - buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, - media_info_array[i]->name, - strlen(media_info_array[i]->name) + 1); - } - return buf; -} - -/** * bearer_name_validate - validate & (optionally) deconstruct bearer name * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) @@ -190,68 +170,43 @@ static int bearer_name_validate(const char *name, /** * tipc_bearer_find - locates bearer object with matching 
bearer name */ -struct tipc_bearer *tipc_bearer_find(const char *name) +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } return NULL; } -/** - * tipc_bearer_get_names - record names of bearers in buffer - */ -struct sk_buff *tipc_bearer_get_names(void) -{ - struct sk_buff *buf; - struct tipc_bearer *b; - int i, j; - - buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - for (j = 0; j < MAX_BEARERS; j++) { - b = rtnl_dereference(bearer_list[j]); - if (!b) - continue; - if (b->media == media_info_array[i]) { - tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b->name, - strlen(b->name) + 1); - } - } - } - return buf; -} - -void tipc_bearer_add_dest(u32 bearer_id, u32 dest) +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); tipc_disc_add_dest(b_ptr->link_req); } rcu_read_unlock(); } -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, 
false); tipc_disc_remove_dest(b_ptr->link_req); } rcu_read_unlock(); @@ -260,8 +215,10 @@ void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 priority) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; struct tipc_bearer_names b_names; @@ -271,7 +228,7 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) u32 i; int res = -EINVAL; - if (!tipc_own_addr) { + if (!tn->own_addr) { pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", name); return -ENOPROTOOPT; @@ -281,11 +238,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } if (tipc_addr_domain_valid(disc_domain) && - (disc_domain != tipc_own_addr)) { - if (tipc_in_scope(disc_domain, tipc_own_addr)) { - disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK; + (disc_domain != tn->own_addr)) { + if (tipc_in_scope(disc_domain, tn->own_addr)) { + disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ - } else if (in_own_cluster_exact(disc_domain)) + } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ } if (res) { @@ -313,7 +270,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -347,7 +304,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(b_ptr); + res = m_ptr->enable_media(net, b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -361,15 +318,15 @@ restart: b_ptr->net_plane = bearer_id + 'A'; b_ptr->priority = priority; - 
res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); if (res) { - bearer_disable(b_ptr, false); + bearer_disable(net, b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -380,11 +337,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_link_reset_list(b_ptr->identity); - tipc_disc_reset(b_ptr); + tipc_link_reset_list(net, b_ptr->identity); + tipc_disc_reset(net, b_ptr); return 0; } @@ -393,49 +350,35 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. 
*/ -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; pr_info("Disabling bearer <%s>\n", b_ptr->name); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(b_ptr->identity, shutting_down); + tipc_link_delete_list(net, b_ptr->identity, shutting_down); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(bearer_list[i])) { - RCU_INIT_POINTER(bearer_list[i], NULL); + if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } kfree_rcu(b_ptr, rcu); } -int tipc_disable_bearer(const char *name) -{ - struct tipc_bearer *b_ptr; - int res; - - b_ptr = tipc_bearer_find(name); - if (b_ptr == NULL) { - pr_warn("Attempt to disable unknown bearer <%s>\n", name); - res = -EINVAL; - } else { - bearer_disable(b_ptr, false); - res = 0; - } - return res; -} - -int tipc_enable_l2_media(struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); + dev = dev_get_by_name(net, driver_name); if (!dev) return -ENODEV; @@ -474,8 +417,8 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest) +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; struct net_device *dev; @@ -511,15 +454,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed 
in * as it may be needed for later retransmission! */ -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b_ptr)) - b_ptr->media->send_msg(buf, b_ptr, dest); + b_ptr->media->send_msg(net, buf, b_ptr, dest); rcu_read_unlock(); } @@ -539,17 +483,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, { struct tipc_bearer *b_ptr; - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - rcu_read_lock(); b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b_ptr); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -572,11 +511,9 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, void *ptr) { - struct tipc_bearer *b_ptr; struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); + struct tipc_bearer *b_ptr; b_ptr = rtnl_dereference(dev->tipc_ptr); if (!b_ptr) @@ -590,16 +527,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_CHANGEADDR: b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(b_ptr, false); + bearer_disable(dev_net(dev), b_ptr, false); break; } 
return NOTIFY_OK; @@ -632,16 +569,17 @@ void tipc_bearer_cleanup(void) dev_remove_pack(&tipc_packet_type); } -void tipc_bearer_stop(void) +void tipc_bearer_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr) { - bearer_disable(b_ptr, true); - bearer_list[i] = NULL; + bearer_disable(net, b_ptr, true); + tn->bearer_list[i] = NULL; } } } @@ -654,7 +592,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct nlattr *attrs; struct nlattr *prop; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; @@ -698,6 +636,8 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); if (i == MAX_BEARERS) return 0; @@ -708,7 +648,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { - bearer = rtnl_dereference(bearer_list[i]); + bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; @@ -730,6 +670,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -753,7 +694,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) msg.seq = info->snd_seq; rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; goto err_out; @@ -778,6 +719,7 @@ int tipc_nl_bearer_disable(struct 
sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -794,13 +736,13 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { rtnl_unlock(); return -EINVAL; } - bearer_disable(bearer, false); + bearer_disable(net, bearer, false); rtnl_unlock(); return 0; @@ -811,11 +753,13 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tipc_own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -847,7 +791,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio); if (err) { rtnl_unlock(); return err; @@ -863,6 +807,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -878,7 +823,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - b = tipc_bearer_find(name); + b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); return -EINVAL; @@ -913,7 +858,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct nlattr *attrs; struct nlattr *prop; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr 
= genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 2c1230ac5dfe..6b17795ff8bc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -37,12 +37,13 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "bcast.h" #include "netlink.h" #include <net/genetlink.h> #define MAX_BEARERS 2 #define MAX_MEDIA 2 +#define MAX_NODES 4096 +#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info field is 32 bytes long @@ -59,6 +60,16 @@ #define TIPC_MEDIA_TYPE_IB 2 /** + * struct tipc_node_map - set of node identifiers + * @count: # of nodes in set + * @map: bitmap of node identifiers that are in the set + */ +struct tipc_node_map { + u32 count; + u32 map[MAX_NODES / WSIZE]; +}; + +/** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) * @media_id: TIPC media type identifier @@ -89,10 +100,10 @@ struct tipc_bearer; * @name: media name */ struct tipc_media { - int (*send_msg)(struct sk_buff *buf, + int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -157,17 +168,11 @@ struct tipc_bearer_names { char if_name[TIPC_MAX_IF_NAME]; }; -struct tipc_link; - -extern struct tipc_bearer __rcu *bearer_list[]; - /* * TIPC routines available to supported media types */ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); -int tipc_disable_bearer(const char *name); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); /* * Routines made 
available to TIPC by supported media types @@ -191,21 +196,19 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -struct sk_buff *tipc_media_get_names(void); -int tipc_enable_l2_media(struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); -struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(u32 bearer_id, u32 dest); -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); -struct tipc_bearer *tipc_bearer_find(const char *name); +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); -void tipc_bearer_stop(void); -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_stop(struct net *net); +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c deleted file mode 100644 index 876f4c6a2631..000000000000 --- a/net/tipc/config.c +++ /dev/null @@ -1,342 +0,0 @@ -/* - * net/tipc/config.c: TIPC configuration management code - * - * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2013, Wind River Systems - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" -#include "socket.h" -#include "name_table.h" -#include "config.h" -#include "server.h" - -#define REPLY_TRUNCATED "<truncated>\n" - -static const void *req_tlv_area; /* request message TLV area */ -static int req_tlv_space; /* request message TLV area size */ -static int rep_headroom; /* reply message headroom to use */ - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size) -{ - struct sk_buff *buf; - - buf = alloc_skb(rep_headroom + payload_size, GFP_ATOMIC); - if (buf) - skb_reserve(buf, rep_headroom); - return buf; -} - -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size) -{ - struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf); - int new_tlv_space = TLV_SPACE(tlv_data_size); - - if (skb_tailroom(buf) < new_tlv_space) - return 0; - skb_put(buf, new_tlv_space); - tlv->tlv_type = htons(tlv_type); - tlv->tlv_len = htons(TLV_LENGTH(tlv_data_size)); - if (tlv_data_size && tlv_data) - memcpy(TLV_DATA(tlv), tlv_data, tlv_data_size); - return 1; -} - -static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) -{ - struct sk_buff *buf; - __be32 value_net; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value))); - if (buf) { - value_net = htonl(value); - tipc_cfg_append_tlv(buf, tlv_type, &value_net, - sizeof(value_net)); - } - return buf; -} - -static struct sk_buff *tipc_cfg_reply_unsigned(u32 value) -{ - return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value); -} - -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) -{ - struct sk_buff *buf; - int string_len = strlen(string) + 1; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(string_len)); - if (buf) - tipc_cfg_append_tlv(buf, tlv_type, string, string_len); - return buf; -} - -static struct sk_buff *tipc_show_stats(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - 
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(u32 *)TLV_DATA(req_tlv_area)); - if (value != 0) - return tipc_cfg_reply_error_string("unsupported argument"); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (buf == NULL) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - str_len = tipc_snprintf(pb, pb_len, "TIPC version " TIPC_MOD_VER "\n"); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -static struct sk_buff *cfg_enable_bearer(void) -{ - struct tipc_bearer_config *args; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(args->name, - ntohl(args->disc_domain), - ntohl(args->priority))) - return tipc_cfg_reply_error_string("unable to enable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_disable_bearer(void) -{ - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - if (tipc_disable_bearer((char *)TLV_DATA(req_tlv_area))) - return tipc_cfg_reply_error_string("unable to disable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_own_addr(void) -{ - u32 addr; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (addr == tipc_own_addr) - return tipc_cfg_reply_none(); - if (!tipc_addr_node_valid(addr)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (node address)"); - if (tipc_own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change node address once assigned)"); - if 
(!tipc_net_start(addr)) - return tipc_cfg_reply_none(); - - return tipc_cfg_reply_error_string("cannot change to network mode"); -} - -static struct sk_buff *cfg_set_max_ports(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - if (value < 127 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max ports must be 127-65535)"); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC is active)"); -} - -static struct sk_buff *cfg_set_netid(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_net_id) - return tipc_cfg_reply_none(); - if (value < 1 || value > 9999) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network id must be 1-9999)"); - if (tipc_own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change network id once TIPC has joined a network)"); - tipc_net_id = value; - return tipc_cfg_reply_none(); -} - -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, - int request_space, int reply_headroom) -{ - struct sk_buff *rep_tlv_buf; - - rtnl_lock(); - - /* Save request and reply details in a well-known location */ - req_tlv_area = request_area; - req_tlv_space = request_space; - rep_headroom = reply_headroom; - - /* Check command authorization */ - if (likely(in_own_node(orig_node))) { - /* command is permitted */ - } else { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot be done remotely)"); - goto exit; - } - - /* Call appropriate processing routine */ - switch (cmd) { - case TIPC_CMD_NOOP: - rep_tlv_buf = 
tipc_cfg_reply_none(); - break; - case TIPC_CMD_GET_NODES: - rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_GET_LINKS: - rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_RESET_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_SHOW_NAME_TABLE: - rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(); - break; - case TIPC_CMD_GET_MEDIA_NAMES: - rep_tlv_buf = tipc_media_get_names(); - break; - case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_sk_socks_show(); - break; - case TIPC_CMD_SHOW_STATS: - rep_tlv_buf = tipc_show_stats(); - break; - case TIPC_CMD_SET_LINK_TOL: - case TIPC_CMD_SET_LINK_PRI: - case TIPC_CMD_SET_LINK_WINDOW: - rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd); - break; - case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(); - break; - case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(); - break; - case TIPC_CMD_SET_NODE_ADDR: - rep_tlv_buf = cfg_set_own_addr(); - break; - case TIPC_CMD_SET_MAX_PORTS: - rep_tlv_buf = cfg_set_max_ports(); - break; - case TIPC_CMD_SET_NETID: - rep_tlv_buf = cfg_set_netid(); - break; - case TIPC_CMD_GET_MAX_PORTS: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); - break; - case TIPC_CMD_GET_NETID: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); - break; - case TIPC_CMD_NOT_NET_ADMIN: - rep_tlv_buf = - tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); - break; - case TIPC_CMD_SET_MAX_ZONES: - case TIPC_CMD_GET_MAX_ZONES: - case TIPC_CMD_SET_MAX_SLAVES: - case TIPC_CMD_GET_MAX_SLAVES: - case TIPC_CMD_SET_MAX_CLUSTERS: - case TIPC_CMD_GET_MAX_CLUSTERS: - case TIPC_CMD_SET_MAX_NODES: - case TIPC_CMD_GET_MAX_NODES: - case 
TIPC_CMD_SET_MAX_SUBSCR: - case TIPC_CMD_GET_MAX_SUBSCR: - case TIPC_CMD_SET_MAX_PUBL: - case TIPC_CMD_GET_MAX_PUBL: - case TIPC_CMD_SET_LOG_SIZE: - case TIPC_CMD_SET_REMOTE_MNG: - case TIPC_CMD_GET_REMOTE_MNG: - case TIPC_CMD_DUMP_LOG: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (obsolete command)"); - break; - default: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (unknown command)"); - break; - } - - WARN_ON(rep_tlv_buf->len > TLV_SPACE(ULTRA_STRING_MAX_LEN)); - - /* Append an error message if we cannot return all requested data */ - if (rep_tlv_buf->len == TLV_SPACE(ULTRA_STRING_MAX_LEN)) { - if (*(rep_tlv_buf->data + ULTRA_STRING_MAX_LEN) != '\0') - sprintf(rep_tlv_buf->data + rep_tlv_buf->len - - sizeof(REPLY_TRUNCATED) - 1, REPLY_TRUNCATED); - } - - /* Return reply buffer */ -exit: - rtnl_unlock(); - return rep_tlv_buf; -} diff --git a/net/tipc/config.h b/net/tipc/config.h deleted file mode 100644 index 47b1bf181612..000000000000 --- a/net/tipc/config.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * net/tipc/config.h: Include file for TIPC configuration service code - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_CONFIG_H -#define _TIPC_CONFIG_H - -/* ---------------------------------------------------------------------- */ - -#include "link.h" - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size); -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size); -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string); - -static inline struct sk_buff *tipc_cfg_reply_none(void) -{ - return tipc_cfg_reply_alloc(0); -} - -static inline struct sk_buff *tipc_cfg_reply_error_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string); -} - -static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string); -} - -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, - const void *req_tlv_area, int req_tlv_space, - int headroom); -#endif diff --git a/net/tipc/core.c b/net/tipc/core.c index a5737b8407dd..935205e6bcfe 100644 --- 
a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,82 +34,88 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "core.h" #include "name_table.h" #include "subscr.h" -#include "config.h" +#include "bearer.h" +#include "net.h" #include "socket.h" #include <linux/module.h> -/* global variables used by multiple sub-systems within TIPC */ -int tipc_random __read_mostly; - /* configurable TIPC parameters */ -u32 tipc_own_addr __read_mostly; -int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ -/** - * tipc_buf_acquire - creates a TIPC message buffer - * @size: message size (including TIPC header) - * - * Returns a new buffer with data pointers set to the specified size. - * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. - */ -struct sk_buff *tipc_buf_acquire(u32 size) +static int __net_init tipc_init_net(struct net *net) { - struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); - if (skb) { - skb_reserve(skb, BUF_HEADROOM); - skb_put(skb, size); - skb->next = NULL; - } - return skb; + struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; + + tn->net_id = 4711; + tn->own_addr = 0; + get_random_bytes(&tn->random, sizeof(int)); + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); + + err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + + err = tipc_subscr_start(net); + if (err) + goto out_subscr; + return 0; + +out_subscr: + tipc_nametbl_stop(net); +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: + return err; } -/** - * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode - */ -static void tipc_core_stop(void) +static void __net_exit tipc_exit_net(struct net *net) { - 
tipc_net_stop(); - tipc_bearer_cleanup(); - tipc_netlink_stop(); - tipc_subscr_stop(); - tipc_nametbl_stop(); - tipc_sk_ref_table_stop(); - tipc_socket_stop(); - tipc_unregister_sysctl(); + tipc_subscr_stop(net); + tipc_net_stop(net); + tipc_nametbl_stop(net); + tipc_sk_rht_destroy(net); } -/** - * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode - */ -static int tipc_core_start(void) +static struct pernet_operations tipc_net_ops = { + .init = tipc_init_net, + .exit = tipc_exit_net, + .id = &tipc_net_id, + .size = sizeof(struct tipc_net), +}; + +static int __init tipc_init(void) { int err; - get_random_bytes(&tipc_random, sizeof(tipc_random)); - - err = tipc_sk_ref_table_init(tipc_max_ports, tipc_random); - if (err) - goto out_reftbl; + pr_info("Activated (version " TIPC_MOD_VER ")\n"); - err = tipc_nametbl_init(); - if (err) - goto out_nametbl; + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; err = tipc_netlink_start(); if (err) goto out_netlink; + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + err = tipc_socket_init(); if (err) goto out_socket; @@ -118,58 +124,40 @@ static int tipc_core_start(void) if (err) goto out_sysctl; - err = tipc_subscr_start(); + err = register_pernet_subsys(&tipc_net_ops); if (err) - goto out_subscr; + goto out_pernet; err = tipc_bearer_setup(); if (err) goto out_bearer; + pr_info("Started in single node mode\n"); return 0; out_bearer: - tipc_subscr_stop(); -out_subscr: + unregister_pernet_subsys(&tipc_net_ops); +out_pernet: tipc_unregister_sysctl(); out_sysctl: tipc_socket_stop(); out_socket: + tipc_netlink_compat_stop(); +out_netlink_compat: tipc_netlink_stop(); out_netlink: - tipc_nametbl_stop(); -out_nametbl: - tipc_sk_ref_table_stop(); -out_reftbl: + pr_err("Unable to start in single node mode\n"); return err; } -static int 
__init tipc_init(void) -{ - int res; - - pr_info("Activated (version " TIPC_MOD_VER ")\n"); - - tipc_own_addr = 0; - tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_net_id = 4711; - - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; - - res = tipc_core_start(); - if (res) - pr_err("Unable to start in single node mode\n"); - else - pr_info("Started in single node mode\n"); - return res; -} - static void __exit tipc_exit(void) { - tipc_core_stop(); + tipc_bearer_cleanup(); + tipc_netlink_stop(); + tipc_netlink_compat_stop(); + tipc_socket_stop(); + tipc_unregister_sysctl(); + unregister_pernet_subsys(&tipc_net_ops); + pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 84602137ce20..3dc68c7a966d 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -37,8 +37,6 @@ #ifndef _TIPC_CORE_H #define _TIPC_CORE_H -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/tipc.h> #include <linux/tipc_config.h> #include <linux/tipc_netlink.h> @@ -59,47 +57,54 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> +#include <net/netns/generic.h> +#include <linux/rhashtable.h> + +#include "node.h" +#include "bearer.h" +#include "bcast.h" +#include "netlink.h" +#include "link.h" +#include "node.h" +#include "msg.h" #define TIPC_MOD_VER "2.0.0" -#define ULTRA_STRING_MAX_LEN 32768 -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 +extern int tipc_net_id __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; +extern int sysctl_tipc_named_timeout __read_mostly; -struct tipc_msg; /* msg.h */ +struct tipc_net { + u32 own_addr; + int net_id; + int random; -int tipc_snprintf(char *buf, int len, const char *fmt, ...); + /* Node table and node list */ + spinlock_t node_list_lock; + struct hlist_head node_htable[NODE_HTABLE_SIZE]; + 
struct list_head node_list; + u32 num_nodes; + u32 num_links; -/* - * TIPC-specific error codes - */ -#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + /* Bearer list */ + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; -/* - * Global configuration variables - */ -extern u32 tipc_own_addr __read_mostly; -extern int tipc_max_ports __read_mostly; -extern int tipc_net_id __read_mostly; -extern int sysctl_tipc_rmem[3] __read_mostly; -extern int sysctl_tipc_named_timeout __read_mostly; + /* Broadcast link */ + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; -/* - * Other global variables - */ -extern int tipc_random __read_mostly; + /* Socket hash table */ + struct rhashtable sk_rht; -/* - * Routines available to privileged subsystems - */ -int tipc_netlink_start(void); -void tipc_netlink_stop(void); -int tipc_socket_init(void); -void tipc_socket_stop(void); -int tipc_sock_create_local(int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); + /* Name table */ + spinlock_t nametbl_lock; + struct name_table *nametbl; + + /* Topology subscription server */ + struct tipc_server *topsrv; + atomic_t subscription_count; +}; #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); @@ -108,102 +113,4 @@ void tipc_unregister_sysctl(void); #define tipc_register_sysctl() 0 #define tipc_unregister_sysctl() #endif - -/* - * TIPC timer code - */ -typedef void (*Handler) (unsigned long); - -/** - * k_init_timer - initialize a timer - * @timer: pointer to timer structure - * @routine: pointer to routine to invoke when timer expires - * @argument: value to pass to routine when timer expires - * - * Timer must be initialized before use (and terminated when no longer needed). 
- */ -static inline void k_init_timer(struct timer_list *timer, Handler routine, - unsigned long argument) -{ - setup_timer(timer, routine, argument); -} - -/** - * k_start_timer - start a timer - * @timer: pointer to timer structure - * @msec: time to delay (in ms) - * - * Schedules a previously initialized timer for later execution. - * If timer is already running, the new timeout overrides the previous request. - * - * To ensure the timer doesn't expire before the specified delay elapses, - * the amount of delay is rounded up when converting to the jiffies - * then an additional jiffy is added to account for the fact that - * the starting time may be in the middle of the current jiffy. - */ -static inline void k_start_timer(struct timer_list *timer, unsigned long msec) -{ - mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1); -} - -/** - * k_cancel_timer - cancel a timer - * @timer: pointer to timer structure - * - * Cancels a previously initialized timer. - * Can be called safely even if the timer is already inactive. - * - * WARNING: Must not be called when holding locks required by the timer's - * timeout routine, otherwise deadlock can occur on SMP systems! - */ -static inline void k_cancel_timer(struct timer_list *timer) -{ - del_timer_sync(timer); -} - -/** - * k_term_timer - terminate a timer - * @timer: pointer to timer structure - * - * Prevents further use of a previously initialized timer. - * - * WARNING: Caller must ensure timer isn't currently running. - * - * (Do not "enhance" this routine to automatically cancel an active timer, - * otherwise deadlock can arise when a timeout routine calls k_term_timer.) - */ -static inline void k_term_timer(struct timer_list *timer) -{ -} - -/* - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). 
- * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM LL_MAX_HEADER - -struct tipc_skb_cb { - void *handle; - struct sk_buff *tail; - bool deferred; - bool wakeup_pending; - bool bundling; - u16 chain_sz; - u16 chain_imp; -}; - -#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) - -static inline struct tipc_msg *buf_msg(struct sk_buff *skb) -{ - return (struct tipc_msg *)skb->data; -} - -struct sk_buff *tipc_buf_acquire(u32 size); - #endif diff --git a/net/tipc/discover.c b/net/tipc/discover.c index aa722a42ef8b..feef3753615d 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, 2014, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * @@ -38,15 +38,19 @@ #include "link.h" #include "discover.h" -#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ -#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */ -#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */ -#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */ - +/* min delay during bearer start up */ +#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_LINK_REQ_INACTIVE 0xffffffff /** * struct tipc_link_req - information about an ongoing link setup request * @bearer_id: identity of bearer issuing requests + * @net: network namespace instance * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. 
with an active link) @@ -58,31 +62,35 @@ struct tipc_link_req { u32 bearer_id; struct tipc_media_addr dest; + struct net *net; u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; struct timer_list timer; - unsigned int timer_intv; + unsigned long timer_intv; }; /** * tipc_disc_init_msg - initialize a link setup message + * @net: the applicable net namespace * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, +static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, + INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); + msg_set_node_sig(msg, tn->random); msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); + msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } @@ -107,11 +115,14 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, /** * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: the applicable net namespace * @buf: buffer containing message * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *bearer) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_link *link; struct tipc_media_addr maddr; @@ -133,7 +144,7 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ - if (net_id != tipc_net_id) + if (net_id != tn->net_id) 
return; if (maddr.broadcast) return; @@ -142,23 +153,19 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (!tipc_addr_node_valid(onode)) return; - if (in_own_node(onode)) { + if (in_own_node(net, onode)) { if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) - disc_dupl_alert(bearer, tipc_own_addr, &maddr); + disc_dupl_alert(bearer, tn->own_addr, &maddr); return; } - if (!tipc_in_scope(ddom, tipc_own_addr)) + if (!tipc_in_scope(ddom, tn->own_addr)) return; if (!tipc_in_scope(bearer->domain, onode)) return; - /* Locate, or if necessary, create, node: */ - node = tipc_node_find(onode); - if (!node) - node = tipc_node_create(onode); + node = tipc_node_create(net, onode); if (!node) return; - tipc_node_lock(node); link = node->links[bearer->identity]; @@ -244,8 +251,8 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (respond && (mtyp == DSC_REQ_MSG)) { rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); - tipc_bearer_send(bearer->identity, rbuf, &maddr); + tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(net, bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } @@ -265,7 +272,7 @@ static void disc_update(struct tipc_link_req *req) if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || (req->timer_intv > TIPC_LINK_REQ_FAST)) { req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); } } } @@ -295,12 +302,13 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) /** * disc_timeout - send a periodic link setup request - * @req: ptr to link request structure + * @data: ptr to link request structure * * Called whenever a link setup request timer associated with a bearer expires. 
*/ -static void disc_timeout(struct tipc_link_req *req) +static void disc_timeout(unsigned long data) { + struct tipc_link_req *req = (struct tipc_link_req *)data; int max_delay; spin_lock_bh(&req->lock); @@ -318,7 +326,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -329,20 +337,22 @@ static void disc_timeout(struct tipc_link_req *req) if (req->timer_intv > max_delay) req->timer_intv = max_delay; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); exit: spin_unlock_bh(&req->lock); } /** * tipc_disc_create - create object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. 
*/ -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -356,17 +366,18 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) return -ENOMEM; } - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); - k_start_timer(&req->timer, req->timer_intv); + setup_timer(&req->timer, disc_timeout, (unsigned long)req); + mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); return 0; } @@ -376,28 +387,29 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) */ void tipc_disc_delete(struct tipc_link_req *req) { - k_cancel_timer(&req->timer); - k_term_timer(&req->timer); + del_timer_sync(&req->timer); kfree_skb(req->buf); kfree(req); } /** * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; spin_lock_bh(&req->lock); - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; - 
k_start_timer(&req->timer, req->timer_intv); - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + mod_timer(&req->timer, jiffies + req->timer_intv); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 515b57392f4d..c9b12770c5ed 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -39,11 +39,13 @@ struct tipc_link_req; -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); -void tipc_disc_reset(struct tipc_bearer *b_ptr); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 23bcc1132365..a4cf364316de 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -40,7 +40,6 @@ #include "socket.h" #include "name_distr.h" #include "discover.h" -#include "config.h" #include "netlink.h" #include <linux/pkt_sched.h> @@ -101,19 +100,20 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define START_CHANGEOVER 100000u -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, - struct sk_buff *buf); -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, - struct sk_buff **buf); -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); +static void link_handle_out_of_seq_msg(struct tipc_link *link, + struct sk_buff *skb); +static void tipc_link_proto_rcv(struct tipc_link *link, + struct sk_buff *skb); +static int 
tipc_link_tunnel_rcv(struct tipc_node *node, + struct sk_buff **skb); +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); +static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); /* * Simple link routines @@ -123,13 +123,30 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } +static void tipc_link_release(struct kref *kref) +{ + kfree(container_of(kref, struct tipc_link, ref)); +} + +static void tipc_link_get(struct tipc_link *l_ptr) +{ + kref_get(&l_ptr->ref); +} + +static void tipc_link_put(struct tipc_link *l_ptr) +{ + kref_put(&l_ptr->ref, tipc_link_release); +} + static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_node *node = l_ptr->owner; + struct tipc_net *tn = net_generic(node->net, tipc_net_id); struct tipc_bearer *b_ptr; u32 max_pkt; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (!b_ptr) { rcu_read_unlock(); return; @@ -169,8 +186,9 @@ int tipc_link_is_active(struct tipc_link *l_ptr) * link_timeout - handle expiration of link timer * @l_ptr: pointer to link */ -static void link_timeout(struct tipc_link *l_ptr) +static void link_timeout(unsigned long data) { + struct tipc_link *l_ptr = (struct tipc_link *)data; struct sk_buff *skb; tipc_node_lock(l_ptr->owner); @@ -215,11 +233,13 @@ static void link_timeout(struct tipc_link *l_ptr) 
tipc_link_push_packets(l_ptr); tipc_node_unlock(l_ptr->owner); + tipc_link_put(l_ptr); } -static void link_set_timer(struct tipc_link *l_ptr, u32 time) +static void link_set_timer(struct tipc_link *link, unsigned long time) { - k_start_timer(&l_ptr->timer, time); + if (!mod_timer(&link->timer, jiffies + time)) + tipc_link_get(link); } /** @@ -234,6 +254,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_msg *msg; char *if_name; @@ -259,12 +280,12 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, pr_warn("Link creation failed, no memory\n"); return NULL; } - + kref_init(&l_ptr->ref); l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), + tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f name is updated by reset/activate message */ @@ -278,9 +299,10 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(tn->own_addr, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, + l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, (tipc_random & 0xffff)); + msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); @@ -293,48 +315,52 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->next_out_no = 1; __skb_queue_head_init(&l_ptr->outqueue); __skb_queue_head_init(&l_ptr->deferred_queue); - skb_queue_head_init(&l_ptr->waiting_sks); - + 
skb_queue_head_init(&l_ptr->wakeupq); + skb_queue_head_init(&l_ptr->inputq); + skb_queue_head_init(&l_ptr->namedq); link_reset_statistics(l_ptr); - tipc_node_attach_link(n_ptr, l_ptr); - - k_init_timer(&l_ptr->timer, (Handler)link_timeout, - (unsigned long)l_ptr); - + setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); link_state_event(l_ptr, STARTING_EVT); return l_ptr; } -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) +/** + * link_delete - Conditional deletion of link. + * If timer still running, real delete is done when it expires + * @link: link to be deleted + */ +void tipc_link_delete(struct tipc_link *link) +{ + tipc_link_reset_fragments(link); + tipc_node_detach_link(link->owner, link); + tipc_link_put(link); +} + +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down) { - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *link; + struct tipc_node *node; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->links[bearer_id]; - if (l_ptr) { - tipc_link_reset(l_ptr); - if (shutting_down || !tipc_node_is_up(n_ptr)) { - tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_reset_fragments(l_ptr); - tipc_node_unlock(n_ptr); - - /* Nobody else can access this link now: */ - del_timer_sync(&l_ptr->timer); - kfree(l_ptr); - } else { - /* Detach/delete when failover is finished: */ - l_ptr->flags |= LINK_STOPPED; - tipc_node_unlock(n_ptr); - del_timer_sync(&l_ptr->timer); - } + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); continue; } - tipc_node_unlock(n_ptr); + tipc_link_reset(link); + if (del_timer(&link->timer)) + tipc_link_put(link); + link->flags |= LINK_STOPPED; + /* Delete link now, or when failover is finished: */ + if (shutting_down || 
!tipc_node_is_up(node)) + tipc_link_delete(link); + tipc_node_unlock(node); } rcu_read_unlock(); } @@ -352,13 +378,14 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, { struct sk_buff *buf; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, tipc_own_addr, - tipc_own_addr, oport, 0, 0); + buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + link_own_addr(link), link_own_addr(link), + oport, 0, 0); if (!buf) return false; TIPC_SKB_CB(buf)->chain_sz = chain_sz; TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->waiting_sks, buf); + skb_queue_tail(&link->wakeupq, buf); link->stats.link_congs++; return true; } @@ -369,17 +396,19 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -static void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *link) { uint pend_qsz = skb_queue_len(&link->outqueue); struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->waiting_sks, skb, tmp) { + skb_queue_walk_safe(&link->wakeupq, skb, tmp) { if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) break; pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->waiting_sks); - skb_queue_tail(&link->owner->waiting_sks, skb); + skb_unlink(skb, &link->wakeupq); + skb_queue_tail(&link->inputq, skb); + link->owner->inputq = &link->inputq; + link->owner->action_flags |= TIPC_MSG_EVT; } } @@ -425,20 +454,20 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); + tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; } - /* Clean up all queues: */ + /* Clean up all queues, except inputq: */ 
__skb_queue_purge(&l_ptr->outqueue); __skb_queue_purge(&l_ptr->deferred_queue); - if (!skb_queue_empty(&l_ptr->waiting_sks)) { - skb_queue_splice_init(&l_ptr->waiting_sks, &owner->waiting_sks); - owner->action_flags |= TIPC_WAKEUP_USERS; - } + skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq); + if (!skb_queue_empty(&l_ptr->inputq)) + owner->action_flags |= TIPC_MSG_EVT; + owner->inputq = &l_ptr->inputq; l_ptr->next_out = NULL; l_ptr->unacked_window = 0; l_ptr->checkpoint = 1; @@ -448,13 +477,14 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } -void tipc_link_reset_list(unsigned int bearer_id) +void tipc_link_reset_list(struct net *net, unsigned int bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) @@ -464,11 +494,14 @@ void tipc_link_reset_list(unsigned int bearer_id) rcu_read_unlock(); } -static void link_activate(struct tipc_link *l_ptr) +static void link_activate(struct tipc_link *link) { - l_ptr->next_in_no = l_ptr->stats.recv_info = 1; - tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); + struct tipc_node *node = link->owner; + + link->next_in_no = 1; + link->stats.recv_info = 1; + tipc_node_link_up(node, link); + tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } /** @@ -479,7 +512,7 @@ static void link_activate(struct tipc_link *l_ptr) static void link_state_event(struct tipc_link *l_ptr, unsigned int event) { struct tipc_link *other; - u32 cont_intv = l_ptr->continuity_interval; + unsigned long cont_intv = l_ptr->cont_intv; if (l_ptr->flags & LINK_STOPPED) return; @@ -522,8 +555,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv / 4); break; 
case RESET_MSG: - pr_info("%s<%s>, requested by peer\n", link_rst_msg, - l_ptr->name); + pr_debug("%s<%s>, requested by peer\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -533,7 +566,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; default: - pr_err("%s%u in WW state\n", link_unk_evt, event); + pr_debug("%s%u in WW state\n", link_unk_evt, event); } break; case WORKING_UNKNOWN: @@ -545,8 +578,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer while probing\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, requested by peer while probing\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -572,8 +605,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ - pr_warn("%s<%s>, peer not responding\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, peer not responding\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; @@ -614,7 +647,9 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case STARTING_EVT: l_ptr->flags |= LINK_STARTED; - /* fall through */ + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; case TIMEOUT_EVT: tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; @@ -700,7 +735,8 @@ drop: * Only the socket functions tipc_send_stream() and tipc_send_packet() need * to act on the return value, since they may need to do more send attempts. 
*/ -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); uint psz = msg_size(msg); @@ -733,7 +769,8 @@ int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) if (skb_queue_len(outqueue) < sndlim) { __skb_queue_tail(outqueue, skb); - tipc_bearer_send(link->bearer_id, skb, addr); + tipc_bearer_send(net, link->bearer_id, + skb, addr); link->next_out = NULL; link->unacked_window = 0; } else if (tipc_msg_bundle(outqueue, skb, mtu)) { @@ -758,7 +795,7 @@ int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) static void skb2list(struct sk_buff *skb, struct sk_buff_head *list) { - __skb_queue_head_init(list); + skb_queue_head_init(list); __skb_queue_tail(list, skb); } @@ -767,19 +804,21 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) struct sk_buff_head head; skb2list(skb, &head); - return __tipc_link_xmit(link, &head); + return __tipc_link_xmit(link->owner->net, link, &head); } -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) { struct sk_buff_head head; skb2list(skb, &head); - return tipc_link_xmit(&head, dnode, selector); + return tipc_link_xmit(net, &head, dnode, selector); } /** * tipc_link_xmit() is the general link level function for message sending + * @net: the applicable net namespace * @list: chain of buffers containing message * @dsz: amount of user data to be sent * @dnode: address of destination node @@ -787,33 +826,28 @@ int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_link_xmit(struct sk_buff_head *list, u32 dnode, u32 selector) +int tipc_link_xmit(struct net 
*net, struct sk_buff_head *list, u32 dnode, + u32 selector) { struct tipc_link *link = NULL; struct tipc_node *node; int rc = -EHOSTUNREACH; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (node) { tipc_node_lock(node); link = node->active_links[selector & 1]; if (link) - rc = __tipc_link_xmit(link, list); + rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); } - if (link) return rc; - if (likely(in_own_node(dnode))) { - /* As a node local message chain never contains more than one - * buffer, we just need to dequeue one SKB buffer from the - * head list. - */ - return tipc_sk_rcv(__skb_dequeue(list)); - } - __skb_queue_purge(list); + if (likely(in_own_node(net, dnode))) + return tipc_sk_rcv(net, list); + __skb_queue_purge(list); return rc; } @@ -835,7 +869,8 @@ static void tipc_link_sync_xmit(struct tipc_link *link) return; msg = buf_msg(skb); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); + tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG, + INT_H_SIZE, link->addr); msg_set_last_bcast(msg, link->owner->bclink.acked); __tipc_link_xmit_skb(link, skb); } @@ -890,7 +925,8 @@ void tipc_link_push_packets(struct tipc_link *l_ptr) msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (msg_user(msg) == MSG_BUNDLER) TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->bearer_id, skb, + tipc_bearer_send(l_ptr->owner->net, + l_ptr->bearer_id, skb, &l_ptr->media_addr); l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); } else { @@ -923,6 +959,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); + struct net *net = l_ptr->owner->net; pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); @@ -940,7 +977,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, pr_cont("Outstanding acks: %lu\n", (unsigned long) TIPC_SKB_CB(buf)->handle); - n_ptr = tipc_bclink_retransmit_to(); + n_ptr = 
tipc_bclink_retransmit_to(net); tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); @@ -955,7 +992,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, tipc_node_unlock(n_ptr); - tipc_bclink_set_flags(TIPC_BCLINK_RESET); + tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } @@ -987,7 +1024,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->bearer_id, skb, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, + &l_ptr->media_addr); retransmits--; l_ptr->stats.retransmitted++; } @@ -1063,14 +1101,16 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace * @skb: TIPC packet * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. 
*/ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1096,19 +1136,19 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) - tipc_disc_rcv(skb, b_ptr); + tipc_disc_rcv(net, skb, b_ptr); else - tipc_bclink_rcv(skb); + tipc_bclink_rcv(net, skb); continue; } /* Discard unicast link messages destined for another node */ if (unlikely(!msg_short(msg) && - (msg_destnode(msg) != tipc_own_addr))) + (msg_destnode(msg) != tn->own_addr))) goto discard; /* Locate neighboring node that sent message */ - n_ptr = tipc_node_find(msg_prevnode(msg)); + n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; tipc_node_lock(n_ptr); @@ -1116,7 +1156,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) - goto unlock_discard; + goto unlock; /* Verify that communication with node is currently allowed */ if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && @@ -1127,7 +1167,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; if (tipc_node_blocked(n_ptr)) - goto unlock_discard; + goto unlock; /* Validate message sequence number info */ seq_no = msg_seqno(msg); @@ -1151,18 +1191,16 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->next_out)) tipc_link_push_packets(l_ptr); - if (released && !skb_queue_empty(&l_ptr->waiting_sks)) { + if (released && !skb_queue_empty(&l_ptr->wakeupq)) link_prepare_wakeup(l_ptr); - l_ptr->owner->action_flags |= TIPC_WAKEUP_USERS; - } /* Process the incoming packet */ if 
(unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { tipc_link_proto_rcv(l_ptr, skb); link_retrieve_defq(l_ptr, &head); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } /* Traffic message. Conditionally activate link */ @@ -1171,18 +1209,18 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (link_working_working(l_ptr)) { /* Re-insert buffer in front of queue */ __skb_queue_head(&head, skb); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } - goto unlock_discard; + goto unlock; } /* Link is now in state WORKING_WORKING */ if (unlikely(seq_no != mod(l_ptr->next_in_no))) { link_handle_out_of_seq_msg(l_ptr, skb); link_retrieve_defq(l_ptr, &head); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferred_queue))) @@ -1192,95 +1230,102 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) l_ptr->stats.sent_acks++; tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - - if (tipc_link_prepare_input(l_ptr, &skb)) { - tipc_node_unlock(n_ptr); - continue; - } - tipc_node_unlock(n_ptr); - - if (tipc_link_input(l_ptr, skb) != 0) - goto discard; - continue; -unlock_discard: + tipc_link_input(l_ptr, skb); + skb = NULL; +unlock: tipc_node_unlock(n_ptr); discard: - kfree_skb(skb); + if (unlikely(skb)) + kfree_skb(skb); } } -/** - * tipc_link_prepare_input - process TIPC link messages - * - * returns nonzero if the message was consumed +/* tipc_data_input - deliver data and name distr msgs to upper layer * + * Consumes buffer if message is of right type * Node lock must be held */ -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) { - struct tipc_node *n; - struct tipc_msg *msg; - int res = -EINVAL; + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + u32 dport = 
msg_destport(msg); - n = l->owner; - msg = buf_msg(*buf); switch (msg_user(msg)) { - case CHANGEOVER_PROTOCOL: - if (tipc_link_tunnel_rcv(n, buf)) - res = 0; - break; - case MSG_FRAGMENTER: - l->stats.recv_fragments++; - if (tipc_buf_append(&l->reasm_buf, buf)) { - l->stats.recv_fragmented++; - res = 0; - } else if (!l->reasm_buf) { - tipc_link_reset(l); + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + if (tipc_skb_queue_tail(&link->inputq, skb, dport)) { + node->inputq = &link->inputq; + node->action_flags |= TIPC_MSG_EVT; } - break; - case MSG_BUNDLER: - l->stats.recv_bundles++; - l->stats.recv_bundled += msg_msgcnt(msg); - res = 0; - break; + return true; case NAME_DISTRIBUTOR: - n->bclink.recv_permitted = true; - res = 0; - break; + node->bclink.recv_permitted = true; + node->namedq = &link->namedq; + skb_queue_tail(&link->namedq, skb); + if (skb_queue_len(&link->namedq) == 1) + node->action_flags |= TIPC_NAMED_MSG_EVT; + return true; + case MSG_BUNDLER: + case CHANGEOVER_PROTOCOL: + case MSG_FRAGMENTER: case BCAST_PROTOCOL: - tipc_link_sync_rcv(n, *buf); - break; + return false; default: - res = 0; - } - return res; + pr_warn("Dropping received illegal msg type\n"); + kfree_skb(skb); + return false; + }; } -/** - * tipc_link_input - Deliver message too higher layers + +/* tipc_link_input - process packet that has passed link protocol check + * + * Consumes buffer + * Node lock must be held */ -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - int res = 0; + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + struct sk_buff *iskb; + int pos = 0; + + if (likely(tipc_data_input(link, skb))) + return; switch (msg_user(msg)) { - case TIPC_LOW_IMPORTANCE: - case TIPC_MEDIUM_IMPORTANCE: - case 
TIPC_HIGH_IMPORTANCE: - case TIPC_CRITICAL_IMPORTANCE: - case CONN_MANAGER: - tipc_sk_rcv(buf); + case CHANGEOVER_PROTOCOL: + if (!tipc_link_tunnel_rcv(node, &skb)) + break; + if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { + tipc_data_input(link, skb); + break; + } + case MSG_BUNDLER: + link->stats.recv_bundles++; + link->stats.recv_bundled += msg_msgcnt(msg); + + while (tipc_msg_extract(skb, &iskb, &pos)) + tipc_data_input(link, iskb); break; - case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); + case MSG_FRAGMENTER: + link->stats.recv_fragments++; + if (tipc_buf_append(&link->reasm_buf, &skb)) { + link->stats.recv_fragmented++; + tipc_data_input(link, skb); + } else if (!link->reasm_buf) { + tipc_link_reset(link); + } break; - case MSG_BUNDLER: - tipc_link_bundle_rcv(buf); + case BCAST_PROTOCOL: + tipc_link_sync_rcv(node, skb); break; default: - res = -EINVAL; - } - return res; + break; + }; } /** @@ -1381,7 +1426,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_type(msg, msg_typ); msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); + msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); if (msg_typ == STATE_MSG) { u32 next_sent = mod(l_ptr->next_out_no); @@ -1445,7 +1490,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, + &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1455,7 +1501,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, * Note that network plane id propagates through the network, and may * change at any time. 
The node with lowest address rules */ -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) +static void tipc_link_proto_rcv(struct tipc_link *l_ptr, + struct sk_buff *buf) { u32 rec_gap = 0; u32 max_pkt_info; @@ -1468,7 +1515,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) - if (tipc_own_addr > msg_prevnode(msg)) + if (link_own_addr(l_ptr) > msg_prevnode(msg)) l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1535,9 +1582,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - pr_warn("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, l_ptr->priority, - msg_linkprio(msg)); + pr_debug("%s<%s>, priority change %u->%u\n", + link_rst_msg, l_ptr->name, + l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; @@ -1636,8 +1683,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1694,8 +1741,8 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct sk_buff *skb; struct tipc_msg tunnel_hdr; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); skb_queue_walk(&l_ptr->outqueue, skb) { @@ -1729,7 +1776,7 @@ void tipc_link_dup_queue_xmit(struct 
tipc_link *l_ptr, * @from_pos: offset to extract from * * Returns a new message buffer containing an embedded message. The - * encapsulating message itself is left unchanged. + * encapsulating buffer is left unchanged. */ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) { @@ -1743,8 +1790,6 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } - - /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. */ @@ -1804,10 +1849,8 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, } } exit: - if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) { - tipc_node_detach_link(l_ptr->owner, l_ptr); - kfree(l_ptr); - } + if ((!l_ptr->exp_msg_count) && (l_ptr->flags & LINK_STOPPED)) + tipc_link_delete(l_ptr); return buf; } @@ -1845,50 +1888,16 @@ exit: return *buf != NULL; } -/* - * Bundler functionality: - */ -void tipc_link_bundle_rcv(struct sk_buff *buf) +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) { - u32 msgcount = msg_msgcnt(buf_msg(buf)); - u32 pos = INT_H_SIZE; - struct sk_buff *obuf; - struct tipc_msg *omsg; - - while (msgcount--) { - obuf = buf_extract(buf, pos); - if (obuf == NULL) { - pr_warn("Link unable to unbundle message(s)\n"); - break; - } - omsg = buf_msg(obuf); - pos += align(msg_size(omsg)); - if (msg_isdata(omsg)) { - if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) - tipc_sk_mcast_rcv(obuf); - else - tipc_sk_rcv(obuf); - } else if (msg_user(omsg) == CONN_MANAGER) { - tipc_sk_rcv(obuf); - } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { - tipc_named_rcv(obuf); - } else { - pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); - kfree_skb(obuf); - } - } - kfree_skb(buf); -} + unsigned long intv = ((tol / 4) > 500) ? 
500 : tol / 4; -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) -{ - if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) return; - l_ptr->tolerance = tolerance; - l_ptr->continuity_interval = - ((tolerance / 4) > 500) ? 500 : tolerance / 4; - l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4); + l_ptr->tolerance = tol; + l_ptr->cont_intv = msecs_to_jiffies(intv); + l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) @@ -1911,22 +1920,25 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) } /* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ -static struct tipc_node *tipc_link_find_owner(const char *link_name, +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, unsigned int *bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; - struct tipc_node *found_node = 0; + struct tipc_node *found_node = NULL; int i; *bearer_id = 0; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i]; @@ -1946,148 +1958,6 @@ static struct tipc_node *tipc_link_find_owner(const char *link_name, } /** - * link_value_is_valid -- validate proposed link tolerance/priority/window - * - * @cmd: value type (TIPC_CMD_SET_LINK_*) - * @new_value: the new value - * - * Returns 1 if value is within range, 0 if not. 
- */ -static int link_value_is_valid(u16 cmd, u32 new_value) -{ - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - return (new_value >= TIPC_MIN_LINK_TOL) && - (new_value <= TIPC_MAX_LINK_TOL); - case TIPC_CMD_SET_LINK_PRI: - return (new_value <= TIPC_MAX_LINK_PRI); - case TIPC_CMD_SET_LINK_WINDOW: - return (new_value >= TIPC_MIN_LINK_WIN) && - (new_value <= TIPC_MAX_LINK_WIN); - } - return 0; -} - -/** - * link_cmd_set_value - change priority/tolerance/window for link/bearer/media - * @name: ptr to link, bearer, or media name - * @new_value: new value of link, bearer, or media setting - * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) - * - * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. - * - * Returns 0 if value updated and negative value on error. - */ -static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) -{ - struct tipc_node *node; - struct tipc_link *l_ptr; - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; - int bearer_id; - int res = 0; - - node = tipc_link_find_owner(name, &bearer_id); - if (node) { - tipc_node_lock(node); - l_ptr = node->links[bearer_id]; - - if (l_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - link_set_supervision_props(l_ptr, new_value); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, - new_value, 0, 0); - break; - case TIPC_CMD_SET_LINK_PRI: - l_ptr->priority = new_value; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, - 0, new_value, 0); - break; - case TIPC_CMD_SET_LINK_WINDOW: - tipc_link_set_queue_limits(l_ptr, new_value); - break; - default: - res = -EINVAL; - break; - } - } - tipc_node_unlock(node); - return res; - } - - b_ptr = tipc_bearer_find(name); - if (b_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - b_ptr->tolerance = new_value; - break; - case TIPC_CMD_SET_LINK_PRI: - b_ptr->priority = new_value; - break; - case TIPC_CMD_SET_LINK_WINDOW: - b_ptr->window = new_value; - break; - default: - res = -EINVAL; - break; - } - return res; - 
} - - m_ptr = tipc_media_find(name); - if (!m_ptr) - return -ENODEV; - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - m_ptr->tolerance = new_value; - break; - case TIPC_CMD_SET_LINK_PRI: - m_ptr->priority = new_value; - break; - case TIPC_CMD_SET_LINK_WINDOW: - m_ptr->window = new_value; - break; - default: - res = -EINVAL; - break; - } - return res; -} - -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, - u16 cmd) -{ - struct tipc_link_config *args; - u32 new_value; - int res; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_link_config *)TLV_DATA(req_tlv_area); - new_value = ntohl(args->value); - - if (!link_value_is_valid(cmd, new_value)) - return tipc_cfg_reply_error_string( - "cannot change, value invalid"); - - if (!strcmp(args->name, tipc_bclink_name)) { - if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && - (tipc_bclink_set_queue_limits(new_value) == 0)) - return tipc_cfg_reply_none(); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change setting on broadcast link)"); - } - - res = link_cmd_set_value(args->name, new_value, cmd); - if (res) - return tipc_cfg_reply_error_string("cannot change link setting"); - - return tipc_cfg_reply_none(); -} - -/** * link_reset_statistics - reset link statistics * @l_ptr: pointer to link */ @@ -2098,207 +1968,13 @@ static void link_reset_statistics(struct tipc_link *l_ptr) l_ptr->stats.recv_info = l_ptr->next_in_no; } -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space) -{ - char *link_name; - struct tipc_link *l_ptr; - struct tipc_node *node; - unsigned int bearer_id; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - link_name = (char *)TLV_DATA(req_tlv_area); - if (!strcmp(link_name, tipc_bclink_name)) { - if (tipc_bclink_reset_stats()) - return 
tipc_cfg_reply_error_string("link not found"); - return tipc_cfg_reply_none(); - } - node = tipc_link_find_owner(link_name, &bearer_id); - if (!node) - return tipc_cfg_reply_error_string("link not found"); - - tipc_node_lock(node); - l_ptr = node->links[bearer_id]; - if (!l_ptr) { - tipc_node_unlock(node); - return tipc_cfg_reply_error_string("link not found"); - } - link_reset_statistics(l_ptr); - tipc_node_unlock(node); - return tipc_cfg_reply_none(); -} - -/** - * percent - convert count to a percentage of total (rounding up or down) - */ -static u32 percent(u32 count, u32 total) -{ - return (count * 100 + (total / 2)) / total; -} - -/** - * tipc_link_stats - print link statistics - * @name: link name - * @buf: print buffer area - * @buf_size: size of print buffer area - * - * Returns length of print buffer data string (or 0 if error) - */ -static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) -{ - struct tipc_link *l; - struct tipc_stats *s; - struct tipc_node *node; - char *status; - u32 profile_total = 0; - unsigned int bearer_id; - int ret; - - if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(buf, buf_size); - - node = tipc_link_find_owner(name, &bearer_id); - if (!node) - return 0; - - tipc_node_lock(node); - - l = node->links[bearer_id]; - if (!l) { - tipc_node_unlock(node); - return 0; - } - - s = &l->stats; - - if (tipc_link_is_active(l)) - status = "ACTIVE"; - else if (tipc_link_is_up(l)) - status = "STANDBY"; - else - status = "DEFUNCT"; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " %s MTU:%u Priority:%u Tolerance:%u ms" - " Window:%u packets\n", - l->name, status, l->max_pkt, l->priority, - l->tolerance, l->queue_limit[0]); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_in_no - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u 
fragments:%u/%u bundles:%u/%u\n", - l->next_out_no - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - - profile_total = s->msg_length_counts; - if (!profile_total) - profile_total = 1; - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX profile sample:%u packets average:%u octets\n" - " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% " - "-16384:%u%% -32768:%u%% -66000:%u%%\n", - s->msg_length_counts, - s->msg_lengths_total / profile_total, - percent(s->msg_length_profile[0], profile_total), - percent(s->msg_length_profile[1], profile_total), - percent(s->msg_length_profile[2], profile_total), - percent(s->msg_length_profile[3], profile_total), - percent(s->msg_length_profile[4], profile_total), - percent(s->msg_length_profile[5], profile_total), - percent(s->msg_length_profile[6], profile_total)); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX states:%u probes:%u naks:%u defs:%u" - " dups:%u\n", s->recv_states, s->recv_probes, - s->recv_nacks, s->deferred_recv, s->duplicates); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX states:%u probes:%u naks:%u acks:%u" - " dups:%u\n", s->sent_states, s->sent_probes, - s->sent_nacks, s->sent_acks, s->retransmitted); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue" - " max:%u avg:%u\n", s->link_congs, - s->max_queue_sz, s->queue_sz_counts ? 
- (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_node_unlock(node); - return ret; -} - -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - int str_len; - int pb_len; - char *pb; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area), - pb, pb_len); - if (!str_len) { - kfree_skb(buf); - return tipc_cfg_reply_error_string("link not found"); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -/** - * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination - * @dest: network address of destination node - * @selector: used to select from set of active links - * - * If no active link can be found, uses default maximum packet size. 
- */ -u32 tipc_link_get_max_pkt(u32 dest, u32 selector) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - u32 res = MAX_PKT_DEFAULT; - - if (dest == tipc_own_addr) - return MAX_MSG_SIZE; - - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = l_ptr->max_pkt; - tipc_node_unlock(n_ptr); - } - return res; -} - static void link_print(struct tipc_link *l_ptr, const char *str) { + struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (b_ptr) pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); rcu_read_unlock(); @@ -2362,6 +2038,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2377,7 +2054,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2493,14 +2170,16 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) +static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; @@ -2512,7 +2191,7 @@ static int __tipc_nl_add_link(struct 
tipc_nl_msg *msg, struct tipc_link *link) if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, - tipc_cluster_mask(tipc_own_addr))) + tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) goto attr_msg_full; @@ -2562,9 +2241,8 @@ msg_full: } /* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, - struct tipc_node *node, - u32 *prev_link) +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) { u32 i; int err; @@ -2575,7 +2253,7 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, if (!node->links[i]) continue; - err = __tipc_nl_add_link(msg, node->links[i]); + err = __tipc_nl_add_link(net, msg, node->links[i]); if (err) return err; } @@ -2586,6 +2264,8 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; @@ -2603,7 +2283,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); if (prev_node) { - node = tipc_node_find(prev_node); + node = tipc_node_find(net, prev_node); if (!node) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the @@ -2615,9 +2295,11 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; } - list_for_each_entry_continue_rcu(node, &tipc_node_list, list) { + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2625,13 
+2307,14 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(&msg); + err = tipc_nl_add_bc_link(net, &msg); if (err) goto out; - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2652,6 +2335,7 @@ out: int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *ans_skb; struct tipc_nl_msg msg; struct tipc_link *link; @@ -2664,7 +2348,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2683,7 +2367,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_link(&msg, link); + err = __tipc_nl_add_link(net, &msg, link); if (err) goto err_out; @@ -2706,6 +2390,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2722,13 +2407,13 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(); + err = tipc_bclink_reset_stats(net); if (err) return err; return 0; } - node = tipc_link_find_owner(link_name, &bearer_id); + node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 
55812e87ca1e..7aeb52092bf3 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -41,6 +41,10 @@ #include "msg.h" #include "node.h" +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 @@ -99,13 +103,14 @@ struct tipc_stats { * @media_addr: media address to use when sending messages over link * @timer: link timer * @owner: pointer to peer node + * @refcnt: reference counter for permanent references (owner node & timer) * @flags: execution state flags for link endpoint instance * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @continuity_interval: link continuity testing interval [in ms] + * @cont_intv: link continuity testing interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state @@ -126,8 +131,10 @@ struct tipc_stats { * @next_in_no: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards * @next_out: ptr to first unsent outbound message in queue - * @waiting_sks: linked list of sockets waiting for link congestion to abate + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments * @stats: 
collects statistics regarding link activity @@ -138,6 +145,7 @@ struct tipc_link { struct tipc_media_addr media_addr; struct timer_list timer; struct tipc_node *owner; + struct kref ref; /* Management and link supervision data */ unsigned int flags; @@ -146,7 +154,7 @@ struct tipc_link { u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - u32 continuity_interval; + unsigned long cont_intv; u32 abort_limit; int state; u32 fsm_msg_cnt; @@ -178,10 +186,12 @@ struct tipc_link { u32 next_in_no; struct sk_buff_head deferred_queue; u32 unacked_window; + struct sk_buff_head inputq; + struct sk_buff_head namedq; /* Congestion handling */ struct sk_buff *next_out; - struct sk_buff_head waiting_sks; + struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ u32 long_msg_seq_no; @@ -196,28 +206,24 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down); +void tipc_link_delete(struct tipc_link *link); +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, - int req_tlv_space, - u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, - int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, - int req_tlv_space); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_reset_list(unsigned int bearer_id); -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dest, u32 
selector); -int tipc_link_xmit(struct sk_buff_head *list, u32 dest, u32 selector); -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list); -u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -void tipc_link_bundle_rcv(struct sk_buff *buf); +void tipc_link_reset_list(struct net *net, unsigned int bearer_id); +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, + u32 selector); +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_packets(struct tipc_link *l_ptr); @@ -233,6 +239,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); +void link_prepare_wakeup(struct tipc_link *l); /* * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) @@ -267,6 +274,10 @@ static inline u32 lesser(u32 left, u32 right) return less_eq(left, right) ? left : right; } +static inline u32 link_own_addr(struct tipc_link *l) +{ + return msg_prevnode(l->pmsg); +} /* * Link status checking routines diff --git a/net/tipc/log.c b/net/tipc/log.c deleted file mode 100644 index abef644f27d8..000000000000 --- a/net/tipc/log.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * net/tipc/log.c: TIPC print buffer routines for debugging - * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "config.h" - -/** - * tipc_snprintf - append formatted output to print buffer - * @buf: pointer to print buffer - * @len: buffer length - * @fmt: formatted info to be printed - */ -int tipc_snprintf(char *buf, int len, const char *fmt, ...) 
-{ - int i; - va_list args; - - va_start(args, fmt); - i = vscnprintf(buf, len, fmt, args); - va_end(args); - return i; -} diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a687b30a699c..b6eb90cd3ef7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" #include "msg.h" #include "addr.h" @@ -46,25 +47,48 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode) +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. + */ +struct sk_buff *tipc_buf_acquire(u32 size) +{ + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + +void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 dnode) { memset(m, 0, hsize); msg_set_version(m); msg_set_user(m, user); msg_set_hdr_sz(m, hsize); msg_set_size(m, hsize); - msg_set_prevnode(m, tipc_own_addr); + msg_set_prevnode(m, own_node); msg_set_type(m, type); if (hsize > SHORT_H_SIZE) { - msg_set_orignode(m, tipc_own_addr); - msg_set_destnode(m, destnode); + msg_set_orignode(m, own_node); + msg_set_destnode(m, dnode); } } -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode) +struct sk_buff *tipc_msg_create(uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) { struct tipc_msg *msg; struct sk_buff *buf; @@ -74,9 +98,8 @@ 
struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, return NULL; msg = buf_msg(buf); - tipc_msg_init(msg, user, type, hdr_sz, dnode); + tipc_msg_init(onode, msg, user, type, hdr_sz, dnode); msg_set_size(msg, hdr_sz + data_sz); - msg_set_prevnode(msg, onode); msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); @@ -163,15 +186,14 @@ err: * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message - * @offset: Posision in iov to start copying from * @dsz: Total length of user data * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); int msz = mhsz + dsz; @@ -191,19 +213,19 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(msz); if (unlikely(!skb)) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); pktpos = skb->data + mhsz; - if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, - dsz)) + if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz) return dsz; rc = -EFAULT; goto error; } /* Prepare reusable fragment header */ - tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(mhdr)); + tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER, + FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); @@ -211,6 +233,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(pktmax); if (!skb) return -ENOMEM; + 
skb_orphan(skb); __skb_queue_tail(list, skb); pktpos = skb->data; skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); @@ -224,12 +247,11 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, if (drem < pktrem) pktrem = drem; - if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) { + if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) { rc = -EFAULT; goto error; } drem -= pktrem; - offset += pktrem; if (!drem) break; @@ -244,6 +266,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, rc = -ENOMEM; goto error; } + skb_orphan(skb); __skb_queue_tail(list, skb); msg_set_type(&pkthdr, FRAGMENT); msg_set_size(&pkthdr, pktsz); @@ -304,6 +327,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) } /** + * tipc_msg_extract(): extract bundled inner packet from buffer + * @skb: linear outer buffer, to be extracted from. + * @iskb: extracted inner buffer, to be returned + * @pos: position of msg to be extracted. Returns with pointer of next msg + * Consumes outer buffer when last packet extracted + * Returns true when there is an extracted buffer, otherwise false + */ +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) +{ + struct tipc_msg *msg = buf_msg(skb); + int imsz; + struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos); + + /* Is there space left for shortest possible message? */ + if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) + goto none; + imsz = msg_size(imsg); + + /* Is there space left for current message ? 
*/ + if ((*pos + imsz) > msg_data_sz(msg)) + goto none; + *iskb = tipc_buf_acquire(imsz); + if (!*iskb) + goto none; + skb_copy_to_linear_data(*iskb, imsg, imsz); + *pos += align(imsz); + return true; +none: + kfree_skb(skb); + *iskb = NULL; + return false; +} + +/** * tipc_msg_make_bundle(): Create bundle buf and append message to its tail * @list: the buffer chain * @skb: buffer to be appended and replaced @@ -312,8 +369,8 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; @@ -336,7 +393,8 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, skb_trim(bskb, INT_H_SIZE); bmsg = buf_msg(bskb); - tipc_msg_init(bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); + tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, + INT_H_SIZE, dnode); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); @@ -353,7 +411,8 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, * Consumes buffer if failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, + int err) { struct tipc_msg *msg = buf_msg(buf); uint imp = msg_importance(msg); @@ -374,7 +433,7 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); - msg_set_prevnode(msg, tipc_own_addr); + msg_set_prevnode(msg, own_addr); if (!msg_short(msg)) { msg_set_orignode(msg, msg_destnode(&ohdr)); msg_set_destnode(msg, msg_orignode(&ohdr)); @@ 
-386,43 +445,43 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) return true; exit: kfree_skb(buf); + *dnode = 0; return false; } /** - * tipc_msg_eval: determine fate of message that found no destination - * @buf: the buffer containing the message. - * @dnode: return value: next-hop node, if message to be forwarded - * @err: error code to use, if message to be rejected - * + * tipc_msg_lookup_dest(): try to find new destination for named message + * @skb: the buffer containing the message. + * @dnode: return value: next-hop node, if destination found + * @err: return value: error code to use, if message to be rejected * Does not consume buffer - * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error - * code if message to be rejected + * Returns true if a destination is found, false otherwise */ -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, + u32 *dnode, int *err) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(skb); u32 dport; - if (msg_type(msg) != TIPC_NAMED_MSG) - return -TIPC_ERR_NO_PORT; - if (skb_linearize(buf)) - return -TIPC_ERR_NO_NAME; - if (msg_data_sz(msg) > MAX_FORWARD_SIZE) - return -TIPC_ERR_NO_NAME; + if (!msg_isdata(msg)) + return false; + if (!msg_named(msg)) + return false; + *err = -TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; if (msg_reroute_cnt(msg) > 0) - return -TIPC_ERR_NO_NAME; - - *dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), - msg_nameinst(msg), - dnode); + return false; + *dnode = addr_domain(net, msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(net, msg_nametype(msg), + msg_nameinst(msg), dnode); if (!dport) - return -TIPC_ERR_NO_NAME; + return false; msg_incr_reroute_cnt(msg); msg_set_destnode(msg, *dnode); msg_set_destport(msg, dport); - return TIPC_OK; + *err = TIPC_OK; + return true; } /* tipc_msg_reassemble() - clone a buffer 
chain of fragments and diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d5c83d7ecb47..9ace47f44a69 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,7 +37,7 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "bearer.h" +#include <linux/tipc.h> /* * Constants and routines used to read and write TIPC payload message headers @@ -45,6 +45,7 @@ * Note: Some items are also used with TIPC internal message headers */ #define TIPC_VERSION 2 +struct plist; /* * Payload message users are defined in TIPC's public API: @@ -77,11 +78,37 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 +/** + * TIPC message buffer code + * + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). + * + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access + */ +#define BUF_HEADROOM LL_MAX_HEADER + +struct tipc_skb_cb { + void *handle; + struct sk_buff *tail; + bool deferred; + bool wakeup_pending; + bool bundling; + u16 chain_sz; + u16 chain_imp; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) struct tipc_msg { __be32 hdr[15]; }; +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} static inline u32 msg_word(struct tipc_msg *m, u32 pos) { @@ -721,27 +748,111 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) return msg_origport(m); } -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); - -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); - -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode); - +struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, + int err); +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 
dport, u32 oport, int errcode); - int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); - bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); +bool tipc_msg_make_bundle(struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, + int *err); +struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode); - -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int mtu, struct sk_buff_head *list); +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in + * Returns pointer to first buffer in list, if any + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + return skb; +} + +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search + * Returns a destination port number, if applicable. 
+ */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_tail(): add buffer to tail of list; + * @list: list to be appended to + * @skb: buffer to append. 
Always appended + * @dport: the destination port of the buffer + * returns true if dport differs from previous destination + */ +static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, + struct sk_buff *skb, u32 dport) +{ + struct sk_buff *_skb = NULL; + bool rv = false; -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); + spin_lock_bh(&list->lock); + _skb = skb_peek_tail(list); + if (!_skb || (msg_destport(buf_msg(_skb)) != dport) || + (skb_queue_len(list) > 32)) + rv = true; + __skb_queue_tail(list, skb); + spin_unlock_bh(&list->lock); + return rv; +} #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ba6083dca95b..fcb07915aaac 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -68,29 +68,33 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message */ -static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); + tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type, + INT_H_SIZE, dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; } -void named_cluster_distribute(struct sk_buff *skb) +void named_cluster_distribute(struct net *net, struct sk_buff *skb) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *oskb; struct tipc_node *node; u32 dnode; rcu_read_lock(); - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { dnode = node->addr; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) continue; if (!tipc_node_active_links(node)) continue; @@ -98,7 +102,7 @@ void named_cluster_distribute(struct 
sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_link_xmit_skb(oskb, dnode, dnode); + tipc_link_xmit_skb(net, oskb, dnode, dnode); } rcu_read_unlock(); @@ -108,18 +112,19 @@ void named_cluster_distribute(struct sk_buff *skb) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -struct sk_buff *tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; struct distr_item *item; list_add_tail_rcu(&publ->local_list, - &tipc_nametbl->publ_list[publ->scope]); + &tn->nametbl->publ_list[publ->scope]); if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); + buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); return NULL; @@ -133,7 +138,7 @@ struct sk_buff *tipc_named_publish(struct publication *publ) /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -struct sk_buff *tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -143,7 +148,7 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); + buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); return NULL; @@ -160,19 +165,21 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain */ -static void named_distribute(struct sk_buff_head *list, u32 dnode, - struct list_head *pls) +static void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head 
*pls) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; + uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * + ITEM_SIZE; uint msg_rem = msg_dsz; list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ if (!skb) { - skb = named_prepare_buf(PUBLICATION, msg_rem, dnode); + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); if (!skb) { pr_warn("Bulk publication failure\n"); return; @@ -202,30 +209,32 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(u32 dnode) +void tipc_named_node_up(struct net *net, u32 dnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(&head, dnode, - &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(&head, dnode, - &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_link_xmit(&head, dnode, dnode); + tipc_link_xmit(net, &head, dnode, dnode); } -static void tipc_publ_subscribe(struct publication *publ, u32 addr) +static void tipc_publ_subscribe(struct net *net, struct publication *publ, + u32 addr) { struct tipc_node *node; - if (in_own_node(addr)) + if (in_own_node(net, addr)) return; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) { pr_warn("Node subscription rejected, unknown node 0x%x\n", addr); @@ -237,11 +246,12 @@ static void tipc_publ_subscribe(struct publication *publ, u32 addr) tipc_node_unlock(node); } -static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) +static void tipc_publ_unsubscribe(struct net *net, struct 
publication *publ, + u32 addr) { struct tipc_node *node; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) return; @@ -256,16 +266,17 @@ static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. */ -static void tipc_publ_purge(struct publication *publ, u32 addr) +static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *p; - spin_lock_bh(&tipc_nametbl_lock); - p = tipc_nametbl_remove_publ(publ->type, publ->lower, + spin_lock_bh(&tn->nametbl_lock); + p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(p, addr); - spin_unlock_bh(&tipc_nametbl_lock); + tipc_publ_unsubscribe(net, p, addr); + spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" @@ -277,12 +288,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) kfree_rcu(p, rcu); } -void tipc_publ_notify(struct list_head *nsub_list, u32 addr) +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) - tipc_publ_purge(publ, addr); + tipc_publ_purge(net, publ, addr); } /** @@ -292,25 +303,28 @@ void tipc_publ_notify(struct list_head *nsub_list, u32 addr) * tipc_nametbl_lock must be held. * Returns the publication item if successful, otherwise NULL. 
*/ -static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) { struct publication *publ = NULL; if (dtype == PUBLICATION) { - publ = tipc_nametbl_insert_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_insert_publ(net, ntohl(i->type), + ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(publ, node); + tipc_publ_subscribe(net, publ, node); return true; } } else if (dtype == WITHDRAWAL) { - publ = tipc_nametbl_remove_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_remove_publ(net, ntohl(i->type), + ntohl(i->lower), node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(publ, node); + tipc_publ_unsubscribe(net, publ, node); kfree_rcu(publ, rcu); return true; } @@ -343,7 +357,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) * tipc_named_process_backlog - try to process any pending name table updates * from the network. 
*/ -void tipc_named_process_backlog(void) +void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; char addr[16]; @@ -351,7 +365,7 @@ void tipc_named_process_backlog(void) list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { if (time_after(e->expires, now)) { - if (!tipc_update_nametbl(&e->i, e->node, e->dtype)) + if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; } else { tipc_addr_string_fill(addr, e->node); @@ -367,24 +381,34 @@ void tipc_named_process_backlog(void) } /** - * tipc_named_rcv - process name table update message sent by another node + * tipc_named_rcv - process name table update messages sent by another node */ -void tipc_named_rcv(struct sk_buff *buf) +void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) { - struct tipc_msg *msg = buf_msg(buf); - struct distr_item *item = (struct distr_item *)msg_data(msg); - u32 count = msg_data_sz(msg) / ITEM_SIZE; - u32 node = msg_orignode(msg); - - spin_lock_bh(&tipc_nametbl_lock); - while (count--) { - if (!tipc_update_nametbl(item, node, msg_type(msg))) - tipc_named_add_backlog(item, msg_type(msg), node); - item++; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_msg *msg; + struct distr_item *item; + uint count; + u32 node; + struct sk_buff *skb; + int mtype; + + spin_lock_bh(&tn->nametbl_lock); + for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + msg = buf_msg(skb); + mtype = msg_type(msg); + item = (struct distr_item *)msg_data(msg); + count = msg_data_sz(msg) / ITEM_SIZE; + node = msg_orignode(msg); + while (count--) { + if (!tipc_update_nametbl(net, item, node, mtype)) + tipc_named_add_backlog(item, mtype, node); + item++; + } + kfree_skb(skb); + tipc_named_process_backlog(net); } - tipc_named_process_backlog(); - spin_unlock_bh(&tipc_nametbl_lock); - kfree_skb(buf); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -394,17 +418,18 @@ void tipc_named_rcv(struct sk_buff *buf) * All name table entries 
published by this node are updated to reflect * the node's new network address. */ -void tipc_named_reinit(void) +void tipc_named_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int scope; - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope], + list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], local_list) - publ->node = tipc_own_addr; + publ->node = tn->own_addr; - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index cef55cedcfb2..dd2d9fd80da2 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -67,13 +67,13 @@ struct distr_item { __be32 key; }; -struct sk_buff *tipc_named_publish(struct publication *publ); -struct sk_buff *tipc_named_withdraw(struct publication *publ); -void named_cluster_distribute(struct sk_buff *buf); -void tipc_named_node_up(u32 dnode); -void tipc_named_rcv(struct sk_buff *buf); -void tipc_named_reinit(void); -void tipc_named_process_backlog(void); -void tipc_publ_notify(struct list_head *nsub_list, u32 addr); +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); +void named_cluster_distribute(struct net *net, struct sk_buff *buf); +void tipc_named_node_up(struct net *net, u32 dnode); +void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); +void tipc_named_reinit(struct net *net); +void tipc_named_process_backlog(struct net *net); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index c8df0223371a..105ba7adf06f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: 
TIPC name table code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * @@ -34,11 +34,15 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" -#include "config.h" +#include "netlink.h" #include "name_table.h" #include "name_distr.h" #include "subscr.h" +#include "bcast.h" +#include "addr.h" +#include <net/genetlink.h> #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -105,9 +109,6 @@ struct name_seq { struct rcu_head rcu; }; -struct name_table *tipc_nametbl; -DEFINE_SPINLOCK(tipc_nametbl_lock); - static int hash(int x) { return x & (TIPC_NAMETBL_SIZE - 1); @@ -228,9 +229,11 @@ static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance) /** * tipc_nameseq_insert_publ */ -static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, - u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +static struct publication *tipc_nameseq_insert_publ(struct net *net, + struct name_seq *nseq, + u32 type, u32 lower, + u32 upper, u32 scope, + u32 node, u32 port, u32 key) { struct tipc_subscription *s; struct tipc_subscription *st; @@ -315,12 +318,12 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, list_add(&publ->zone_list, &info->zone_list); info->zone_list_size++; - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_add(&publ->cluster_list, &info->cluster_list); info->cluster_list_size++; } - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_add(&publ->node_list, &info->node_list); info->node_list_size++; } @@ -349,8 +352,10 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, * A failed withdraw request simply returns a failure indication and lets the * caller issue any error or warning messages associated with such a problem. 
*/ -static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, - u32 node, u32 ref, u32 key) +static struct publication *tipc_nameseq_remove_publ(struct net *net, + struct name_seq *nseq, + u32 inst, u32 node, + u32 ref, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -378,13 +383,13 @@ found: info->zone_list_size--; /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_del(&publ->cluster_list); info->cluster_list_size--; } /* Remove publication from node scope list, if present */ - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_del(&publ->node_list); info->node_list_size--; } @@ -447,12 +452,13 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, } } -static struct name_seq *nametbl_find_seq(u32 type) +static struct name_seq *nametbl_find_seq(struct net *net, u32 type) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *ns; - seq_head = &tipc_nametbl->seq_hlist[hash(type)]; + seq_head = &tn->nametbl->seq_hlist[hash(type)]; hlist_for_each_entry_rcu(ns, seq_head, ns_list) { if (ns->type == type) return ns; @@ -461,11 +467,13 @@ static struct name_seq *nametbl_find_seq(u32 type) return NULL; }; -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 port, u32 key) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || @@ -476,29 +484,29 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, } if (!seq) - seq = 
tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (!seq) return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_insert_publ(seq, type, lower, upper, + publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper, scope, node, port, key); spin_unlock_bh(&seq->lock); return publ; } -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, + u32 key) { struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); if (!seq) return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -523,8 +531,10 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, * - if name translation is attempted and fails, sets 'destnode' to 0 * and returns 0 */ -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, + u32 *destnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sub_seq *sseq; struct name_info *info; struct publication *publ; @@ -532,11 +542,11 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) u32 ref = 0; u32 node = 0; - if (!tipc_in_scope(*destnode, tipc_own_addr)) + if (!tipc_in_scope(*destnode, tn->own_addr)) return 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (unlikely(!seq)) goto not_found; spin_lock_bh(&seq->lock); @@ -569,13 +579,13 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) } /* Round-Robin Algorithm */ - else if (*destnode == tipc_own_addr) { + else if 
(*destnode == tn->own_addr) { if (list_empty(&info->node_list)) goto no_match; publ = list_first_entry(&info->node_list, struct publication, node_list); list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(*destnode)) { + } else if (in_own_cluster_exact(net, *destnode)) { if (list_empty(&info->cluster_list)) goto no_match; publ = list_first_entry(&info->cluster_list, struct publication, @@ -609,8 +619,8 @@ not_found: * * Returns non-zero if any off-node ports overlap */ -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports) +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_plist *dports) { struct name_seq *seq; struct sub_seq *sseq; @@ -619,7 +629,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, int res = 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (!seq) goto exit; @@ -635,7 +645,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, info = sseq->info; list_for_each_entry(publ, &info->node_list, node_list) { if (publ->scope <= limit) - tipc_port_list_add(dports, publ->ref); + tipc_plist_push(dports, publ->ref); } if (info->cluster_list_size != info->node_list_size) @@ -650,50 +660,55 @@ exit: /* * tipc_nametbl_publish - add name publication to network name tables */ -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key) { struct publication *publ; struct sk_buff *buf = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { + spin_lock_bh(&tn->nametbl_lock); + if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication 
failed, local publication limit reached (%u)\n", TIPC_MAX_PUBLICATIONS); - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); return NULL; } - publ = tipc_nametbl_insert_publ(type, lower, upper, scope, - tipc_own_addr, port_ref, key); + publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, + tn->own_addr, port_ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count++; - buf = tipc_named_publish(publ); + tn->nametbl->local_publ_count++; + buf = tipc_named_publish(net, publ); /* Any pending external events? */ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(buf); + named_cluster_distribute(net, buf); return publ; } /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key) { struct publication *publ; struct sk_buff *skb = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); + spin_lock_bh(&tn->nametbl_lock); + publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, + ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count--; - skb = tipc_named_withdraw(publ); + tn->nametbl->local_publ_count--; + skb = tipc_named_withdraw(net, publ); /* Any pending external events? 
*/ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); list_del_init(&publ->pport_list); kfree_rcu(publ, rcu); } else { @@ -701,10 +716,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) "(type=%u, lower=%u, ref=%u, key=%u)\n", type, lower, ref, key); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(skb); + named_cluster_distribute(net, skb); return 1; } return 0; @@ -715,15 +730,15 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) */ void tipc_nametbl_subscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = s->seq.type; int index = hash(type); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, type); if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); tipc_nameseq_subscribe(seq, s); @@ -732,7 +747,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) pr_warn("Failed to create subscription for {%u,%u,%u}\n", s->seq.type, s->seq.lower, s->seq.upper); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -740,10 +755,11 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) */ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(s->seq.type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, s->seq.type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); @@ -756,193 +772,13 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) spin_unlock_bh(&seq->lock); } } - spin_unlock_bh(&tipc_nametbl_lock); -} - -/** - * subseq_list 
- print specified sub-sequence contents into the given buffer - */ -static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) -{ - char portIdStr[27]; - const char *scope_str[] = {"", " zone", " cluster", " node"}; - struct publication *publ; - struct name_info *info; - int ret; - - ret = tipc_snprintf(buf, len, "%-10u %-10u ", sseq->lower, sseq->upper); - - if (depth == 2) { - ret += tipc_snprintf(buf - ret, len + ret, "\n"); - return ret; - } - - info = sseq->info; - - list_for_each_entry(publ, &info->zone_list, zone_list) { - sprintf(portIdStr, "<%u.%u.%u:%u>", - tipc_zone(publ->node), tipc_cluster(publ->node), - tipc_node(publ->node), publ->ref); - ret += tipc_snprintf(buf + ret, len - ret, "%-26s ", portIdStr); - if (depth > 3) { - ret += tipc_snprintf(buf + ret, len - ret, "%-10u %s", - publ->key, scope_str[publ->scope]); - } - if (!list_is_last(&publ->zone_list, &info->zone_list)) - ret += tipc_snprintf(buf + ret, len - ret, - "\n%33s", " "); - } - - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -/** - * nameseq_list - print specified name sequence contents into the given buffer - */ -static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) -{ - struct sub_seq *sseq; - char typearea[11]; - int ret = 0; - - if (seq->first_free == 0) - return 0; - - sprintf(typearea, "%-10u", seq->type); - - if (depth == 1) { - ret += tipc_snprintf(buf, len, "%s\n", typearea); - return ret; - } - - for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { - if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { - ret += tipc_snprintf(buf + ret, len - ret, "%s ", - typearea); - spin_lock_bh(&seq->lock); - ret += subseq_list(sseq, buf + ret, len - ret, - depth, index); - spin_unlock_bh(&seq->lock); - sprintf(typearea, "%10s", " "); - } - } - return ret; -} - -/** - * nametbl_header - print name table header into the given buffer - */ 
-static int nametbl_header(char *buf, int len, u32 depth) -{ - const char *header[] = { - "Type ", - "Lower Upper ", - "Port Identity ", - "Publication Scope" - }; - - int i; - int ret = 0; - - if (depth > 4) - depth = 4; - for (i = 0; i < depth; i++) - ret += tipc_snprintf(buf + ret, len - ret, header[i]); - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -/** - * nametbl_list - print specified name table contents into the given buffer - */ -static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) -{ - struct hlist_head *seq_head; - struct name_seq *seq; - int all_types; - int ret = 0; - u32 depth; - u32 i; - - all_types = (depth_info & TIPC_NTQ_ALLTYPES); - depth = (depth_info & ~TIPC_NTQ_ALLTYPES); - - if (depth == 0) - return 0; - - if (all_types) { - /* display all entries in name table to specified depth */ - ret += nametbl_header(buf, len, depth); - lowbound = 0; - upbound = ~0; - for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; - hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, seq->type, - lowbound, upbound, i); - } - } - } else { - /* display only the sequence that matches the specified type */ - if (upbound < lowbound) { - ret += tipc_snprintf(buf + ret, len - ret, - "invalid name sequence specified\n"); - return ret; - } - ret += nametbl_header(buf + ret, len - ret, depth); - i = hash(type); - seq_head = &tipc_nametbl->seq_hlist[i]; - hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - if (seq->type == type) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, type, - lowbound, upbound, i); - break; - } - } - } - return ret; + spin_unlock_bh(&tn->nametbl_lock); } -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) -{ - struct sk_buff *buf; - struct tipc_name_table_query *argv; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - - if 
(!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NAME_TBL_QUERY)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); - rcu_read_lock(); - str_len = nametbl_list(pb, pb_len, ntohl(argv->depth), - ntohl(argv->type), - ntohl(argv->lowbound), ntohl(argv->upbound)); - rcu_read_unlock(); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -int tipc_nametbl_init(void) +int tipc_nametbl_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl; int i; tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC); @@ -955,6 +791,8 @@ int tipc_nametbl_init(void) INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + tn->nametbl = tipc_nametbl; + spin_lock_init(&tn->nametbl_lock); return 0; } @@ -963,7 +801,7 @@ int tipc_nametbl_init(void) * * tipc_nametbl_lock must be held when calling this function */ -static void tipc_purge_publications(struct name_seq *seq) +static void tipc_purge_publications(struct net *net, struct name_seq *seq) { struct publication *publ, *safe; struct sub_seq *sseq; @@ -973,8 +811,8 @@ static void tipc_purge_publications(struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, - publ->ref, publ->key); + tipc_nametbl_remove_publ(net, publ->type, publ->lower, + publ->node, publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -984,25 +822,27 @@ static void 
tipc_purge_publications(struct name_seq *seq) kfree_rcu(seq, rcu); } -void tipc_nametbl_stop(void) +void tipc_nametbl_stop(struct net *net) { u32 i; struct name_seq *seq; struct hlist_head *seq_head; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl = tn->nametbl; /* Verify name table is empty and purge any lingering * publications, then release the name table */ - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&tipc_nametbl->seq_hlist[i])) continue; seq_head = &tipc_nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - tipc_purge_publications(seq); + tipc_purge_publications(net, seq); } } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); synchronize_net(); kfree(tipc_nametbl); @@ -1033,7 +873,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, *last_publ = p->key; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, - &tipc_genl_v2_family, NLM_F_MULTI, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NAME_TABLE_GET); if (!hdr) return -EMSGSIZE; @@ -1106,9 +946,10 @@ static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, return 0; } -static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, - u32 *last_lower, u32 *last_publ) +static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *seq = NULL; int err; @@ -1120,10 +961,10 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, i = 0; for (; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; if (*last_type) { - seq = nametbl_find_seq(*last_type); + seq = nametbl_find_seq(net, *last_type); if (!seq) return -EPIPE; } else { @@ -1157,6 +998,7 @@ int 
tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 last_type = cb->args[0]; u32 last_lower = cb->args[1]; u32 last_publ = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_nl_msg msg; if (done) @@ -1167,7 +1009,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ); + err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ); if (!err) { done = 1; } else if (err != -EMSGSIZE) { @@ -1188,3 +1030,41 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } + +void tipc_plist_push(struct tipc_plist *pl, u32 port) +{ + struct tipc_plist *nl; + + if (likely(!pl->port)) { + pl->port = port; + return; + } + if (pl->port == port) + return; + list_for_each_entry(nl, &pl->list, list) { + if (nl->port == port) + return; + } + nl = kmalloc(sizeof(*nl), GFP_ATOMIC); + if (nl) { + nl->port = port; + list_add(&nl->list, &pl->list); + } +} + +u32 tipc_plist_pop(struct tipc_plist *pl) +{ + struct tipc_plist *nl; + u32 port = 0; + + if (likely(list_empty(&pl->list))) { + port = pl->port; + pl->port = 0; + return port; + } + nl = list_first_entry(&pl->list, typeof(*nl), list); + port = nl->port; + list_del(&nl->list); + kfree(nl); + return port; +} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 5f0dee92010d..1524a73830f7 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -38,7 +38,7 @@ #define _TIPC_NAME_TABLE_H struct tipc_subscription; -struct tipc_port_list; +struct tipc_plist; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -95,26 +95,39 @@ struct name_table { u32 local_publ_count; }; -extern spinlock_t tipc_nametbl_lock; -extern struct name_table *tipc_nametbl; - int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_plist *dports); +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key); +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 ref, u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, - u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); -int tipc_nametbl_init(void); -void tipc_nametbl_stop(void); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); + 
+struct tipc_plist { + struct list_head list; + u32 port; +}; + +static inline void tipc_plist_init(struct tipc_plist *pl) +{ + INIT_LIST_HEAD(&pl->list); + pl->port = 0; +} + +void tipc_plist_push(struct tipc_plist *pl, u32 port); +u32 tipc_plist_pop(struct tipc_plist *pl); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index cf13df3cde8f..a54f3cbe2246 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -40,7 +40,6 @@ #include "subscr.h" #include "socket.h" #include "node.h" -#include "config.h" static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, @@ -108,48 +107,54 @@ static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { * - A local spin_lock protecting the queue of subscriber events. */ -int tipc_net_start(u32 addr) +int tipc_net_start(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; int res; - tipc_own_addr = addr; - tipc_named_reinit(); - tipc_sk_reinit(); - res = tipc_bclink_init(); + tn->own_addr = addr; + tipc_named_reinit(net); + tipc_sk_reinit(net); + res = tipc_bclink_init(net); if (res) return res; - tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, - TIPC_ZONE_SCOPE, 0, tipc_own_addr); + tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, + TIPC_ZONE_SCOPE, 0, tn->own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + tipc_addr_string_fill(addr_string, tn->own_addr), + tn->net_id); return 0; } -void tipc_net_stop(void) +void tipc_net_stop(struct net *net) { - if (!tipc_own_addr) + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (!tn->own_addr) return; - tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0, + tn->own_addr); rtnl_lock(); - tipc_bearer_stop(); - 
tipc_bclink_stop(); - tipc_node_stop(); + tipc_bearer_stop(net); + tipc_bclink_stop(net); + tipc_node_stop(net); rtnl_unlock(); pr_info("Left network mode\n"); } -static int __tipc_nl_add_net(struct tipc_nl_msg *msg) +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) { + struct tipc_net *tn = net_generic(net, tipc_net_id); void *hdr; struct nlattr *attrs; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NET_GET); if (!hdr) return -EMSGSIZE; @@ -158,7 +163,7 @@ static int __tipc_nl_add_net(struct tipc_nl_msg *msg) if (!attrs) goto msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tipc_net_id)) + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) goto attr_msg_full; nla_nest_end(msg->skb, attrs); @@ -176,6 +181,7 @@ msg_full: int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int err; int done = cb->args[0]; struct tipc_nl_msg msg; @@ -187,7 +193,7 @@ int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; - err = __tipc_nl_add_net(&msg); + err = __tipc_nl_add_net(net, &msg); if (err) goto out; @@ -200,8 +206,10 @@ out: int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { - int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + int err; if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; @@ -216,21 +224,21 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) u32 val; /* Can't change net id once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); if (val < 1 || val > 9999) return -EINVAL; - tipc_net_id = val; + tn->net_id = val; } if (attrs[TIPC_NLA_NET_ADDR]) { u32 addr; /* Can't 
change net addr once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); @@ -238,7 +246,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; rtnl_lock(); - tipc_net_start(addr); + tipc_net_start(net, addr); rtnl_unlock(); } diff --git a/net/tipc/net.h b/net/tipc/net.h index a81c1b9eb150..77a7a118911d 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,9 +39,9 @@ #include <net/genetlink.h> -int tipc_net_start(u32 addr); +int tipc_net_start(struct net *net, u32 addr); -void tipc_net_stop(void); +void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index b891e3905bc4..7f6475efc984 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -35,7 +35,6 @@ */ #include "core.h" -#include "config.h" #include "socket.h" #include "name_table.h" #include "bearer.h" @@ -44,36 +43,6 @@ #include "net.h" #include <net/genetlink.h> -static int handle_cmd(struct sk_buff *skb, struct genl_info *info) -{ - struct sk_buff *rep_buf; - struct nlmsghdr *rep_nlh; - struct nlmsghdr *req_nlh = info->nlhdr; - struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); - u16 cmd; - - if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) - cmd = TIPC_CMD_NOT_NET_ADMIN; - else - cmd = req_userhdr->cmd; - - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); - - if (rep_buf) { - skb_push(rep_buf, hdr_space); - rep_nlh = nlmsg_hdr(rep_buf); - memcpy(rep_nlh, req_nlh, hdr_space); - rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); - } - - return 0; -} - static 
const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, @@ -86,32 +55,16 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } }; -/* Legacy ASCII API */ -static struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_NAME, - .version = TIPC_GENL_VERSION, - .hdrsize = TIPC_GENL_HDRLEN, - .maxattr = 0, -}; - -/* Legacy ASCII API */ -static struct genl_ops tipc_genl_ops[] = { - { - .cmd = TIPC_GENL_CMD, - .doit = handle_cmd, - }, -}; - /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. */ -struct genl_family tipc_genl_v2_family = { +struct genl_family tipc_genl_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_V2_NAME, .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .netnsok = true, }; static const struct genl_ops tipc_genl_v2_ops[] = { @@ -197,9 +150,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = { int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { - u32 maxattr = tipc_genl_v2_family.maxattr; + u32 maxattr = tipc_genl_family.maxattr; - *attr = tipc_genl_v2_family.attrbuf; + *attr = tipc_genl_family.attrbuf; if (!*attr) return -EOPNOTSUPP; @@ -210,13 +163,7 @@ int tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_ops); - if (res) { - pr_err("Failed to register legacy interface\n"); - return res; - } - - res = genl_register_family_with_ops(&tipc_genl_v2_family, + res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_v2_ops); if (res) { pr_err("Failed to register netlink interface\n"); @@ -228,5 +175,4 @@ int tipc_netlink_start(void) void tipc_netlink_stop(void) { genl_unregister_family(&tipc_genl_family); - genl_unregister_family(&tipc_genl_v2_family); } diff --git 
a/net/tipc/netlink.h b/net/tipc/netlink.h index 1425c6869de0..08a1db67b927 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -36,7 +36,7 @@ #ifndef _TIPC_NETLINK_H #define _TIPC_NETLINK_H -extern struct genl_family tipc_genl_v2_family; +extern struct genl_family tipc_genl_family; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { @@ -45,4 +45,9 @@ struct tipc_nl_msg { u32 seq; }; +int tipc_netlink_start(void); +int tipc_netlink_compat_start(void); +void tipc_netlink_stop(void); +void tipc_netlink_compat_stop(void); + #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c new file mode 100644 index 000000000000..ce9121e8e990 --- /dev/null +++ b/net/tipc/netlink_compat.c @@ -0,0 +1,1084 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "name_table.h" +#include "socket.h" +#include "node.h" +#include "net.h" +#include <net/genetlink.h> +#include <linux/tipc_config.h> + +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. + */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "<truncated>\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_type; + int rep_size; + int req_type; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*header)(struct tipc_nl_compat_msg *); + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, 
TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) +{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + struct nlattr **attrs; + + err = tipc_nlmsg_parse(nlmsg, &attrs); + if (err) + 
goto err_out; + + err = (*cmd->format)(msg, attrs); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. + */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *arg; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + + if (cmd->header) + (*cmd->header)(msg); + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + return -ENOMEM; + } + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) + kfree_skb(msg->rep); + + kfree_skb(arg); + + return err; +} + +static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *doit_buf; + struct sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + err = (*cmd->transcode)(trans_buf, msg); + if (err) + goto trans_out; + + attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL); + if (err) + goto parse_out; + + doit_buf = 
alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto parse_out; + } + + doit_buf->sk = msg->dst_sk; + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + err = (*cmd->doit)(doit_buf, &info); + + kfree_skb(doit_buf); +parse_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + + nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int tipc_nl_compat_bearer_disable(struct sk_buff 
*skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int 
tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], + NULL); + + nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], + NULL); + + name = (char *)TLV_DATA(msg->req); + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); + else + tipc_tlv_sprintf(msg->rep, " DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + 
nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + 
nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + +static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_link_info link_info; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); + link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); + strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, + &link_info, sizeof(link_info)); +} + +static int tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *link; + struct nlattr *prop; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + if (!prop) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) { + if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) { + if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value))) + return -EMSGSIZE; + } + + nla_nest_end(skb, prop); + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *link; + + name = (char *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, link); + + return 0; +} + +static int 
tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) +{ + int i; + u32 depth; + struct tipc_name_table_query *ntq; + static const char * const header[] = { + "Type ", + "Lower Upper ", + "Port Identity ", + "Publication Scope" + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + + if (depth > 4) + depth = 4; + for (i = 0; i < depth; i++) + tipc_tlv_sprintf(msg->rep, header[i]); + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char port_str[27]; + struct tipc_name_table_query *ntq; + struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + u32 node, depth, type, lowbound, upbound; + static const char * const scope_str[] = {"", " zone", " cluster", + " node"}; + + nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], + NULL); + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + type = ntohl(ntq->type); + lowbound = ntohl(ntq->lowbound); + upbound = ntohl(ntq->upbound); + + if (!(depth & TIPC_NTQ_ALLTYPES) && + (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]))) + return 0; + if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]))) + return 0; + if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]))) + return 0; + + tipc_tlv_sprintf(msg->rep, "%-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])); + + if (depth == 1) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]), + nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])); + + if (depth == 2) + goto out; + + node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]); + sprintf(port_str, "<%u.%u.%u:%u>", tipc_zone(node), tipc_cluster(node), + tipc_node(node), nla_get_u32(publ[TIPC_NLA_PUBL_REF])); + tipc_tlv_sprintf(msg->rep, 
"%-26s ", port_str); + + if (depth == 3) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %s", + nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); +out: + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + u32 type, lower, upper; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); + + type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); + lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); + upper = nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]); + + if (lower == upper) + tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lower); + else + tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lower, upper); + + return 0; +} + +static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) +{ + int err; + void *hdr; + struct nlattr *nest; + struct sk_buff *args; + struct tipc_nl_compat_cmd_dump dump; + + args = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!args) + return -ENOMEM; + + hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_PUBL_GET); + + nest = nla_nest_start(args, TIPC_NLA_SOCK); + if (!nest) { + kfree_skb(args); + return -EMSGSIZE; + } + + if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock)) { + kfree_skb(args); + return -EMSGSIZE; + } + + nla_nest_end(args, nest); + genlmsg_end(args, hdr); + + dump.dumpit = tipc_nl_publ_dump; + dump.format = __tipc_nl_compat_publ_dump; + + err = __tipc_nl_compat_dumpit(&dump, msg, args); + + kfree_skb(args); + + return err; +} + +static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + int err; + u32 sock_ref; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], NULL); + + sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); + + if (sock[TIPC_NLA_SOCK_CON]) { + u32 
node; + struct nlattr *con[TIPC_NLA_CON_MAX + 1]; + + nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON], + NULL); + + node = nla_get_u32(con[TIPC_NLA_CON_NODE]); + tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", + tipc_zone(node), + tipc_cluster(node), + tipc_node(node), + nla_get_u32(con[TIPC_NLA_CON_SOCK])); + + if (con[TIPC_NLA_CON_FLAG]) + tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n", + nla_get_u32(con[TIPC_NLA_CON_TYPE]), + nla_get_u32(con[TIPC_NLA_CON_INST])); + else + tipc_tlv_sprintf(msg->rep, "\n"); + } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) { + tipc_tlv_sprintf(msg->rep, " bound to"); + + err = tipc_nl_compat_publ_dump(msg, sock_ref); + if (err) + return err; + } + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + + nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, + nla_data(media[TIPC_NLA_MEDIA_NAME]), + nla_len(media[TIPC_NLA_MEDIA_NAME])); +} + +static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct tipc_node_info node_info; + struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + + nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], NULL); + + node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); + node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &node_info, + sizeof(node_info)); +} + +static int tipc_nl_compat_net_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + u32 val; + struct nlattr *net; + + val = ntohl(*(__be32 *)TLV_DATA(msg->req)); + + net = nla_nest_start(skb, TIPC_NLA_NET); + if (!net) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) { + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + } else if (msg->cmd == 
TIPC_CMD_SET_NETID) { + if (nla_put_u32(skb, TIPC_NLA_NET_ID, val)) + return -EMSGSIZE; + } + nla_nest_end(skb, net); + + return 0; +} + +static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + __be32 id; + struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + + nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], NULL); + id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); +} + +static int tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg) +{ + msg->rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN); + if (!msg->rep) + return -ENOMEM; + + tipc_tlv_init(msg->rep, TIPC_TLV_ULTRA_STRING); + tipc_tlv_sprintf(msg->rep, "TIPC version " TIPC_MOD_VER "\n"); + + return 0; +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; + + memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); + + switch (msg->cmd) { + case TIPC_CMD_NOOP: + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + return 0; + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = tipc_nl_bearer_disable; + doit.transcode = tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_stat_dump; + return 
tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_LINKS: + msg->req_type = TIPC_TLV_NET_ADDR; + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: + msg->req_type = TIPC_TLV_LINK_CONFIG; + doit.doit = tipc_nl_link_set; + doit.transcode = tipc_nl_compat_link_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_RESET_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + doit.doit = tipc_nl_link_reset_stats; + doit.transcode = tipc_nl_compat_link_reset_stats; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_NAME_TABLE: + msg->req_type = TIPC_TLV_NAME_TBL_QUERY; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.header = tipc_nl_compat_name_table_dump_header; + dump.dumpit = tipc_nl_name_table_dump; + dump.format = tipc_nl_compat_name_table_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_PORTS: + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_sk_dump; + dump.format = tipc_nl_compat_sk_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_MEDIA_NAMES: + msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME); + dump.dumpit = tipc_nl_media_dump; + dump.format = tipc_nl_compat_media_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_NODES: + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump; + dump.format = tipc_nl_compat_node_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_NODE_ADDR: + msg->req_type = TIPC_TLV_NET_ADDR; + doit.doit = tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SET_NETID: + msg->req_type = TIPC_TLV_UNSIGNED; + doit.doit = tipc_nl_net_set; + doit.transcode = 
tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_GET_NETID: + msg->rep_size = sizeof(u32); + dump.dumpit = tipc_nl_net_dump; + dump.format = tipc_nl_compat_net_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_STATS: + return tipc_cmd_show_stats_compat(msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int len; + struct tipc_nl_compat_msg msg; + struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + struct net *net = genl_info_net(info); + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.dst_sk = info->dst_sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = tipc_nl_compat_handle(&msg); + if (err == -EOPNOTSUPP) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + +static struct genl_family tipc_genl_compat_family = { + .id = GENL_ID_GENERATE, + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, +}; + +static struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = 
TIPC_GENL_CMD, + .doit = tipc_nl_compat_recv, + }, +}; + +int tipc_netlink_compat_start(void) +{ + int res; + + res = genl_register_family_with_ops(&tipc_genl_compat_family, + tipc_genl_compat_ops); + if (res) { + pr_err("Failed to register legacy compat interface\n"); + return res; + } + + return 0; +} + +void tipc_netlink_compat_stop(void) +{ + genl_unregister_family(&tipc_genl_compat_family); +} diff --git a/net/tipc/node.c b/net/tipc/node.c index 8d353ec77a66..86152de8248d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -35,22 +35,14 @@ */ #include "core.h" -#include "config.h" +#include "link.h" #include "node.h" #include "name_distr.h" #include "socket.h" -#define NODE_HTABLE_SIZE 512 - static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -static struct hlist_head node_htable[NODE_HTABLE_SIZE]; -LIST_HEAD(tipc_node_list); -static u32 tipc_num_nodes; -static u32 tipc_num_links; -static DEFINE_SPINLOCK(node_list_lock); - struct tipc_sock_conn { u32 port; u32 peer_port; @@ -78,15 +70,17 @@ static unsigned int tipc_hashfn(u32 addr) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(u32 addr) +struct tipc_node *tipc_node_find(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; - if (unlikely(!in_own_cluster_exact(addr))) + if (unlikely(!in_own_cluster_exact(net, addr))) return NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], + hash) { if (node->addr == addr) { rcu_read_unlock(); return node; @@ -96,72 +90,68 @@ struct tipc_node *tipc_node_find(u32 addr) return NULL; } -struct tipc_node *tipc_node_create(u32 addr) +struct tipc_node *tipc_node_create(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; - 
spin_lock_bh(&node_list_lock); - + spin_lock_bh(&tn->node_list_lock); + n_ptr = tipc_node_find(net, addr); + if (n_ptr) + goto exit; n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { - spin_unlock_bh(&node_list_lock); pr_warn("Node creation failed, no memory\n"); - return NULL; + goto exit; } - n_ptr->addr = addr; + n_ptr->net = net; spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->waiting_sks); __skb_queue_head_init(&n_ptr->bclink.deferred_queue); - - hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); - - list_for_each_entry_rcu(temp_node, &tipc_node_list, list) { + hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; - - tipc_num_nodes++; - - spin_unlock_bh(&node_list_lock); +exit: + spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) { list_del_rcu(&n_ptr->list); hlist_del_rcu(&n_ptr->hash); kfree_rcu(n_ptr, rcu); - - tipc_num_nodes--; } -void tipc_node_stop(void) +void tipc_node_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node, *t_node; - spin_lock_bh(&node_list_lock); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_safe(node, t_node, &tn->node_list, list) + tipc_node_delete(tn, node); + spin_unlock_bh(&tn->node_list_lock); } -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) +int tipc_node_add_conn(struct net *net, u32 
dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return 0; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) { pr_warn("Connecting sock to node 0x%x failed\n", dnode); return -EHOSTUNREACH; @@ -179,15 +169,15 @@ int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) return 0; } -void tipc_node_remove_conn(u32 dnode, u32 port) +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) { struct tipc_node *node; struct tipc_sock_conn *conn, *safe; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) return; @@ -201,23 +191,6 @@ void tipc_node_remove_conn(u32 dnode, u32 port) tipc_node_unlock(node); } -void tipc_node_abort_sock_conns(struct list_head *conns) -{ - struct tipc_sock_conn *conn, *safe; - struct sk_buff *buf; - - list_for_each_entry_safe(conn, safe, conns, list) { - buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - conn->peer_node, conn->port, - conn->peer_port, TIPC_ERR_NO_NODE); - if (likely(buf)) - tipc_sk_rcv(buf); - list_del(&conn->list); - kfree(conn); - } -} - /** * tipc_node_link_up - handle addition of link * @@ -231,8 +204,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; - pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Established link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -240,7 +213,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) goto exit; } if (l_ptr->priority < active[0]->priority) { - pr_info("New link <%s> becomes standby\n", l_ptr->name); + pr_debug("New link <%s> becomes 
standby\n", l_ptr->name); goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); @@ -248,9 +221,9 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = l_ptr; goto exit; } - pr_info("Old link <%s> becomes standby\n", active[0]->name); + pr_debug("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) - pr_info("Old link <%s> becomes standby\n", active[1]->name); + pr_debug("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ @@ -290,6 +263,7 @@ static void node_select_active_links(struct tipc_node *n_ptr) */ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link **active; n_ptr->working_links--; @@ -297,12 +271,12 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { - pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost standby link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); return; } - pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -324,7 +298,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) } /* Loopback link went down? No fragmentation needed from now on. 
*/ - if (n_ptr->addr == tipc_own_addr) { + if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; n_ptr->act_mtus[1] = MAX_MSG_SIZE; } @@ -343,9 +317,6 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->bearer_id] = l_ptr; - spin_lock_bh(&node_list_lock); - tipc_num_links++; - spin_unlock_bh(&node_list_lock); n_ptr->link_cnt++; } @@ -357,9 +328,6 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (l_ptr != n_ptr->links[i]) continue; n_ptr->links[i] = NULL; - spin_lock_bh(&node_list_lock); - tipc_num_links--; - spin_unlock_bh(&node_list_lock); n_ptr->link_cnt--; } } @@ -368,17 +336,21 @@ static void node_established_contact(struct tipc_node *n_ptr) { n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(); - tipc_bclink_add_node(n_ptr->addr); + n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); + tipc_bclink_add_node(n_ptr->net, n_ptr->addr); } static void node_lost_contact(struct tipc_node *n_ptr) { char addr_string[16]; - u32 i; + struct tipc_sock_conn *conn, *safe; + struct list_head *conns = &n_ptr->conn_sks; + struct sk_buff *skb; + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + uint i; - pr_info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + pr_debug("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { @@ -389,7 +361,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.reasm_buf = NULL; } - tipc_bclink_remove_node(n_ptr->addr); + tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); n_ptr->bclink.recv_permitted = false; @@ -403,126 +375,33 @@ static void node_lost_contact(struct tipc_node *n_ptr) 
l_ptr->reset_checkpoint = l_ptr->next_in_no; l_ptr->exp_msg_count = 0; tipc_link_reset_fragments(l_ptr); - } - - n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - - /* Notify subscribers and prevent re-contact with node until - * cleanup is done. - */ - n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN | - TIPC_NOTIFY_NODE_DOWN; -} -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) -{ - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_node_info node_info; - u32 payload_size; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); - - spin_lock_bh(&node_list_lock); - if (!tipc_num_nodes) { - spin_unlock_bh(&node_list_lock); - return tipc_cfg_reply_none(); + /* Link marked for deletion after failover? 
=> do it now */ + if (l_ptr->flags & LINK_STOPPED) + tipc_link_delete(l_ptr); } - /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; - if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many nodes)"); - } - spin_unlock_bh(&node_list_lock); - - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) - return NULL; - - /* Add TLVs for all nodes in scope */ - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - node_info.addr = htonl(n_ptr->addr); - node_info.up = htonl(tipc_node_is_up(n_ptr)); - tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO, - &node_info, sizeof(node_info)); - } - rcu_read_unlock(); - return buf; -} - -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) -{ - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_link_info link_info; - u32 payload_size; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); - - if (!tipc_own_addr) - return tipc_cfg_reply_none(); - - spin_lock_bh(&node_list_lock); - /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1)); - if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many links)"); - } - spin_unlock_bh(&node_list_lock); - - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) - return NULL; + n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); - link_info.up = 
htonl(1); - strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); + /* Prevent re-contact with node until cleanup is done */ + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN; - /* Add TLVs for any other links in scope */ - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - u32 i; + /* Notify publications from this node */ + n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - tipc_node_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - if (!n_ptr->links[i]) - continue; - link_info.dest = htonl(n_ptr->addr); - link_info.up = htonl(tipc_link_is_up(n_ptr->links[i])); - strcpy(link_info.str, n_ptr->links[i]->name); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, - &link_info, sizeof(link_info)); + /* Notify sockets connected to node */ + list_for_each_entry_safe(conn, safe, conns, list) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tn->own_addr, + conn->peer_node, conn->port, + conn->peer_port, TIPC_ERR_NO_NODE); + if (likely(skb)) { + skb_queue_tail(n_ptr->inputq, skb); + n_ptr->action_flags |= TIPC_MSG_EVT; } - tipc_node_unlock(n_ptr); + list_del(&conn->list); + kfree(conn); } - rcu_read_unlock(); - return buf; } /** @@ -534,10 +413,11 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) * * Returns 0 on success */ -int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, + char *linkname, size_t len) { struct tipc_link *link; - struct tipc_node *node = tipc_node_find(addr); + struct tipc_node *node = tipc_node_find(net, addr); if ((bearer_id >= MAX_BEARERS) || !node) return -EINVAL; @@ -554,58 +434,60 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { - 
LIST_HEAD(nsub_list); - LIST_HEAD(conn_sks); - struct sk_buff_head waiting_sks; + struct net *net = node->net; u32 addr = 0; - int flags = node->action_flags; + u32 flags = node->action_flags; u32 link_id = 0; + struct list_head *publ_list; + struct sk_buff_head *inputq = node->inputq; + struct sk_buff_head *namedq; - if (likely(!flags)) { + if (likely(!flags || (flags == TIPC_MSG_EVT))) { + node->action_flags = 0; spin_unlock_bh(&node->lock); + if (flags == TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); return; } addr = node->addr; link_id = node->link_id; - __skb_queue_head_init(&waiting_sks); + namedq = node->namedq; + publ_list = &node->publ_list; - if (flags & TIPC_WAKEUP_USERS) - skb_queue_splice_init(&node->waiting_sks, &waiting_sks); - - if (flags & TIPC_NOTIFY_NODE_DOWN) { - list_replace_init(&node->publ_list, &nsub_list); - list_replace_init(&node->conn_sks, &conn_sks); - } - node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN | - TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | - TIPC_NOTIFY_LINK_DOWN | - TIPC_WAKEUP_BCAST_USERS); + node->action_flags &= ~(TIPC_MSG_EVT | + TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | + TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | + TIPC_NAMED_MSG_EVT); spin_unlock_bh(&node->lock); - while (!skb_queue_empty(&waiting_sks)) - tipc_sk_rcv(__skb_dequeue(&waiting_sks)); - - if (!list_empty(&conn_sks)) - tipc_node_abort_sock_conns(&conn_sks); - - if (!list_empty(&nsub_list)) - tipc_publ_notify(&nsub_list, addr); + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(); + tipc_bclink_wakeup_users(net); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(addr); + tipc_named_node_up(net, addr); if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, link_id, addr); if (flags & 
TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); + + if (flags & TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); + + if (flags & TIPC_NAMED_MSG_EVT) + tipc_named_rcv(net, namedq); + + if (flags & TIPC_BCAST_MSG_EVT) + tipc_bclink_input(net); } /* Caller should hold node lock for the passed node */ @@ -614,7 +496,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) void *hdr; struct nlattr *attrs; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NODE_GET); if (!hdr) return -EMSGSIZE; @@ -645,6 +527,8 @@ msg_full: int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); int done = cb->args[0]; int last_addr = cb->args[1]; struct tipc_node *node; @@ -659,7 +543,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); - if (last_addr && !tipc_node_find(last_addr)) { + if (last_addr && !tipc_node_find(net, last_addr)) { rcu_read_unlock(); /* We never set seq or call nl_dump_check_consistent() this * means that setting prev_seq here will cause the consistence @@ -671,7 +555,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) return -EPIPE; } - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { if (last_addr) { if (node->addr == last_addr) last_addr = 0; diff --git a/net/tipc/node.h b/net/tipc/node.h index cbe0e950f1cc..3d18c66b7f78 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind 
River Systems * All rights reserved. * @@ -42,10 +42,10 @@ #include "bearer.h" #include "msg.h" -/* - * Out-of-range value for node signature - */ -#define INVALID_NODE_SIG 0x10000 +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define NODE_HTABLE_SIZE 512 /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down @@ -55,14 +55,16 @@ * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { + TIPC_MSG_EVT = 1, TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_WAKEUP_USERS = (1 << 5), - TIPC_WAKEUP_BCAST_USERS = (1 << 6), - TIPC_NOTIFY_LINK_UP = (1 << 7), - TIPC_NOTIFY_LINK_DOWN = (1 << 8) + TIPC_WAKEUP_BCAST_USERS = (1 << 5), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7), + TIPC_NAMED_MSG_EVT = (1 << 8), + TIPC_BCAST_MSG_EVT = (1 << 9) }; /** @@ -73,6 +75,7 @@ enum { * @oos_state: state tracker for handling OOS b'cast messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @reasm_buf: broadcast reassembly queue head from node + * @inputq_map: bitmap indicating which inqueues should be kicked * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node_bclink { @@ -83,6 +86,7 @@ struct tipc_node_bclink { u32 deferred_size; struct sk_buff_head deferred_queue; struct sk_buff *reasm_buf; + int inputq_map; bool recv_permitted; }; @@ -90,7 +94,11 @@ struct tipc_node_bclink { * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure + * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @curr_link: the link holding the node 
lock, if any * @active_links: pointers to active links to node * @links: pointers to all links to node * @action_flags: bit mask of different types of node actions @@ -106,11 +114,14 @@ struct tipc_node_bclink { struct tipc_node { u32 addr; spinlock_t lock; + struct net *net; struct hlist_node hash; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; struct tipc_link *active_links[2]; u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; - unsigned int action_flags; + int action_flags; struct tipc_node_bclink bclink; struct list_head list; int link_cnt; @@ -118,28 +129,24 @@ struct tipc_node { u32 signature; u32 link_id; struct list_head publ_list; - struct sk_buff_head waiting_sks; struct list_head conn_sks; struct rcu_head rcu; }; -extern struct list_head tipc_node_list; - -struct tipc_node *tipc_node_find(u32 addr); -struct tipc_node *tipc_node_create(u32 addr); -void tipc_node_stop(void); +struct tipc_node *tipc_node_find(struct net *net, u32 addr); +struct tipc_node *tipc_node_create(struct net *net, u32 addr); +void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port); -void tipc_node_remove_conn(u32 dnode, u32 port); +int 
tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); @@ -154,12 +161,12 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } -static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) { struct tipc_node *node; u32 mtu; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (likely(node)) mtu = node->act_mtus[selector & 1]; diff --git a/net/tipc/server.c b/net/tipc/server.c index a538a02f869b..eadd4ed45905 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -35,6 +35,7 @@ #include "server.h" #include "core.h" +#include "socket.h" #include <net/sock.h> /* Number of messages to send before rescheduling */ @@ -255,7 +256,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con) goto out_close; } - s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, + con->usr_data, buf, ret); kmem_cache_free(s->rcvbuf_cache, buf); @@ -307,7 +309,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->type, &sock); + ret = tipc_sock_create_local(s->net, s->type, &sock); if (ret < 0) return NULL; ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, diff --git a/net/tipc/server.h b/net/tipc/server.h index be817b0b547e..9015faedb1b0 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -36,7 +36,9 @@ #ifndef _TIPC_SERVER_H #define _TIPC_SERVER_H -#include "core.h" +#include <linux/idr.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> #define TIPC_SERVER_NAME_LEN 32 @@ -45,6 +47,7 @@ * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * 
@idr_in_use: amount of allocated identifier entry + * @net: network namspace instance * @rcvbuf_cache: memory cache of server receive buffer * @rcv_wq: receive workqueue * @send_wq: send workqueue @@ -61,16 +64,18 @@ struct tipc_server { struct idr conn_idr; spinlock_t idr_lock; int idr_in_use; + struct net *net; struct kmem_cache *rcvbuf_cache; struct workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; int max_rcvbuf_size; - void *(*tipc_conn_new) (int conid); - void (*tipc_conn_shutdown) (int conid, void *usr_data); - void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); + void *(*tipc_conn_new)(int conid); + void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); struct sockaddr_tipc *saddr; - const char name[TIPC_SERVER_NAME_LEN]; + char name[TIPC_SERVER_NAME_LEN]; int imp; int type; }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4731cad99d1c..f73e975af80b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2014, Ericsson AB + * Copyright (c) 2001-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -34,22 +34,25 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/rhashtable.h> +#include <linux/jhash.h> #include "core.h" #include "name_table.h" #include "node.h" #include "link.h" -#include <linux/export.h> -#include "config.h" +#include "name_distr.h" #include "socket.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ +#define SS_LISTENING -1 /* socket is listening */ +#define SS_READY -2 /* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 /** * struct tipc_sock - TIPC socket structure @@ -59,21 +62,20 @@ * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry + * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_interval: - * @timer: - * @port: port - interacts with 'sk' and with the rest of the TIPC stack - * @peer_name: the peer of the connection, if any + * @probing_intv: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # 
messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @node: hash table node + * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; @@ -82,19 +84,20 @@ struct tipc_sock { u32 conn_instance; int published; u32 max_pkt; - u32 ref; + u32 portid; struct tipc_msg phdr; struct list_head sock_list; struct list_head publications; u32 pub_count; u32 probing_state; - u32 probing_interval; - struct timer_list timer; + unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; uint sent_unacked; uint rcv_unacked; + struct rhash_head node; + struct rcu_head rcu; }; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -103,16 +106,14 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long ref); +static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); -static void tipc_sk_ref_discard(u32 ref); -static struct tipc_sock *tipc_sk_get(u32 ref); -static struct tipc_sock *tipc_sk_get_next(u32 *ref); -static void tipc_sk_put(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -174,6 +175,11 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return 
msg_prevnode(&tsk->phdr); +} + static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -246,10 +252,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; + u32 own_node = tsk_own_node(tipc_sk(sk)); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -260,6 +267,7 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { + struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; @@ -276,10 +284,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) if (likely(orig_node == peer_node)) return true; - if (!orig_node && (peer_node == tipc_own_addr)) + if (!orig_node && (peer_node == tn->own_addr)) return true; - if (!peer_node && (orig_node == tipc_own_addr)) + if (!peer_node && (orig_node == tn->own_addr)) return true; return false; @@ -300,12 +308,12 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct tipc_net *tn; const struct proto_ops *ops; socket_state state; struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref; /* Validate arguments */ if (unlikely(protocol != 0)) @@ -339,24 +347,23 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - ref = tipc_sk_ref_acquire(tsk); - if (!ref) { - pr_warn("Socket create failed; reference table exhausted\n"); - return -ENOMEM; - } tsk->max_pkt = MAX_PKT_DEFAULT; - tsk->ref = ref; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tn = 
net_generic(sock_net(sk), tipc_net_id); + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; sock_init_data(sock, sk); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port numbrer exhausted\n"); + return -EINVAL; + } + msg_set_origport(msg, tsk->portid); + setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -384,7 +391,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, * * Returns 0 on success, errno otherwise */ -int tipc_sock_create_local(int type, struct socket **res) +int tipc_sock_create_local(struct net *net, int type, struct socket **res) { int rc; @@ -393,7 +400,7 @@ int tipc_sock_create_local(int type, struct socket **res) pr_err("Failed to create kernel socket\n"); return rc; } - tipc_sk_create(&init_net, *res, 0, 1); + tipc_sk_create(net, *res, 0, 1); return 0; } @@ -442,6 +449,13 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, return ret; } +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -461,9 +475,10 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net; struct tipc_sock *tsk; struct sk_buff *skb; - u32 dnode; + u32 dnode, probing_state; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -472,6 +487,7 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; + net = sock_net(sk); tsk = tipc_sk(sk); lock_sock(sk); @@ -491,26 
+507,29 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); } } tipc_sk_withdraw(tsk, 0, NULL); - tipc_sk_ref_discard(tsk->ref); - k_cancel_timer(&tsk->timer); + probing_state = tsk->probing_state; + if (del_timer_sync(&sk->sk_timer) && + probing_state != TIPC_CONN_PROBING) + sock_put(sk); + tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, dnode, tipc_own_addr, - tsk_peer_port(tsk), - tsk->ref, TIPC_ERR_NO_PORT); + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } - k_term_timer(&tsk->timer); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -518,7 +537,8 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); + + call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; return 0; @@ -602,6 +622,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { @@ -611,8 +632,8 @@ static int 
tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->ref; - addr->addr.id.node = tipc_own_addr; + addr->addr.id.ref = tsk->portid; + addr->addr.id.node = tn->own_addr; } *uaddr_len = sizeof(*addr); @@ -711,8 +732,11 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; - struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; - struct sk_buff_head head; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + struct tipc_msg *mhdr = &tsk->phdr; + struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -727,83 +751,97 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(&head); + rc = tipc_bclink_xmit(net, pktchain); if (likely(rc >= 0)) { rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + msg->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tipc_sk(sk)->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); return rc; } -/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @arrvq: queue with arriving messages, to be cloned after destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur */ -void tipc_sk_mcast_rcv(struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct 
sk_buff_head *arrvq, + struct sk_buff_head *inputq) { - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item; - struct sk_buff *b; - uint i, last, dst = 0; + struct tipc_msg *msg; + struct tipc_plist dports; + u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - - if (in_own_node(msg_orignode(msg))) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list: */ - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - scope, - &dports); - last = dports.count; - if (!last) { - kfree_skb(buf); - return; - } - - for (item = &dports; item; item = item->next) { - for (i = 0; i < PLSIZE && ++dst <= last; i++) { - b = (dst != last) ? skb_clone(buf, GFP_ATOMIC) : buf; - if (!b) { - pr_warn("Failed do clone mcast rcv buffer\n"); + struct sk_buff_head tmpq; + uint hsz; + struct sk_buff *skb, *_skb; + + __skb_queue_head_init(&tmpq); + tipc_plist_init(&dports); + + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + msg = buf_msg(skb); + hsz = skb_headroom(skb) + msg_hdr_sz(msg); + + if (in_own_node(net, msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list and message clones: */ + tipc_nametbl_mc_translate(net, + msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); continue; } - msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(b); + pr_warn("Failed to clone mcast rcv buffer\n"); } + /* Append to inputq if not already done by other thread */ + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + kfree_skb(__skb_dequeue(arrvq)); + } + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + 
kfree_skb(skb); } - tipc_port_list_free(&dports); + tipc_sk_rcv(net, inputq); } /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket - * @dnode: node to send response message to, if any - * @buf: buffer containing protocol message - * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if - * (CONN_PROBE_REPLY) message should be forwarded. + * @skb: pointer to message buffer. Set to NULL if buffer is consumed. */ -static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, - struct sk_buff *buf) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(*skb); int conn_cong; - + u32 dnode; + u32 own_node = tsk_own_node(tsk); /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, msg)) goto exit; @@ -816,15 +854,15 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) - return TIPC_OK; - msg_set_type(msg, CONN_PROBE_REPLY); - return TIPC_FWD_MSG; + if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + msg_set_type(msg, CONN_PROBE_REPLY); + return; + } } /* Do nothing if msg_type() == CONN_PROBE_REPLY */ exit: - kfree_skb(buf); - return TIPC_OK; + kfree_skb(*skb); + *skb = NULL; } static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) @@ -872,11 +910,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; + struct iov_iter save; u32 mtu; long timeo; int rc; @@ -929,7 
+969,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_nametype(mhdr, type); msg_set_nameinst(mhdr, inst); msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); - dport = tipc_nametbl_translate(type, inst, &dnode); + dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); if (unlikely(!dport && !dnode)) { @@ -945,31 +985,33 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + save = m->msg_iter; new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->ref); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) goto exit; do { - skb = skb_peek(&head); + skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->ref); + rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + m->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tsk->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) @@ -1024,15 +1066,17 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = tsk->ref; + u32 portid = tsk->portid; int rc = -EINVAL; long timeo; u32 dnode; uint mtu, send, sent = 0; + struct iov_iter save; /* Handle implied connection establishment */ if 
(unlikely(dest)) { @@ -1059,15 +1103,15 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, dnode = tsk_peer_node(tsk); next: + save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, ref); + rc = tipc_link_xmit(net, pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1076,7 +1120,9 @@ next: goto next; } if (rc == -EMSGSIZE) { - tsk->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(net, dnode, + portid); + m->msg_iter = save; goto next; } if (rc != -ELINKCONG) @@ -1085,7 +1131,7 @@ next: } rc = tipc_wait_for_sndpkt(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) @@ -1118,6 +1164,8 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1126,12 +1174,12 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - k_start_timer(&tsk->timer, tsk->probing_interval); - tipc_node_add_conn(peer_node, tsk->ref, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } /** @@ 
-1230,6 +1278,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { + struct net *net = sock_net(&tsk->sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1237,13 +1286,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); if (!skb) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(skb, dnode, msg_link_selector(msg)); + tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1529,15 +1579,16 @@ static void tipc_data_ready(struct sock *sk) /** * filter_connect - Handle all incoming messages for a connection-based socket * @tsk: TIPC socket - * @msg: message + * @skb: pointer to message buffer. 
Set to NULL if buffer is consumed * * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise */ -static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) { struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(*buf); + struct tipc_msg *msg = buf_msg(*skb); int retval = -TIPC_ERR_NO_PORT; if (msg_mcast(msg)) @@ -1551,8 +1602,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) sock->state = SS_DISCONNECTING; tsk->connected = 0; /* let timer expire on it's own */ - tipc_node_remove_conn(tsk_peer_node(tsk), - tsk->ref); + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); } retval = TIPC_OK; } @@ -1587,8 +1638,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) * connect() routine if sleeping. */ if (msg_data_sz(msg) == 0) { - kfree_skb(*buf); - *buf = NULL; + kfree_skb(*skb); + *skb = NULL; if (waitqueue_active(sk_sleep(sk))) wake_up_interruptible(sk_sleep(sk)); } @@ -1640,32 +1691,33 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) /** * filter_rcv - validate incoming message * @sk: socket - * @buf: message + * @skb: pointer to message. Set to NULL if buffer is consumed. * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * - * Called with socket lock already taken; port lock may also be taken. 
+ * Called with socket lock already taken * - * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message - * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded + * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected */ -static int filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff **skb) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_msg *msg = buf_msg(buf); - unsigned int limit = rcvbuf_limit(sk, buf); - u32 onode; + struct tipc_msg *msg = buf_msg(*skb); + unsigned int limit = rcvbuf_limit(sk, *skb); int rc = TIPC_OK; - if (unlikely(msg_user(msg) == CONN_MANAGER)) - return tipc_sk_proto_rcv(tsk, &onode, buf); + if (unlikely(msg_user(msg) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return TIPC_OK; + } if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { - kfree_skb(buf); + kfree_skb(*skb); tsk->link_cong = 0; sk->sk_write_space(sk); + *skb = NULL; return TIPC_OK; } @@ -1677,21 +1729,22 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) if (msg_connected(msg)) return -TIPC_ERR_NO_PORT; } else { - rc = filter_connect(tsk, &buf); - if (rc != TIPC_OK || buf == NULL) + rc = filter_connect(tsk, skb); + if (rc != TIPC_OK || !*skb) return rc; } /* Reject message if there isn't room to queue it */ - if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) + if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) return -TIPC_ERR_OVERLOAD; /* Enqueue message */ - TIPC_SKB_CB(buf)->handle = NULL; - __skb_queue_tail(&sk->sk_receive_queue, buf); - skb_set_owner_r(buf, sk); + TIPC_SKB_CB(*skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, *skb); + skb_set_owner_r(*skb, sk); sk->sk_data_ready(sk); + *skb = NULL; return TIPC_OK; } @@ -1700,78 +1753,125 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) * @sk: socket * @skb: message * - * Caller must hold socket lock, but not port lock. 
+ * Caller must hold socket lock * * Returns 0 */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - int rc; - u32 onode; + int err; + atomic_t *dcnt; + u32 dnode; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); uint truesize = skb->truesize; - rc = filter_rcv(sk, skb); - - if (likely(!rc)) { - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + err = filter_rcv(sk, &skb); + if (likely(!skb)) { + dcnt = &tsk->dupl_rcvcnt; + if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, dcnt); return 0; } + if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + return 0; +} - if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) - return 0; - - tipc_link_xmit_skb(skb, onode, 0); +/** + * tipc_sk_enqueue - extract all buffers with destination 'dport' from + * inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @_skb: returned buffer to be forwarded or rejected, if applicable + * + * Caller must hold socket lock + * + * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD + * or -TIPC_ERR_NO_PORT + */ +static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff **_skb) +{ + unsigned int lim; + atomic_t *dcnt; + int err; + struct sk_buff *skb; + unsigned long time_limit = jiffies + 2; - return 0; + while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return TIPC_OK; + if (!sock_owned_by_user(sk)) { + err = filter_rcv(sk, &skb); + if (likely(!skb)) + continue; + *_skb = skb; + return err; + } + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + 
atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) + continue; + *_skb = skb; + return -TIPC_ERR_OVERLOAD; + } + return TIPC_OK; } /** - * tipc_sk_rcv - handle incoming message - * @skb: buffer containing arriving message - * Consumes buffer - * Returns 0 if success, or errno: -EHOSTUNREACH + * tipc_sk_rcv - handle a chain of incoming buffers + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue + * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH + * Only node local calls check the return value, sending single-buffer queues */ -int tipc_sk_rcv(struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + u32 dnode, dport = 0; + int err = -TIPC_ERR_NO_PORT; + struct sk_buff *skb; struct tipc_sock *tsk; + struct tipc_net *tn; struct sock *sk; - u32 dport = msg_destport(buf_msg(skb)); - int rc = TIPC_OK; - uint limit; - u32 dnode; - /* Validate destination and message */ - tsk = tipc_sk_get(dport); - if (unlikely(!tsk)) { - rc = tipc_msg_eval(skb, &dnode); - goto exit; + while (skb_queue_len(inputq)) { + skb = NULL; + dport = tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { + sk = &tsk->sk; + if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { + err = tipc_sk_enqueue(inputq, sk, dport, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + dport = 0; + } + sock_put(sk); + } else { + skb = tipc_skb_dequeue(inputq, dport); + } + if (likely(!skb)) + continue; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + continue; +xmit: + tipc_link_xmit_skb(net, skb, dnode, dport); } - sk = &tsk->sk; - - /* Queue message */ - 
spin_lock_bh(&sk->sk_lock.slock); - - if (!sock_owned_by_user(sk)) { - rc = filter_rcv(sk, skb); - } else { - if (sk->sk_backlog.len == 0) - atomic_set(&tsk->dupl_rcvcnt, 0); - limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); - if (sk_add_backlog(sk, skb, limit)) - rc = -TIPC_ERR_OVERLOAD; - } - spin_unlock_bh(&sk->sk_lock.slock); - tipc_sk_put(tsk); - if (likely(!rc)) - return 0; -exit: - if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) - return -EHOSTUNREACH; - - tipc_link_xmit_skb(skb, dnode, 0); - return (rc < 0) ? -EHOSTUNREACH : 0; + return err ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -2027,6 +2127,7 @@ exit: static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2049,21 +2150,24 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + TIPC_CONN_SHUTDOWN)) + tipc_link_xmit_skb(net, skb, dnode, + tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tipc_own_addr, + 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), - tsk->ref, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->ref); + tsk->portid, TIPC_CONN_SHUTDOWN); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(net, dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2084,18 +2188,14 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long ref) +static void tipc_sk_timeout(unsigned long data) { - struct tipc_sock *tsk; - 
struct sock *sk; + struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = &tsk->sk; struct sk_buff *skb = NULL; u32 peer_port, peer_node; + u32 own_node = tsk_own_node(tsk); - tsk = tipc_sk_get(ref); - if (!tsk) - return; - - sk = &tsk->sk; bh_lock_sock(sk); if (!tsk->connected) { bh_unlock_sock(sk); @@ -2106,38 +2206,39 @@ static void tipc_sk_timeout(unsigned long ref) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, ref, peer_port, - TIPC_ERR_NO_PORT); + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + own_node, peer_node, tsk->portid, + peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tipc_own_addr, - peer_port, ref, TIPC_OK); + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - k_start_timer(&tsk->timer, tsk->probing_interval); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, ref); + tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: - tipc_sk_put(tsk); + sock_put(sk); } static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; u32 key; if (tsk->connected) return -EINVAL; - key = tsk->ref + tsk->pub_count + 1; - if (key == tsk->ref) + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) return -EADDRINUSE; - publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, tsk->ref, key); + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, + scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2150,6 +2251,7 @@ static int 
tipc_sk_publish(struct tipc_sock *tsk, uint scope, static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; struct publication *safe; int rc = -EINVAL; @@ -2164,12 +2266,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, continue; if (publ->upper != seq->upper) break; - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; break; } - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; } @@ -2178,336 +2280,105 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, return rc; } -static int tipc_sk_show(struct tipc_sock *tsk, char *buf, - int len, int full_id) -{ - struct publication *publ; - int ret; - - if (full_id) - ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->ref); - else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); - - if (tsk->connected) { - u32 dport = tsk_peer_port(tsk); - u32 destnode = tsk_peer_node(tsk); - - ret += tipc_snprintf(buf + ret, len - ret, - " connected to <%u.%u.%u:%u>", - tipc_zone(destnode), - tipc_cluster(destnode), - tipc_node(destnode), dport); - if (tsk->conn_type != 0) - ret += tipc_snprintf(buf + ret, len - ret, - " via {%u,%u}", tsk->conn_type, - tsk->conn_instance); - } else if (tsk->published) { - ret += tipc_snprintf(buf + ret, len - ret, " bound to"); - list_for_each_entry(publ, &tsk->publications, pport_list) { - if (publ->lower == publ->upper) - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u}", publ->type, - publ->lower); - else - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u,%u}", publ->type, - publ->lower, publ->upper); - } - } - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -struct sk_buff 
*tipc_sk_socks_show(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - struct tipc_sock *tsk; - int str_len = 0; - u32 ref = 0; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. */ -void tipc_sk_reinit(void) +void tipc_sk_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + const struct bucket_table *tbl; + struct rhash_head *pos; + struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref = 0; - struct tipc_sock *tsk = tipc_sk_get_next(&ref); + int i; - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + msg = &tsk->phdr; + msg_set_prevnode(msg, tn->own_addr); + msg_set_orignode(msg, tn->own_addr); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); } -/** - * struct reference - TIPC socket reference entry - * @tsk: pointer to socket associated with reference entry - * @ref: reference value for socket (combines instance & array index info) - */ -struct reference { - struct tipc_sock *tsk; - u32 ref; -}; - -/** - 
* struct tipc_ref_table - table of TIPC socket reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused socket reference entry - * @last_free: array index of last unused socket reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* Socket reference table consists of 2**N entries. - * - * State Socket ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). 
- */ - -static struct ref_table tipc_ref_table; - -static DEFINE_RWLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for sockets - */ -int tipc_sk_ref_table_init(u32 req_sz, u32 start) +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) { - struct reference *table; - u32 actual_sz; - - /* account for unused entry, then round up size to a power of 2 */ - - req_sz++; - for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { - /* do nothing */ - }; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_sz * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = req_sz; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_sz - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_sock *tsk; - return 0; -} + rcu_read_lock(); + tsk = rhashtable_lookup(&tn->sk_rht, &portid); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); -/** - * tipc_ref_table_stop - destroy reference table for sockets - */ -void tipc_sk_ref_table_stop(void) -{ - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; + return tsk; } -/* tipc_ref_acquire - create reference to a socket - * - * Register an socket pointer in the reference table. - * Returns a unique reference value that is used from then on to retrieve the - * socket pointer, or to determine if the socket has been deregistered. - */ -u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +static int tipc_sk_insert(struct tipc_sock *tsk) { - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref = 0; - struct reference *entry; - - if (unlikely(!tsk)) { - pr_err("Attempt to acquire ref. 
to non-existent obj\n"); - return 0; - } - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found in acquisition attempt\n"); - return 0; - } - - /* Take a free entry, if available; otherwise initialize a new one */ - write_lock_bh(&ref_table_lock); - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - - if (likely(index)) { - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - entry->tsk = tsk; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &tipc_ref_table.entries[index]; - ref = tipc_ref_table.start_mask + index; + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) + return 0; + sock_put(&tsk->sk); } - if (ref) { - entry->ref = ref; - entry->tsk = tsk; - } - write_unlock_bh(&ref_table_lock); - return ref; + return -1; } -/* tipc_sk_ref_discard - invalidate reference to an socket - * - * Disallow future references to an socket and free up the entry for re-use. - */ -void tipc_sk_ref_discard(u32 ref) +static void tipc_sk_remove(struct tipc_sock *tsk) { - struct reference *entry; - u32 index; - u32 index_mask; - - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. 
table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &tipc_ref_table.entries[index]; - - write_lock_bh(&ref_table_lock); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (unlikely(!entry->tsk)) { - pr_err("Attempt to discard ref. to non-existent socket\n"); - goto exit; + if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); } - if (unlikely(entry->ref != ref)) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; - } - - /* Mark entry as unused; increment instance part of entry's - * reference to invalidate any subsequent references - */ - - entry->tsk = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* Append entry to free entry list */ - if (unlikely(tipc_ref_table.first_free == 0)) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; -exit: - write_unlock_bh(&ref_table_lock); } -/* tipc_sk_get - find referenced socket and return pointer to it - */ -struct tipc_sock *tipc_sk_get(u32 ref) +int tipc_sk_rht_init(struct net *net) { - struct reference *entry; - struct tipc_sock *tsk; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct rhashtable_params rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = jhash, + .max_shift = 20, /* 1M */ + .min_shift = 8, /* 256 */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; - if (unlikely(!tipc_ref_table.entries)) - return NULL; - read_lock_bh(&ref_table_lock); - entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; - tsk = entry->tsk; - if (likely(tsk && (entry->ref == ref))) - sock_hold(&tsk->sk); - else - tsk = 
NULL; - read_unlock_bh(&ref_table_lock); - return tsk; + return rhashtable_init(&tn->sk_rht, &rht_params); } -/* tipc_sk_get_next - lock & return next socket after referenced one -*/ -struct tipc_sock *tipc_sk_get_next(u32 *ref) +void tipc_sk_rht_destroy(struct net *net) { - struct reference *entry; - struct tipc_sock *tsk = NULL; - uint index = *ref & tipc_ref_table.index_mask; + struct tipc_net *tn = net_generic(net, tipc_net_id); - read_lock_bh(&ref_table_lock); - while (++index < tipc_ref_table.capacity) { - entry = &tipc_ref_table.entries[index]; - if (!entry->tsk) - continue; - tsk = entry->tsk; - sock_hold(&tsk->sk); - *ref = entry->ref; - break; - } - read_unlock_bh(&ref_table_lock); - return tsk; -} + /* Wait for socket readers to complete */ + synchronize_net(); -static void tipc_sk_put(struct tipc_sock *tsk) -{ - sock_put(&tsk->sk); + rhashtable_destroy(&tn->sk_rht); } /** @@ -2639,8 +2510,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } -static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + struct sock *sk = sock->sk; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; @@ -2648,7 +2520,8 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, + if (!tipc_node_get_linkname(sock_net(sk), + lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; @@ -2820,18 +2693,20 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, int err; void *hdr; struct nlattr *attrs; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, 
cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); if (!hdr) goto msg_cancel; attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; if (tsk->connected) { @@ -2859,22 +2734,37 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; struct tipc_sock *tsk; - u32 prev_ref = cb->args[0]; - u32 ref = prev_ref; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - err = __tipc_nl_add_sk(skb, cb, tsk); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - if (err) - break; + const struct bucket_table *tbl; + struct rhash_head *pos; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 tbl_id = cb->args[0]; + u32 prev_portid = cb->args[1]; - prev_ref = ref; - } + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (; tbl_id < tbl->size; tbl_id++) { + rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + if (prev_portid && prev_portid != tsk->portid) { + spin_unlock_bh(&tsk->sk.sk_lock.slock); + continue; + } - cb->args[0] = prev_ref; + err = __tipc_nl_add_sk(skb, cb, tsk); + if (err) { + prev_portid = tsk->portid; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + goto out; + } + prev_portid = 0; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } + } +out: + rcu_read_unlock(); + cb->args[0] = tbl_id; + cb->args[1] = prev_portid; return skb->len; } @@ -2888,7 +2778,7 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct nlattr *attrs; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - 
&tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); if (!hdr) goto msg_cancel; @@ -2962,12 +2852,13 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; - u32 tsk_ref = cb->args[0]; + u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_sock *tsk; - if (!tsk_ref) { + if (!tsk_portid) { struct nlattr **attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; @@ -2984,13 +2875,13 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!sock[TIPC_NLA_SOCK_REF]) return -EINVAL; - tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); } if (done) return 0; - tsk = tipc_sk_get(tsk_ref); + tsk = tipc_sk_lookup(net, tsk_portid); if (!tsk) return -EINVAL; @@ -2999,9 +2890,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!err) done = 1; release_sock(&tsk->sk); - tipc_sk_put(tsk); + sock_put(&tsk->sk); - cb->args[0] = tsk_ref; + cb->args[0] = tsk_portid; cb->args[1] = last_publ; cb->args[2] = done; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index d34089387006..238f1b7bd9bd 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -1,6 +1,6 @@ /* net/tipc/socket.h: Include file for TIPC socket code * - * Copyright (c) 2014, Ericsson AB + * Copyright (c) 2014-2015, Ericsson AB * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -42,12 +42,18 @@ #define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) #define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -int tipc_sk_rcv(struct sk_buff *buf); -struct sk_buff *tipc_sk_socks_show(void); -void tipc_sk_mcast_rcv(struct sk_buff *buf); -void tipc_sk_reinit(void); -int tipc_sk_ref_table_init(u32 requested_size, u32 start); -void tipc_sk_ref_table_stop(void); +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sock_create_local(struct net *net, int type, struct socket **res); +void tipc_sock_release_local(struct socket *sock); +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags); +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0344206b984f..72c339e432aa 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -50,33 +50,6 @@ struct tipc_subscriber { struct list_head subscription_list; }; -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); -static void *subscr_named_msg_event(int conid); -static void subscr_conn_shutdown_event(int conid, void *usr_data); - -static atomic_t subscription_count = ATOMIC_INIT(0); - -static struct sockaddr_tipc topsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_TOP_SRV, - .addr.nameseq.lower = TIPC_TOP_SRV, - .addr.nameseq.upper = TIPC_TOP_SRV, - .scope = TIPC_NODE_SCOPE -}; - -static struct 
tipc_server topsrv __read_mostly = { - .saddr = &topsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_SEQPACKET, - .max_rcvbuf_size = sizeof(struct tipc_subscr), - .name = "topology_server", - .tipc_conn_recvmsg = subscr_conn_msg_event, - .tipc_conn_new = subscr_named_msg_event, - .tipc_conn_shutdown = subscr_conn_shutdown_event, -}; - /** * htohl - convert value to endianness used by destination * @in: value to convert @@ -93,6 +66,7 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; @@ -103,8 +77,8 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, - msg_sect.iov_len); + tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); } /** @@ -141,9 +115,11 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -static void subscr_timeout(struct tipc_subscription *sub) +static void subscr_timeout(unsigned long data) { + struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); @@ -167,9 +143,8 @@ static void subscr_timeout(struct tipc_subscription *sub) TIPC_SUBSCR_TIMEOUT, 0, 0); /* Now destroy subscription */ - k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -179,10 
+154,12 @@ static void subscr_timeout(struct tipc_subscription *sub) */ static void subscr_del(struct tipc_subscription *sub) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -190,9 +167,12 @@ static void subscr_del(struct tipc_subscription *sub) * * Note: Must call it in process context since it might sleep. */ -static void subscr_terminate(struct tipc_subscriber *subscriber) +static void subscr_terminate(struct tipc_subscription *sub) { - tipc_conn_terminate(&topsrv, subscriber->conid); + struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_conn_terminate(tn->topsrv, subscriber->conid); } static void subscr_release(struct tipc_subscriber *subscriber) @@ -207,8 +187,7 @@ static void subscr_release(struct tipc_subscriber *subscriber) subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -250,8 +229,7 @@ static void subscr_cancel(struct tipc_subscr *s, if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -262,9 +240,11 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. 
*/ -static int subscr_subscribe(struct tipc_subscr *s, +static int subscr_subscribe(struct net *net, struct tipc_subscr *s, struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) { + struct tipc_subscription **sub_p) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; int swap; @@ -279,7 +259,7 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); return -EINVAL; @@ -293,10 +273,11 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Initialize subscription object */ + sub->net = net; sub->seq.type = htohl(s->seq.type, swap); sub->seq.lower = htohl(s->seq.lower, swap); sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); + sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); sub->filter = htohl(s->filter, swap); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || @@ -309,11 +290,10 @@ static int subscr_subscribe(struct tipc_subscr *s, sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&subscription_count); + atomic_inc(&tn->subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { - k_init_timer(&sub->timer, - (Handler)subscr_timeout, (unsigned long)sub); - k_start_timer(&sub->timer, sub->timeout); + setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); + mod_timer(&sub->timer, jiffies + sub->timeout); } *sub_p = sub; return 0; @@ -326,16 +306,18 @@ static void subscr_conn_shutdown_event(int conid, void *usr_data) } /* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) 
+static void subscr_conn_msg_event(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, + &sub) < 0) { spin_unlock_bh(&subscriber->lock); - subscr_terminate(subscriber); + subscr_terminate(sub); return; } if (sub) @@ -343,7 +325,6 @@ static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, spin_unlock_bh(&subscriber->lock); } - /* Handle one request to establish a new subscriber */ static void *subscr_named_msg_event(int conid) { @@ -362,12 +343,50 @@ static void *subscr_named_msg_event(int conid) return (void *)subscriber; } -int tipc_subscr_start(void) +int tipc_subscr_start(struct net *net) { - return tipc_server_start(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + const char name[] = "topology_server"; + struct tipc_server *topsrv; + struct sockaddr_tipc *saddr; + + saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); + if (!saddr) + return -ENOMEM; + saddr->family = AF_TIPC; + saddr->addrtype = TIPC_ADDR_NAMESEQ; + saddr->addr.nameseq.type = TIPC_TOP_SRV; + saddr->addr.nameseq.lower = TIPC_TOP_SRV; + saddr->addr.nameseq.upper = TIPC_TOP_SRV; + saddr->scope = TIPC_NODE_SCOPE; + + topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); + if (!topsrv) { + kfree(saddr); + return -ENOMEM; + } + topsrv->net = net; + topsrv->saddr = saddr; + topsrv->imp = TIPC_CRITICAL_IMPORTANCE; + topsrv->type = SOCK_SEQPACKET; + topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); + topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; + topsrv->tipc_conn_new = subscr_named_msg_event; + topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + + strncpy(topsrv->name, name, strlen(name) + 1); + tn->topsrv = topsrv; + atomic_set(&tn->subscription_count, 0); + + 
return tipc_server_start(topsrv); } -void tipc_subscr_stop(void) +void tipc_subscr_stop(struct net *net) { - tipc_server_stop(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_server *topsrv = tn->topsrv; + + tipc_server_stop(topsrv); + kfree(topsrv->saddr); + kfree(topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 393e417bee3f..33488bd9fe3c 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -39,6 +39,9 @@ #include "server.h" +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 + struct tipc_subscription; struct tipc_subscriber; @@ -46,6 +49,7 @@ struct tipc_subscriber; * struct tipc_subscription - TIPC network topology subscription object * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription + * @net: point to network namespace * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) @@ -58,7 +62,8 @@ struct tipc_subscriber; struct tipc_subscription { struct tipc_subscriber *subscriber; struct tipc_name_seq seq; - u32 timeout; + struct net *net; + unsigned long timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; @@ -69,13 +74,10 @@ struct tipc_subscription { int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); - void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node, int must); - -int tipc_subscr_start(void); - -void tipc_subscr_stop(void); +int tipc_subscr_start(struct net *net); +void tipc_subscr_stop(struct net *net); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8e1b10274b02..526b6edab018 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1445,7 +1445,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, static int unix_dgram_sendmsg(struct kiocb *kiocb, 
struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); @@ -1456,14 +1455,12 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unsigned int hash; struct sk_buff *skb; long timeo; - struct scm_cookie tmp_scm; + struct scm_cookie scm; int max_level; int data_len = 0; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1507,11 +1504,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - err = unix_scm_to_skb(siocb->scm, skb, true); + err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(siocb->scm, skb); + unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1606,7 +1603,7 @@ restart: unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return len; out_unlock: @@ -1616,7 +1613,7 @@ out_free: out: if (other) sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -1628,21 +1625,18 @@ out: static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; - struct scm_cookie tmp_scm; + struct scm_cookie scm; bool fds_sent = false; int max_level; int data_len; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1683,7 +1677,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto 
out_err; /* Only send the fds in the first buffer */ - err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; @@ -1715,8 +1709,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, sent += size; } - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent; @@ -1728,8 +1721,7 @@ pipe_err: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent ? : err; } @@ -1778,8 +1770,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int noblock = flags & MSG_DONTWAIT; @@ -1831,16 +1822,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); - unix_set_secdata(siocb->scm, skb); + memset(&scm, 0, sizeof(scm)); + + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { @@ -1860,11 +1849,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); } err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); @@ -1915,8 +1904,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); @@ -1943,10 +1931,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * while sleeps in memcpy_tomsg */ - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } + memset(&scm, 0, sizeof(scm)); err = mutex_lock_interruptible(&u->readlock); if (unlikely(err)) { @@ -2012,13 +1997,13 @@ again: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != siocb->scm->pid) || - !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || - !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) + if ((UNIXCB(skb).pid != scm.pid) || + !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || + !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2045,7 +2030,7 @@ again: sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); if (unix_skb_len(skb)) break; @@ -2053,13 +2038,13 @@ again: skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); - if (siocb->scm->fp) + if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. 
*/ if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); sk_peek_offset_fwd(sk, chunk); @@ -2068,7 +2053,7 @@ again: } while (size); mutex_unlock(&u->readlock); - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: return copied ? : err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 86fa0f3b2caf..ef542fbca9fe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -155,7 +155,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 02d2e5229240..7f3255084a6c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1850,8 +1850,7 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - /* XXX: stripping const */ - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0); + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) diff --git a/net/wireless/core.c b/net/wireless/core.c index 53dda7728f86..3af0ecf1cc16 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -320,6 +320,20 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_sched_scan_stop_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + sched_scan_stop_wk); + + rtnl_lock(); + + __cfg80211_stop_sched_scan(rdev, false); + + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, @@ -406,6 +420,7 @@ use_default_name: INIT_LIST_HEAD(&rdev->destroy_list); 
spin_lock_init(&rdev->destroy_list_lock); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -560,6 +575,14 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; + if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) && + (wiphy->regulatory_flags & + (REGULATORY_CUSTOM_REG | + REGULATORY_STRICT_REG | + REGULATORY_COUNTRY_IE_FOLLOW_POWER | + REGULATORY_COUNTRY_IE_IGNORE)))) + return -EINVAL; + if (WARN_ON(wiphy->coalesce && (!wiphy->coalesce->n_rules || !wiphy->coalesce->n_patterns) && @@ -778,6 +801,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); flush_work(&rdev->destroy_work); + flush_work(&rdev->sched_scan_stop_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -858,6 +882,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; + struct cfg80211_sched_scan_request *sched_scan_req; ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); @@ -868,7 +893,8 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (sched_scan_req && dev == sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); #ifdef CONFIG_CFG80211_WEXT @@ -943,6 +969,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *sched_scan_req; if (!wdev) return 
NOTIFY_DONE; @@ -1007,8 +1034,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ___cfg80211_scan_done(rdev, false); } - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (WARN_ON(sched_scan_req && + sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); } diff --git a/net/wireless/core.h b/net/wireless/core.h index faa5b1609aae..801cd49c5a0c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -36,6 +36,13 @@ struct cfg80211_registered_device { * the country on the country IE changed. */ char country_ie_alpha2[2]; + /* + * the driver requests the regulatory core to set this regulatory + * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED + * devices using the regulatory_set_wiphy_regd() API + */ + const struct ieee80211_regdomain *requested_regd; + /* If a Country IE has been received this tells us the environment * which its telling us its in. 
This defaults to ENVIRON_ANY */ enum environment_cap env; @@ -63,7 +70,7 @@ struct cfg80211_registered_device { u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request __rcu *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; @@ -84,6 +91,8 @@ struct cfg80211_registered_device { struct list_head destroy_list; struct work_struct destroy_work; + struct work_struct sched_scan_stop_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8887c6e5fca8..d78fd8b54515 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,13 +59,13 @@ enum nl80211_multicast_groups { }; static const struct genl_multicast_group nl80211_mcgrps[] = { - [NL80211_MCGRP_CONFIG] = { .name = "config", }, - [NL80211_MCGRP_SCAN] = { .name = "scan", }, - [NL80211_MCGRP_REGULATORY] = { .name = "regulatory", }, - [NL80211_MCGRP_MLME] = { .name = "mlme", }, - [NL80211_MCGRP_VENDOR] = { .name = "vendor", }, + [NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG }, + [NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN }, + [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, + [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, + [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, #ifdef CONFIG_NL80211_TESTMODE - [NL80211_MCGRP_TESTMODE] = { .name = "testmode", } + [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif }; @@ -396,6 +396,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, 
[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, + [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, + [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -1087,6 +1090,11 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } + if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT, + rdev->wiphy.wowlan->max_nd_match_sets)) + return -ENOBUFS; + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; @@ -1701,12 +1709,22 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.max_num_csa_counters)) goto nla_put_failure; + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + + if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, + sizeof(rdev->wiphy.ext_features), + rdev->wiphy.ext_features)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; } finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -2389,7 +2407,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -3562,6 +3581,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, struct nlattr *rate; u32 bitrate; u16 bitrate_compat; + enum nl80211_attrs rate_flg; rate = nla_nest_start(msg, attr); if (!rate) @@ -3578,12 +3598,36 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) return false; + switch (info->bw) { + case RATE_INFO_BW_5: + rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH; + break; + case RATE_INFO_BW_10: + rate_flg = 
NL80211_RATE_INFO_10_MHZ_WIDTH; + break; + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + rate_flg = 0; + break; + case RATE_INFO_BW_40: + rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH; + break; + case RATE_INFO_BW_80: + rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH; + break; + case RATE_INFO_BW_160: + rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH; + break; + } + + if (rate_flg && nla_put_flag(msg, rate_flg)) + return false; + if (info->flags & RATE_INFO_FLAGS_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; @@ -3592,18 +3636,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return false; if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; @@ -3639,8 +3671,8 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, return true; } -static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, - int flags, +static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) @@ -3648,7 +3680,7 @@ 
static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -3660,115 +3692,77 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_CONNECTED_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_CONNECTED_TIME, - sinfo->connected_time)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_INACTIVE_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, - sinfo->inactive_time)) - goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_RX_BYTES | - STATION_INFO_RX_BYTES64)) && + +#define PUT_SINFO(attr, memb, type) do { \ + if (sinfo->filled & BIT(NL80211_STA_INFO_ ## attr) && \ + nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ + sinfo->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_SINFO(CONNECTED_TIME, connected_time, u32); + PUT_SINFO(INACTIVE_TIME, inactive_time, u32); + + if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_TX_BYTES | - STATION_INFO_TX_BYTES64)) && + + if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, - sinfo->rx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, - sinfo->tx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LLID) && - nla_put_u16(msg, NL80211_STA_INFO_LLID, 
sinfo->llid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLID) && - nla_put_u16(msg, NL80211_STA_INFO_PLID, sinfo->plid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLINK_STATE) && - nla_put_u8(msg, NL80211_STA_INFO_PLINK_STATE, - sinfo->plink_state)) - goto nla_put_failure; + + PUT_SINFO(RX_BYTES64, rx_bytes, u64); + PUT_SINFO(TX_BYTES64, tx_bytes, u64); + PUT_SINFO(LLID, llid, u16); + PUT_SINFO(PLID, plid, u16); + PUT_SINFO(PLINK_STATE, plink_state, u8); + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if ((sinfo->filled & STATION_INFO_SIGNAL) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL, - sinfo->signal)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_SIGNAL_AVG) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg)) - goto nla_put_failure; + PUT_SINFO(SIGNAL, signal, u8); + PUT_SINFO(SIGNAL_AVG, signal_avg, u8); break; default: break; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal, NL80211_STA_INFO_CHAIN_SIGNAL)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal_avg, NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_TX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_RX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; } - if ((sinfo->filled & STATION_INFO_RX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_RX_PACKETS, - sinfo->rx_packets)) - goto nla_put_failure; - if ((sinfo->filled 
& STATION_INFO_TX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_TX_PACKETS, - sinfo->tx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_RETRIES) && - nla_put_u32(msg, NL80211_STA_INFO_TX_RETRIES, - sinfo->tx_retries)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_FAILED) && - nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, - sinfo->tx_failed)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && - nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, - sinfo->expected_throughput)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && - nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, - sinfo->beacon_loss_count)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LOCAL_PM) && - nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, - sinfo->local_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, - sinfo->peer_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_NONPEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, - sinfo->nonpeer_pm)) - goto nla_put_failure; - if (sinfo->filled & STATION_INFO_BSS_PARAM) { + + PUT_SINFO(RX_PACKETS, rx_packets, u32); + PUT_SINFO(TX_PACKETS, tx_packets, u32); + PUT_SINFO(TX_RETRIES, tx_retries, u32); + PUT_SINFO(TX_FAILED, tx_failed, u32); + PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); + PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); + PUT_SINFO(LOCAL_PM, local_pm, u32); + PUT_SINFO(PEER_PM, peer_pm, u32); + PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); + + if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -3787,23 +3781,68 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, bss_param); } - if ((sinfo->filled & STATION_INFO_STA_FLAGS) && + if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) 
&& nla_put(msg, NL80211_STA_INFO_STA_FLAGS, sizeof(struct nl80211_sta_flag_update), &sinfo->sta_flags)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_T_OFFSET) && - nla_put_u64(msg, NL80211_STA_INFO_T_OFFSET, - sinfo->t_offset)) - goto nla_put_failure; + + PUT_SINFO(T_OFFSET, t_offset, u64); + PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); + PUT_SINFO(BEACON_RX, rx_beacon, u64); + PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); + +#undef PUT_SINFO + + if (sinfo->filled & BIT(NL80211_STA_INFO_TID_STATS)) { + struct nlattr *tidsattr; + int tid; + + tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS); + if (!tidsattr) + goto nla_put_failure; + + for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) { + struct cfg80211_tid_stats *tidstats; + struct nlattr *tidattr; + + tidstats = &sinfo->pertid[tid]; + + if (!tidstats->filled) + continue; + + tidattr = nla_nest_start(msg, tid + 1); + if (!tidattr) + goto nla_put_failure; + +#define PUT_TIDVAL(attr, memb, type) do { \ + if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ + nla_put_ ## type(msg, NL80211_TID_STATS_ ## attr, \ + tidstats->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_TIDVAL(RX_MSDU, rx_msdu, u64); + PUT_TIDVAL(TX_MSDU, tx_msdu, u64); + PUT_TIDVAL(TX_MSDU_RETRIES, tx_msdu_retries, u64); + PUT_TIDVAL(TX_MSDU_FAILED, tx_msdu_failed, u64); + +#undef PUT_TIDVAL + nla_nest_end(msg, tidattr); + } + + nla_nest_end(msg, tidsattr); + } + nla_nest_end(msg, sinfoattr); - if ((sinfo->filled & STATION_INFO_ASSOC_REQ_IES) && + if (sinfo->assoc_req_ies_len && nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len, sinfo->assoc_req_ies)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -3843,7 +3882,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_station(skb, + if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, 
cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, @@ -3890,7 +3929,8 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, + info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -4532,7 +4572,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, pinfoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -5326,42 +5367,20 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, return err; } -static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) +static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, + struct sk_buff *msg) { - const struct ieee80211_regdomain *regdom; - struct sk_buff *msg; - void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - if (!cfg80211_regdomain) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOBUFS; - - hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, - NL80211_CMD_GET_REG); - if (!hdr) - goto put_failure; - - if (reg_last_request_cell_base() && - nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, - NL80211_USER_REG_HINT_CELL_BASE)) - goto nla_put_failure; - - rcu_read_lock(); - regdom = rcu_dereference(cfg80211_regdomain); - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || (regdom->dfs_region && nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) - goto nla_put_failure_rcu; + goto nla_put_failure; nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure_rcu; + goto nla_put_failure; for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; @@ -5376,7 +5395,7 @@ static int nl80211_get_reg(struct 
sk_buff *skb, struct genl_info *info) nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure_rcu; + goto nla_put_failure; max_bandwidth_khz = freq_range->max_bandwidth_khz; if (!max_bandwidth_khz) @@ -5397,13 +5416,74 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_eirp) || nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME, reg_rule->dfs_cac_ms)) - goto nla_put_failure_rcu; + goto nla_put_failure; nla_nest_end(msg, nl_reg_rule); } - rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy = NULL; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOBUFS; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_REG); + if (!hdr) + goto put_failure; + + if (info->attrs[NL80211_ATTR_WIPHY]) { + bool self_managed; + + rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); + if (IS_ERR(rdev)) { + nlmsg_free(msg); + return PTR_ERR(rdev); + } + + wiphy = &rdev->wiphy; + self_managed = wiphy->regulatory_flags & + REGULATORY_WIPHY_SELF_MANAGED; + regdom = get_wiphy_regdom(wiphy); + + /* a self-managed-reg device must have a private regdom */ + if (WARN_ON(!regdom && self_managed)) { + nlmsg_free(msg); + return -EINVAL; + } + + if (regdom && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + } + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + rcu_read_lock(); + + if (!regdom) + regdom = rcu_dereference(cfg80211_regdomain); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure_rcu; + + rcu_read_unlock(); genlmsg_end(msg, 
hdr); return genlmsg_reply(msg, info); @@ -5417,6 +5497,84 @@ put_failure: return -EMSGSIZE; } +static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, + u32 seq, int flags, struct wiphy *wiphy, + const struct ieee80211_regdomain *regdom) +{ + void *hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, + NL80211_CMD_GET_REG); + + if (!hdr) + return -1; + + genl_dump_check_consistent(cb, hdr, &nl80211_fam); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure; + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + + if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_get_reg_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + int err, reg_idx, start = cb->args[2]; + + rtnl_lock(); + + if (cfg80211_regdomain && start == 0) { + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL, + rtnl_dereference(cfg80211_regdomain)); + if (err < 0) + goto out_err; + } + + /* the global regdom is idx 0 */ + reg_idx = 1; + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + regdom = get_wiphy_regdom(&rdev->wiphy); + if (!regdom) + continue; + + if (++reg_idx <= start) + continue; + + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, &rdev->wiphy, regdom); + if (err < 0) { + reg_idx--; + break; + } + } + + cb->args[2] = reg_idx; + err = skb->len; +out_err: + rtnl_unlock(); + return err; +} + static int nl80211_set_reg(struct sk_buff *skb, struct 
genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -5622,7 +5780,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { - if (request->ssids) + if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); @@ -5678,7 +5836,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; i = 0; - if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { + if (n_ssids) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; @@ -5876,7 +6034,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { - if (request->ssids) + if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); @@ -5885,7 +6043,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (n_match_sets) { if (request->ie) request->match_sets = (void *)(request->ie + ie_len); - else if (request->ssids) + else if (n_ssids) request->match_sets = (void *)(request->ssids + n_ssids); else @@ -5944,7 +6102,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->n_channels = i; i = 0; - if (attrs[NL80211_ATTR_SCAN_SSIDS]) { + if (n_ssids) { nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { @@ -6052,6 +6210,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } } + if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) + request->delay = + nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); + request->interval = interval; request->scan_start = jiffies; @@ -6068,6 +6230,7 @@ static int 
nl80211_start_sched_scan(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_sched_scan_request *sched_scan_req; int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || @@ -6077,27 +6240,32 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; - rdev->sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, - info->attrs); - err = PTR_ERR_OR_ZERO(rdev->sched_scan_req); + sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, + info->attrs); + + err = PTR_ERR_OR_ZERO(sched_scan_req); if (err) goto out_err; - err = rdev_sched_scan_start(rdev, dev, rdev->sched_scan_req); + err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; - rdev->sched_scan_req->dev = dev; - rdev->sched_scan_req->wiphy = &rdev->wiphy; + sched_scan_req->dev = dev; + sched_scan_req->wiphy = &rdev->wiphy; + + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) + sched_scan_req->owner_nlportid = info->snd_portid; + + rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); nl80211_send_sched_scan(rdev, dev, NL80211_CMD_START_SCHED_SCAN); return 0; out_free: - kfree(rdev->sched_scan_req); + kfree(sched_scan_req); out_err: - rdev->sched_scan_req = NULL; return err; } @@ -6432,7 +6600,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_nest_end(msg, bss); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; fail_unlock_rcu: rcu_read_unlock(); @@ -6480,12 +6649,17 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) } static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, - int flags, struct net_device *dev, - struct survey_info *survey) + int flags, struct net_device *dev, + bool allow_radio_stats, + struct survey_info *survey) { void *hdr; struct nlattr *infoattr; + /* skip 
radio stats if userspace didn't request them */ + if (!survey->channel && !allow_radio_stats) + return 0; + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) @@ -6498,7 +6672,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if (!infoattr) goto nla_put_failure; - if (nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, + if (survey->channel && + nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, survey->channel->center_freq)) goto nla_put_failure; @@ -6508,49 +6683,57 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if ((survey->filled & SURVEY_INFO_IN_USE) && nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME, - survey->channel_time)) + if ((survey->filled & SURVEY_INFO_TIME) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME, + survey->time)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, - survey->channel_time_busy)) + if ((survey->filled & SURVEY_INFO_TIME_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY, + survey->time_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, - survey->channel_time_ext_busy)) + if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, + survey->time_ext_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_RX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX, - survey->channel_time_rx)) + if ((survey->filled & SURVEY_INFO_TIME_RX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX, + survey->time_rx)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_TX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX, - 
survey->channel_time_tx)) + if ((survey->filled & SURVEY_INFO_TIME_TX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, + survey->time_tx)) + goto nla_put_failure; + if ((survey->filled & SURVEY_INFO_TIME_SCAN) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN, + survey->time_scan)) goto nla_put_failure; nla_nest_end(msg, infoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } -static int nl80211_dump_survey(struct sk_buff *skb, - struct netlink_callback *cb) +static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; + bool radio_stats; res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; + /* prepare_wdev_dump parsed the attributes */ + radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + if (!wdev->netdev) { res = -EINVAL; goto out_err; @@ -6568,13 +6751,9 @@ static int nl80211_dump_survey(struct sk_buff *skb, if (res) goto out_err; - /* Survey without a channel doesn't make sense */ - if (!survey.channel) { - res = -EINVAL; - goto out; - } - - if (survey.channel->flags & IEEE80211_CHAN_DISABLED) { + /* don't send disabled channels, but do send non-channel data */ + if (survey.channel && + survey.channel->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; continue; } @@ -6582,7 +6761,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, if (nl80211_send_survey(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, &survey) < 0) + wdev->netdev, radio_stats, &survey) < 0) goto out; survey_idx++; } @@ -7595,14 +7774,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net *net; int err; - u32 pid; - if (!info->attrs[NL80211_ATTR_PID]) - return -EINVAL; + if 
(info->attrs[NL80211_ATTR_PID]) { + u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + + net = get_net_ns_by_pid(pid); + } else if (info->attrs[NL80211_ATTR_NETNS_FD]) { + u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]); - pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + net = get_net_ns_by_fd(fd); + } else { + return -EINVAL; + } - net = get_net_ns_by_pid(pid); if (IS_ERR(net)) return PTR_ERR(net); @@ -8598,6 +8782,48 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_nd(struct sk_buff *msg, + struct cfg80211_sched_scan_request *req) +{ + struct nlattr *nd, *freqs, *matches, *match; + int i; + + if (!req) + return 0; + + nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT); + if (!nd) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) + return -ENOBUFS; + + freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + if (!freqs) + return -ENOBUFS; + + for (i = 0; i < req->n_channels; i++) + nla_put_u32(msg, i, req->channels[i]->center_freq); + + nla_nest_end(msg, freqs); + + if (req->n_match_sets) { + matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + for (i = 0; i < req->n_match_sets; i++) { + match = nla_nest_start(msg, i); + nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid); + nla_nest_end(msg, match); + } + nla_nest_end(msg, matches); + } + + nla_nest_end(msg, nd); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -8655,6 +8881,11 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; + if (nl80211_send_wowlan_nd( + msg, + rdev->wiphy.wowlan_config->nd_config)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } @@ -10224,7 +10455,8 @@ static const struct genl_ops 
nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_REG, - .doit = nl80211_get_reg, + .doit = nl80211_get_reg_do, + .dumpit = nl80211_get_reg_dump, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ @@ -10823,7 +11055,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -10846,7 +11079,8 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -10938,25 +11172,9 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, NL80211_MCGRP_SCAN, GFP_KERNEL); } -/* - * This can happen on global regulatory changes or device specific settings - * based on custom world regulatory domains. 
- */ -void nl80211_send_reg_change_event(struct regulatory_request *request) +static bool nl80211_reg_change_event_fill(struct sk_buff *msg, + struct regulatory_request *request) { - struct sk_buff *msg; - void *hdr; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return; - - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE); - if (!hdr) { - nlmsg_free(msg); - return; - } - /* Userspace can always count this one always being set */ if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator)) goto nla_put_failure; @@ -10982,8 +11200,46 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (request->wiphy_idx != WIPHY_IDX_INVALID && - nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) + if (request->wiphy_idx != WIPHY_IDX_INVALID) { + struct wiphy *wiphy = wiphy_idx_to_wiphy(request->wiphy_idx); + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) + goto nla_put_failure; + + if (wiphy && + wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + } + + return true; + +nla_put_failure: + return false; +} + +/* + * This can happen on global regulatory changes or device specific settings + * based on custom regulatory domains. 
+ */ +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nl80211_reg_change_event_fill(msg, request) == false) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -11522,7 +11778,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, if (!msg) return; - if (nl80211_send_station(msg, 0, 0, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; @@ -11533,12 +11789,16 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) +void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; - void *hdr; + struct station_info empty_sinfo = {}; + + if (!sinfo) + sinfo = &empty_sinfo; trace_cfg80211_del_sta(dev, mac_addr); @@ -11546,27 +11806,16 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_STATION); - if (!hdr) { + if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, + rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; } - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); - return; - - nla_put_failure: - genlmsg_cancel(msg, hdr); - nlmsg_free(msg); } -EXPORT_SYMBOL(cfg80211_del_sta); 
+EXPORT_SYMBOL(cfg80211_del_sta_sinfo); void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, enum nl80211_connect_failed_reason reason, @@ -12470,6 +12719,13 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { bool schedule_destroy_work = false; + bool schedule_scan_stop = false; + struct cfg80211_sched_scan_request *sched_scan_req = + rcu_dereference(rdev->sched_scan_req); + + if (sched_scan_req && notify->portid && + sched_scan_req->owner_nlportid == notify->portid) + schedule_scan_stop = true; list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); @@ -12500,6 +12756,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb, spin_unlock(&rdev->destroy_list_lock); schedule_work(&rdev->destroy_work); } + } else if (schedule_scan_stop) { + sched_scan_req->owner_nlportid = 0; + + if (rdev->ops->sched_scan_stop && + rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + schedule_work(&rdev->sched_scan_stop_wk); } } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 7ad70d6f0cc6..84d4edf1d545 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -17,7 +17,21 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, struct net_device *netdev); -void nl80211_send_reg_change_event(struct regulatory_request *request); +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request); + +static inline void +nl80211_send_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_REG_CHANGE, request); +} + +static inline void +nl80211_send_wiphy_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_WIPHY_REG_CHANGE, request); +} + void 
nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d39d1cbc86b1..b586d0dcb09e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -109,7 +109,7 @@ static struct regulatory_request core_request_world = { * protected by RTNL (and can be accessed with RCU protection) */ static struct regulatory_request __rcu *last_request = - (void __rcu *)&core_request_world; + (void __force __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -142,7 +142,7 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void) return rtnl_dereference(cfg80211_regdomain); } -static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { return rtnl_dereference(wiphy->regd); } @@ -1307,6 +1307,9 @@ static bool ignore_reg_update(struct wiphy *wiphy, { struct regulatory_request *lr = get_last_request(); + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + return true; + if (!lr) { REG_DBG_PRINT("Ignoring regulatory request set by %s " "since last_request is not set\n", @@ -1695,8 +1698,12 @@ static void handle_channel_custom(struct wiphy *wiphy, if (IS_ERR(reg_rule)) { REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); - chan->orig_flags |= IEEE80211_CHAN_DISABLED; - chan->flags = chan->orig_flags; + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { + chan->flags |= IEEE80211_CHAN_DISABLED; + } else { + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; + } return; } @@ -1721,7 +1728,13 @@ static void handle_channel_custom(struct wiphy *wiphy, chan->dfs_state = NL80211_DFS_USABLE; chan->beacon_found = false; - chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; + + if (wiphy->regulatory_flags & 
REGULATORY_WIPHY_SELF_MANAGED) + chan->flags = chan->orig_flags | bw_flags | + map_regdom_flags(reg_rule->flags); + else + chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; + chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); chan->max_reg_power = chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); @@ -2107,6 +2120,26 @@ out_free: reg_free_request(reg_request); } +static bool reg_only_self_managed_wiphys(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + bool self_managed_found = false; + + ASSERT_RTNL(); + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + self_managed_found = true; + else + return false; + } + + /* make sure at least one self-managed wiphy exists */ + return self_managed_found; +} + /* * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* * Regulatory hints come on a first come first serve basis and we @@ -2138,6 +2171,11 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); + if (reg_only_self_managed_wiphys()) { + reg_free_request(reg_request); + return; + } + reg_process_hint(reg_request); } @@ -2165,11 +2203,52 @@ static void reg_process_pending_beacon_hints(void) spin_unlock_bh(®_pending_beacons_lock); } +static void reg_process_self_managed_hints(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + const struct ieee80211_regdomain *tmp; + const struct ieee80211_regdomain *regd; + enum ieee80211_band band; + struct regulatory_request request = {}; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + + spin_lock(®_requests_lock); + regd = rdev->requested_regd; + rdev->requested_regd = NULL; + spin_unlock(®_requests_lock); + + if (regd == NULL) + continue; + + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); + + for (band = 0; band < 
IEEE80211_NUM_BANDS; band++) + handle_band_custom(wiphy, wiphy->bands[band], regd); + + reg_process_ht_flags(wiphy); + + request.wiphy_idx = get_wiphy_idx(wiphy); + request.alpha2[0] = regd->alpha2[0]; + request.alpha2[1] = regd->alpha2[1]; + request.initiator = NL80211_REGDOM_SET_BY_DRIVER; + + nl80211_send_wiphy_reg_change_event(&request); + } + + reg_check_channels(); +} + static void reg_todo(struct work_struct *work) { rtnl_lock(); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + reg_process_self_managed_hints(); rtnl_unlock(); } @@ -2450,6 +2529,8 @@ static void restore_regulatory_settings(bool reset_user) world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + continue; if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) restore_custom_reg_settings(&rdev->wiphy); } @@ -2853,10 +2934,79 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } +static int __regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *prev_regd; + struct cfg80211_registered_device *rdev; + + if (WARN_ON(!wiphy || !rd)) + return -EINVAL; + + if (WARN(!(wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED), + "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n")) + return -EPERM; + + if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) { + print_regdomain_info(rd); + return -EINVAL; + } + + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); + + rdev = wiphy_to_rdev(wiphy); + + spin_lock(®_requests_lock); + prev_regd = rdev->requested_regd; + rdev->requested_regd = regd; + spin_unlock(®_requests_lock); + + kfree(prev_regd); + return 0; +} + +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret = __regulatory_set_wiphy_regd(wiphy, rd); + + if (ret) + 
return ret; + + schedule_work(®_work); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd); + +int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret; + + ASSERT_RTNL(); + + ret = __regulatory_set_wiphy_regd(wiphy, rd); + if (ret) + return ret; + + /* process the request immediately */ + reg_process_self_managed_hints(); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync_rtnl); + void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; + /* self-managed devices ignore external hints */ + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 5e48031ccb9a..4b45d6e61d24 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -38,6 +38,7 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); bool reg_last_request_cell_base(void); +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel diff --git a/net/wireless/scan.c b/net/wireless/scan.c index bda39f149810..c705c3e2b751 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -257,7 +257,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) rtnl_lock(); - request = rdev->sched_scan_req; + request = rtnl_dereference(rdev->sched_scan_req); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -279,7 +279,8 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_rdev(wiphy)->sched_scan_req) + + if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req)) queue_work(cfg80211_wq, 
&wiphy_to_rdev(wiphy)->sched_scan_results_wk); } @@ -308,6 +309,7 @@ EXPORT_SYMBOL(cfg80211_sched_scan_stopped); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated) { + struct cfg80211_sched_scan_request *sched_scan_req; struct net_device *dev; ASSERT_RTNL(); @@ -315,7 +317,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, if (!rdev->sched_scan_req) return -ENOENT; - dev = rdev->sched_scan_req->dev; + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + dev = sched_scan_req->dev; if (!driver_initiated) { int err = rdev_sched_scan_stop(rdev, dev); @@ -325,8 +328,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); - kfree(rdev->sched_scan_req); - rdev->sched_scan_req = NULL; + RCU_INIT_POINTER(rdev->sched_scan_req, NULL); + kfree_rcu(sched_scan_req, rcu_head); return 0; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ad38910f7036..b17b3692f8c2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1604,11 +1604,12 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ENTRY CHAN_ENTRY __field(int, ret) - __field(u64, channel_time) - __field(u64, channel_time_busy) - __field(u64, channel_time_ext_busy) - __field(u64, channel_time_rx) - __field(u64, channel_time_tx) + __field(u64, time) + __field(u64, time_busy) + __field(u64, time_ext_busy) + __field(u64, time_rx) + __field(u64, time_tx) + __field(u64, time_scan) __field(u32, filled) __field(s8, noise) ), @@ -1616,22 +1617,24 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ASSIGN; CHAN_ASSIGN(info->channel); __entry->ret = ret; - __entry->channel_time = info->channel_time; - __entry->channel_time_busy = info->channel_time_busy; - __entry->channel_time_ext_busy = info->channel_time_ext_busy; - __entry->channel_time_rx = info->channel_time_rx; - __entry->channel_time_tx = info->channel_time_tx; + __entry->time = info->time; + 
__entry->time_busy = info->time_busy; + __entry->time_ext_busy = info->time_ext_busy; + __entry->time_rx = info->time_rx; + __entry->time_tx = info->time_tx; + __entry->time_scan = info->time_scan; __entry->filled = info->filled; __entry->noise = info->noise; ), TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT ", channel time: %llu, channel time busy: %llu, " "channel time extension busy: %llu, channel time rx: %llu, " - "channel time tx: %llu, filled: %u, noise: %d", + "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, - __entry->channel_time, __entry->channel_time_busy, - __entry->channel_time_ext_busy, __entry->channel_time_rx, - __entry->channel_time_tx, __entry->filled, __entry->noise) + __entry->time, __entry->time_busy, + __entry->time_ext_busy, __entry->time_rx, + __entry->time_tx, __entry->time_scan, + __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, diff --git a/net/wireless/util.c b/net/wireless/util.c index 5488c3662f7d..6903dbdcb8c1 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -227,18 +227,32 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (pairwise && !mac_addr) return -EINVAL; - /* - * Disallow pairwise keys with non-zero index unless it's WEP - * or a vendor specific cipher (because current deployments use - * pairwise WEP keys with non-zero indices and for vendor specific - * ciphers this should be validated in the driver or hardware level - * - but 802.11i clearly specifies to use zero) - */ - if (pairwise && key_idx && - ((params->cipher == WLAN_CIPHER_SUITE_TKIP) || - (params->cipher == WLAN_CIPHER_SUITE_CCMP) || - (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC))) - return -EINVAL; + switch (params->cipher) { + case WLAN_CIPHER_SUITE_TKIP: + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* Disallow pairwise keys with non-zero index unless 
it's WEP + * or a vendor specific cipher (because current deployments use + * pairwise WEP keys with non-zero indices and for vendor + * specific ciphers this should be validated in the driver or + * hardware level - but 802.11i clearly specifies to use zero) + */ + if (pairwise && key_idx) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + /* Disallow BIP (group-only) cipher as pairwise cipher */ + if (pairwise) + return -EINVAL; + break; + default: + break; + } switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: @@ -253,6 +267,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_CCMP) return -EINVAL; break; + case WLAN_CIPHER_SUITE_CCMP_256: + if (params->key_len != WLAN_KEY_LEN_CCMP_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP: + if (params->key_len != WLAN_KEY_LEN_GCMP) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP_256: + if (params->key_len != WLAN_KEY_LEN_GCMP_256) + return -EINVAL; + break; case WLAN_CIPHER_SUITE_WEP104: if (params->key_len != WLAN_KEY_LEN_WEP104) return -EINVAL; @@ -261,6 +287,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_AES_CMAC) return -EINVAL; break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_CMAC_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_128) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_256) + return -EINVAL; + break; default: /* * We don't know anything about this algorithm, @@ -280,7 +318,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, return -EINVAL; case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case 
WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (params->seq_len != 6) return -EINVAL; break; @@ -714,8 +758,8 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - if (vlan_tx_tag_present(skb)) { - vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK) + if (skb_vlan_tag_present(skb)) { + vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; if (vlan_priority > 0) return vlan_priority; @@ -1079,10 +1123,24 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) if (WARN_ON_ONCE(rate->mcs > 9)) return 0; - idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH | - RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 : - rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : - rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0; + switch (rate->bw) { + case RATE_INFO_BW_160: + idx = 3; + break; + case RATE_INFO_BW_80: + idx = 2; + break; + case RATE_INFO_BW_40: + idx = 1; + break; + case RATE_INFO_BW_5: + case RATE_INFO_BW_10: + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + idx = 0; + } bitrate = base[idx][rate->mcs]; bitrate *= rate->nss; @@ -1113,8 +1171,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) modulation = rate->mcs & 7; streams = (rate->mcs >> 3) + 1; - bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? - 13500000 : 6500000; + bitrate = (rate->bw == RATE_INFO_BW_40) ? 
13500000 : 6500000; if (modulation < 4) bitrate *= (modulation + 1); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0f47948c572f..5b24d39d7903 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1300,7 +1300,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & STATION_INFO_TX_BITRATE)) + if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))) return -EOPNOTSUPP; rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); @@ -1340,7 +1340,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; @@ -1354,7 +1354,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) break; } case CFG80211_SIGNAL_TYPE_UNSPEC: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; @@ -1367,9 +1367,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; - if (sinfo.filled & STATION_INFO_RX_DROP_MISC) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; - if (sinfo.filled & STATION_INFO_TX_FAILED) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; return &wstats; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index debe733386f8..12e82a5e4ad5 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -561,11 +561,6 @@ static struct xfrm_algo_desc calg_list[] = { }, }; -static inline int aead_entries(void) -{ - 
return ARRAY_SIZE(aead_list); -} - static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8128594ab379..7de2ed9ec46d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1019,7 +1019,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1121,7 +1122,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1842,7 +1844,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2282,7 +2285,8 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, goto out_cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2490,7 +2494,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2712,7 +2717,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2827,7 +2833,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, } upe->hard = !!hard; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2986,7 +2993,8 @@ static int 
build_report(struct sk_buff *skb, u8 proto, return err; } } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_report(struct net *net, u8 proto, @@ -3031,7 +3039,8 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, um->old_sport = x->encap->encap_sport; um->reqid = x->props.reqid; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, |