diff options
Diffstat (limited to 'net')
110 files changed, 3871 insertions, 1037 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ee070722a3a3..30ee4bc0f7cc 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -294,7 +294,7 @@ static void vlan_transfer_features(struct net_device *dev, else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 402442402af7..4a6d31a082b9 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -409,7 +409,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa) return err; } -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc) { @@ -531,6 +531,10 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static struct device_type vlan_type = { + .name = "vlan", +}; + static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) @@ -564,7 +568,7 @@ static int vlan_dev_init(struct net_device *dev) if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; #endif @@ -579,6 +583,8 @@ static int vlan_dev_init(struct net_device *dev) dev->netdev_ops = &vlan_netdev_ops; + SET_NETDEV_DEVTYPE(dev, &vlan_type); + if (is_vlan_dev(real_dev)) subclass = 1; @@ -741,7 +747,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 53f5244e28f8..250e0b58109c 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -25,6 +25,16 @@ config BATMAN_ADV_BLA more than one mesh node in the same LAN, you can safely remove this feature and save some space. +config BATMAN_ADV_DAT + bool "Distributed ARP Table" + depends on BATMAN_ADV && INET + default n + help + This option enables DAT (Distributed ARP Table), a DHT based + mechanism that increases ARP reliability on sparse wireless + mesh networks. If you think that your network does not need + this option you can safely remove it and save some space. + config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 8676d2b1d574..e45e3b4e32e3 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -23,6 +23,7 @@ batman-adv-y += bat_iv_ogm.o batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o batman-adv-y += debugfs.o +batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b02b75dae3a8..9f3925a85aab 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -57,20 +57,22 @@ out: static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; + unsigned char *ogm_buff; uint32_t random_seqno; int res = -ENOMEM; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); - atomic_set(&hard_iface->seqno, random_seqno); + atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); - hard_iface->packet_len = BATADV_OGM_HLEN; - hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); - - if (!hard_iface->packet_buff) + hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; + ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); + if (!ogm_buff) goto out; - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + hard_iface->bat_iv.ogm_buff = ogm_buff; + + batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; batadv_ogm_packet->header.ttl = 2; @@ -87,15 +89,16 @@ out: static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { - kfree(hard_iface->packet_buff); - hard_iface->packet_buff = NULL; + kfree(hard_iface->bat_iv.ogm_buff); + hard_iface->bat_iv.ogm_buff = NULL; } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; + unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; memcpy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(batadv_ogm_packet->prev_sender, @@ -106,8 +109,9 @@ static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; + unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; batadv_ogm_packet->header.ttl = BATADV_TTL; } @@ -407,9 +411,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, if ((atomic_read(&bat_priv->aggregated_ogms)) && (packet_len < BATADV_MAX_AGGREGATION_BYTES)) - skb_size = BATADV_MAX_AGGREGATION_BYTES + ETH_HLEN; + skb_size = BATADV_MAX_AGGREGATION_BYTES; else - skb_size = packet_len + ETH_HLEN; + skb_size = packet_len; + + skb_size += ETH_HLEN + NET_IP_ALIGN; forw_packet_aggr->skb = dev_alloc_skb(skb_size); if (!forw_packet_aggr->skb) { @@ -418,7 +424,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); INIT_HLIST_NODE(&forw_packet_aggr->list); @@ -590,8 +596,10 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if; + int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; int vis_server, tt_num_changes = 0; uint32_t seqno; uint8_t bandwidth; @@ -600,17 +608,16 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) - tt_num_changes = batadv_tt_append_diff(bat_priv, - &hard_iface->packet_buff, - &hard_iface->packet_len, + tt_num_changes = batadv_tt_append_diff(bat_priv, ogm_buff, + ogm_buff_len, BATADV_OGM_HLEN); - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); /* change sequence number to network order */ - seqno = (uint32_t)atomic_read(&hard_iface->seqno); + seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->seqno = htonl(seqno); - atomic_inc(&hard_iface->seqno); + atomic_inc(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc); @@ -631,8 +638,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) } batadv_slide_own_bcast_window(hard_iface); - batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff, - hard_iface->packet_len, hard_iface, 1, + batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, + hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); if (primary_if) @@ -1015,7 +1022,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; /* could be changed by schedule_own_packet() */ - if_incoming_seqno = atomic_read(&if_incoming->seqno); + if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); if (batadv_ogm_packet->flags & BATADV_DIRECTLINK) has_directlink_flag = 1; diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index aea174cdbfbd..5453b17d8df2 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -79,20 +79,17 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, * or the old packet got delayed somewhere in the network. The * packet should be dropped without calling this function if the * seqno window is protected. + * + * seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE + * or + * seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE */ - if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || - seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Other host probably restarted!\n"); - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Other host probably restarted!\n"); - - bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - if (set_mark) - batadv_set_bit(seq_bits, 0); - - return 1; - } + bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); + if (set_mark) + batadv_set_bit(seq_bits, 0); - /* never reached */ - return 0; + return 1; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index fd8d5afec0dd..29a5542aac75 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1585,23 +1585,11 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct hlist_head *head; uint32_t i; bool is_own; - int ret = 0; uint8_t *primary_addr; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, @@ -1628,7 +1616,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) @@ -1643,23 +1631,11 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) int secs, msecs; uint32_t i; bool is_own; - int ret = 0; uint8_t *primary_addr; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, @@ -1693,5 +1669,5 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 391d4fb2026f..3f679cb2d0e2 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -31,6 +31,7 @@ #include "vis.h" #include "icmp_socket.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" static struct dentry *batadv_debugfs; @@ -99,15 +100,17 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) static int batadv_log_open(struct inode *inode, struct file *file) { + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + nonseekable_open(inode, file); file->private_data = inode->i_private; - batadv_inc_module_count(); return 0; } static int batadv_log_release(struct inode *inode, struct file *file) { - batadv_dec_module_count(); + module_put(THIS_MODULE); return 0; } @@ -278,6 +281,19 @@ static int batadv_bla_backbone_table_open(struct inode *inode, #endif +#ifdef CONFIG_BATMAN_ADV_DAT +/** + * batadv_dat_cache_open - Prepare file handler for reads from dat_chache + * @inode: inode which was opened + * @file: file handle to be initialized + */ +static int batadv_dat_cache_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_dat_cache_seq_print_text, net_dev); +} +#endif + static int batadv_transtable_local_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; @@ -317,6 +333,9 @@ static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, batadv_bla_backbone_table_open); #endif +#ifdef CONFIG_BATMAN_ADV_DAT +static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); +#endif static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); @@ -329,6 +348,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_bla_claim_table, &batadv_debuginfo_bla_backbone_table, #endif +#ifdef CONFIG_BATMAN_ADV_DAT + &batadv_debuginfo_dat_cache, +#endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, NULL, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c new file mode 100644 index 000000000000..8e1d89d2b1c1 --- /dev/null +++ b/net/batman-adv/distributed-arp-table.c @@ -0,0 +1,1066 @@ +/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: + * + * Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/if_ether.h> +#include <linux/if_arp.h> +#include <net/arp.h> + +#include "main.h" +#include "hash.h" +#include "distributed-arp-table.h" +#include "hard-interface.h" +#include "originator.h" +#include "send.h" +#include "types.h" +#include "translation-table.h" +#include "unicast.h" + +static void batadv_dat_purge(struct work_struct *work); + +/** + * batadv_dat_start_timer - initialise the DAT periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_dat_start_timer(struct batadv_priv *bat_priv) +{ + INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work, + msecs_to_jiffies(10000)); +} + +/** + * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * free it + * @dat_entry: the oentry to free + */ +static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) +{ + if (atomic_dec_and_test(&dat_entry->refcount)) + kfree_rcu(dat_entry, rcu); +} + +/** + * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * @dat_entry: the entry to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) +{ + return batadv_has_timed_out(dat_entry->last_update, + BATADV_DAT_ENTRY_TIMEOUT); +} + +/** + * __batadv_dat_purge - delete entries from the DAT local storage + * @bat_priv: the bat priv with all the soft interface information + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the dat_entry as argument and has to + * returns a boolean value: true is the entry has to be deleted, + * false otherwise + * + * Loops over each entry in the DAT local storage and delete it if and only if + * the to_purge function passed as argument returns true + */ +static void __batadv_dat_purge(struct batadv_priv *bat_priv, + bool (*to_purge)(struct batadv_dat_entry *)) +{ + spinlock_t *list_lock; /* protects write access to the hash lists */ + struct batadv_dat_entry *dat_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + uint32_t i; + + if (!bat_priv->dat.hash) + return; + + for (i = 0; i < bat_priv->dat.hash->size; i++) { + head = &bat_priv->dat.hash->table[i]; + list_lock = &bat_priv->dat.hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(dat_entry, node, node_tmp, head, + hash_entry) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(dat_entry)) + continue; + + hlist_del_rcu(node); + batadv_dat_entry_free_ref(dat_entry); + } + spin_unlock_bh(list_lock); + } +} + +/** + * batadv_dat_purge - periodic task that deletes old entries from the local DAT + * hash table + * @work: kernel work struct + */ +static void batadv_dat_purge(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_dat *priv_dat; + struct batadv_priv *bat_priv; + + delayed_work = container_of(work, struct delayed_work, work); + priv_dat = container_of(delayed_work, struct batadv_priv_dat, work); + bat_priv = container_of(priv_dat, struct batadv_priv, dat); + + __batadv_dat_purge(bat_priv, batadv_dat_to_purge); + batadv_dat_start_timer(bat_priv); +} + +/** + * batadv_compare_dat - comparing function used in the local DAT hash table + * @node: node in the local table + * @data2: second object to compare the node to + * + * Returns 1 if the two entry are the same, 0 otherwise + */ +static int batadv_compare_dat(const struct hlist_node *node, const void *data2) +{ + const void *data1 = container_of(node, struct batadv_dat_entry, + hash_entry); + + return (memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0); +} + +/** + * batadv_arp_hw_src - extract the hw_src field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the hw_src field in the ARP packet + */ +static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) +{ + uint8_t *addr; + + addr = (uint8_t *)(skb->data + hdr_size); + addr += ETH_HLEN + sizeof(struct arphdr); + + return addr; +} + +/** + * batadv_arp_ip_src - extract the ip_src field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the ip_src field in the ARP packet + */ +static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) +{ + return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN); +} + +/** + * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the hw_dst field in the ARP packet + */ +static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) +{ + return batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN + 4; +} + +/** + * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the ip_dst field in the ARP packet + */ +static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) +{ + return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN * 2 + 4); +} + +/** + * batadv_hash_dat - compute the hash value for an IP address + * @data: data to hash + * @size: size of the hash table + * + * Returns the selected index in the hash table for the given data + */ +static uint32_t batadv_hash_dat(const void *data, uint32_t size) +{ + const unsigned char *key = data; + uint32_t hash = 0; + size_t i; + + for (i = 0; i < 4; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/** + * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * table + * @bat_priv: the bat priv with all the soft interface information + * @ip: search key + * + * Returns the dat_entry if found, NULL otherwise + */ +static struct batadv_dat_entry * +batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) +{ + struct hlist_head *head; + struct hlist_node *node; + struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; + struct batadv_hashtable *hash = bat_priv->dat.hash; + uint32_t index; + + if (!hash) + return NULL; + + index = batadv_hash_dat(&ip, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + if (dat_entry->ip != ip) + continue; + + if (!atomic_inc_not_zero(&dat_entry->refcount)) + continue; + + dat_entry_tmp = dat_entry; + break; + } + rcu_read_unlock(); + + return dat_entry_tmp; +} + +/** + * batadv_dat_entry_add - add a new dat entry or update it if already exists + * @bat_priv: the bat priv with all the soft interface information + * @ip: ipv4 to add/edit + * @mac_addr: mac address to assign to the given ipv4 + */ +static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, + uint8_t *mac_addr) +{ + struct batadv_dat_entry *dat_entry; + int hash_added; + + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip); + /* if this entry is already known, just update it */ + if (dat_entry) { + if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr)) + memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); + dat_entry->last_update = jiffies; + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Entry updated: %pI4 %pM\n", &dat_entry->ip, + dat_entry->mac_addr); + goto out; + } + + dat_entry = kmalloc(sizeof(*dat_entry), GFP_ATOMIC); + if (!dat_entry) + goto out; + + dat_entry->ip = ip; + memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); + dat_entry->last_update = jiffies; + atomic_set(&dat_entry->refcount, 2); + + hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, + batadv_hash_dat, &dat_entry->ip, + &dat_entry->hash_entry); + + if (unlikely(hash_added != 0)) { + /* remove the reference for the hash */ + batadv_dat_entry_free_ref(dat_entry); + goto out; + } + + batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM\n", + &dat_entry->ip, dat_entry->mac_addr); + +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); +} + +#ifdef CONFIG_BATMAN_ADV_DEBUG + +/** + * batadv_dbg_arp - print a debug message containing all the ARP packet details + * @bat_priv: the bat priv with all the soft interface information + * @skb: ARP packet + * @type: ARP type + * @hdr_size: size of the possible header before the ARP packet + * @msg: message to print together with the debugging information + */ +static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, + uint16_t type, int hdr_size, char *msg) +{ + struct batadv_unicast_4addr_packet *unicast_4addr_packet; + struct batadv_bcast_packet *bcast_pkt; + uint8_t *orig_addr; + __be32 ip_src, ip_dst; + + if (msg) + batadv_dbg(BATADV_DBG_DAT, bat_priv, "%s\n", msg); + + ip_src = batadv_arp_ip_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]\n", + batadv_arp_hw_src(skb, hdr_size), &ip_src, + batadv_arp_hw_dst(skb, hdr_size), &ip_dst); + + if (hdr_size == 0) + return; + + /* if the ARP packet is encapsulated in a batman packet, let's print + * some debug messages + */ + unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + + switch (unicast_4addr_packet->u.header.packet_type) { + case BATADV_UNICAST: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within a UNICAST packet\n"); + break; + case BATADV_UNICAST_4ADDR: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within a UNICAST_4ADDR packet (src: %pM)\n", + unicast_4addr_packet->src); + switch (unicast_4addr_packet->subtype) { + case BATADV_P_DAT_DHT_PUT: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_PUT\n"); + break; + case BATADV_P_DAT_DHT_GET: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_GET\n"); + break; + case BATADV_P_DAT_CACHE_REPLY: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* type: DAT_CACHE_REPLY\n"); + break; + case BATADV_P_DATA: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DATA\n"); + break; + default: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n", + unicast_4addr_packet->u.header.packet_type); + } + break; + case BATADV_BCAST: + bcast_pkt = (struct batadv_bcast_packet *)unicast_4addr_packet; + orig_addr = bcast_pkt->orig; + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within a BCAST packet (src: %pM)\n", + orig_addr); + break; + default: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within an unknown packet type (0x%x)\n", + unicast_4addr_packet->u.header.packet_type); + } +} + +#else + +static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, + uint16_t type, int hdr_size, char *msg) +{ +} + +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + +/** + * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate + * @res: the array with the already selected candidates + * @select: number of already selected candidates + * @tmp_max: address of the currently evaluated node + * @max: current round max address + * @last_max: address of the last selected candidate + * @candidate: orig_node under evaluation + * @max_orig_node: last selected candidate + * + * Returns true if the node has been elected as next candidate or false othrwise + */ +static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, + int select, batadv_dat_addr_t tmp_max, + batadv_dat_addr_t max, + batadv_dat_addr_t last_max, + struct batadv_orig_node *candidate, + struct batadv_orig_node *max_orig_node) +{ + bool ret = false; + int j; + + /* Check if this node has already been selected... */ + for (j = 0; j < select; j++) + if (res[j].orig_node == candidate) + break; + /* ..and possibly skip it */ + if (j < select) + goto out; + /* sanity check: has it already been selected? This should not happen */ + if (tmp_max > last_max) + goto out; + /* check if during this iteration an originator with a closer dht + * address has already been found + */ + if (tmp_max < max) + goto out; + /* this is an hash collision with the temporary selected node. Choose + * the one with the lowest address + */ + if ((tmp_max == max) && + (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) + goto out; + + ret = true; +out: + return ret; +} + +/** + * batadv_choose_next_candidate - select the next DHT candidate + * @bat_priv: the bat priv with all the soft interface information + * @cands: candidates array + * @select: number of candidates already present in the array + * @ip_key: key to look up in the DHT + * @last_max: pointer where the address of the selected candidate will be saved + */ +static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, + struct batadv_dat_candidate *cands, + int select, batadv_dat_addr_t ip_key, + batadv_dat_addr_t *last_max) +{ + batadv_dat_addr_t max = 0, tmp_max = 0; + struct batadv_orig_node *orig_node, *max_orig_node = NULL; + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_node *node; + struct hlist_head *head; + int i; + + /* if no node is eligible as candidate, leave the candidate type as + * NOT_FOUND + */ + cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; + + /* iterate over the originator list and find the node with closest + * dat_address which has not been selected yet + */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + /* the dht space is a ring and addresses are unsigned */ + tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + + ip_key; + + if (!batadv_is_orig_node_eligible(cands, select, + tmp_max, max, + *last_max, orig_node, + max_orig_node)) + continue; + + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + + max = tmp_max; + if (max_orig_node) + batadv_orig_node_free_ref(max_orig_node); + max_orig_node = orig_node; + } + rcu_read_unlock(); + } + if (max_orig_node) { + cands[select].type = BATADV_DAT_CANDIDATE_ORIG; + cands[select].orig_node = max_orig_node; + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "dat_select_candidates() %d: selected %pM addr=%u dist=%u\n", + select, max_orig_node->orig, max_orig_node->dat_addr, + max); + } + *last_max = max; +} + +/** + * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * be sent to + * @bat_priv: the bat priv with all the soft interface information + * @ip_dst: ipv4 to look up in the DHT + * + * An originator O is selected if and only if its DHT_ID value is one of three + * closest values (from the LEFT, with wrap around if needed) then the hash + * value of the key. ip_dst is the key. + * + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + */ +static struct batadv_dat_candidate * +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +{ + int select; + batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; + struct batadv_dat_candidate *res; + + if (!bat_priv->orig_hash) + return NULL; + + res = kmalloc(BATADV_DAT_CANDIDATES_NUM * sizeof(*res), GFP_ATOMIC); + if (!res) + return NULL; + + ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst, + BATADV_DAT_ADDR_MAX); + + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst, + ip_key); + + for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++) + batadv_choose_next_candidate(bat_priv, res, select, ip_key, + &last_max); + + return res; +} + +/** + * batadv_dat_send_data - send a payload to the selected candidates + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @ip: the DHT key + * @packet_subtype: unicast4addr packet subtype to use + * + * In this function the skb is copied by means of pskb_copy() and is sent as + * unicast packet to each of the selected candidates + * + * Returns true if the packet is sent to at least one candidate, false otherwise + */ +static bool batadv_dat_send_data(struct batadv_priv *bat_priv, + struct sk_buff *skb, __be32 ip, + int packet_subtype) +{ + int i; + bool ret = false; + int send_status; + struct batadv_neigh_node *neigh_node = NULL; + struct sk_buff *tmp_skb; + struct batadv_dat_candidate *cand; + + cand = batadv_dat_select_candidates(bat_priv, ip); + if (!cand) + goto out; + + batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip); + + for (i = 0; i < BATADV_DAT_CANDIDATES_NUM; i++) { + if (cand[i].type == BATADV_DAT_CANDIDATE_NOT_FOUND) + continue; + + neigh_node = batadv_orig_node_get_router(cand[i].orig_node); + if (!neigh_node) + goto free_orig; + + tmp_skb = pskb_copy(skb, GFP_ATOMIC); + if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb, + cand[i].orig_node, + packet_subtype)) { + kfree_skb(tmp_skb); + goto free_neigh; + } + + send_status = batadv_send_skb_packet(tmp_skb, + neigh_node->if_incoming, + neigh_node->addr); + if (send_status == NET_XMIT_SUCCESS) { + /* count the sent packet */ + switch (packet_subtype) { + case BATADV_P_DAT_DHT_GET: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_GET_TX); + break; + case BATADV_P_DAT_DHT_PUT: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_PUT_TX); + break; + } + + /* packet sent to a candidate: return true */ + ret = true; + } +free_neigh: + batadv_neigh_node_free_ref(neigh_node); +free_orig: + batadv_orig_node_free_ref(cand[i].orig_node); + } + +out: + kfree(cand); + return ret; +} + +/** + * batadv_dat_hash_free - free the local DAT hash table + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_dat_hash_free(struct batadv_priv *bat_priv) +{ + if (!bat_priv->dat.hash) + return; + + __batadv_dat_purge(bat_priv, NULL); + + batadv_hash_destroy(bat_priv->dat.hash); + + bat_priv->dat.hash = NULL; +} + +/** + * batadv_dat_init - initialise the DAT internals + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_dat_init(struct batadv_priv *bat_priv) +{ + if (bat_priv->dat.hash) + return 0; + + bat_priv->dat.hash = batadv_hash_new(1024); + + if (!bat_priv->dat.hash) + return -ENOMEM; + + batadv_dat_start_timer(bat_priv); + + return 0; +} + +/** + * batadv_dat_free - free the DAT internals + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_dat_free(struct batadv_priv *bat_priv) +{ + cancel_delayed_work_sync(&bat_priv->dat.work); + + batadv_dat_hash_free(bat_priv); +} + +/** + * batadv_dat_cache_seq_print_text - print the local DAT hash table + * @seq: seq file to print on + * @offset: not used + */ +int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->dat.hash; + struct batadv_dat_entry *dat_entry; + struct batadv_hard_iface *primary_if; + struct hlist_node *node; + struct hlist_head *head; + unsigned long last_seen_jiffies; + int last_seen_msecs, last_seen_secs, last_seen_mins; + uint32_t i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + goto out; + + seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name); + seq_printf(seq, " %-7s %-13s %5s\n", "IPv4", "MAC", + "last-seen"); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + last_seen_jiffies = jiffies - dat_entry->last_update; + last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); + last_seen_mins = last_seen_msecs / 60000; + last_seen_msecs = last_seen_msecs % 60000; + last_seen_secs = last_seen_msecs / 1000; + + seq_printf(seq, " * %15pI4 %14pM %6i:%02i\n", + &dat_entry->ip, dat_entry->mac_addr, + last_seen_mins, last_seen_secs); + } + rcu_read_unlock(); + } + +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return 0; +} + +/** + * batadv_arp_get_type - parse an ARP packet and gets the type + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to analyse + * @hdr_size: size of the possible header before the ARP packet in the skb + * + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + */ +static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + struct arphdr *arphdr; + struct ethhdr *ethhdr; + __be32 ip_src, ip_dst; + uint16_t type = 0; + + /* pull the ethernet header */ + if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN))) + goto out; + + ethhdr = (struct ethhdr *)(skb->data + hdr_size); + + if (ethhdr->h_proto != htons(ETH_P_ARP)) + goto out; + + /* pull the ARP payload */ + if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN + + arp_hdr_len(skb->dev)))) + goto out; + + arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); + + /* Check whether the ARP packet carries a valid + * IP information + */ + if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) + goto out; + + if (arphdr->ar_pro != htons(ETH_P_IP)) + goto out; + + if (arphdr->ar_hln != ETH_ALEN) + goto out; + + if (arphdr->ar_pln != 4) + goto out; + + /* Check for bad reply/request. If the ARP message is not sane, DAT + * will simply ignore it + */ + ip_src = batadv_arp_ip_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) || + ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst)) + goto out; + + type = ntohs(arphdr->ar_op); +out: + return type; +} + +/** + * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to + * answer using DAT + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * + * Returns true if the message has been sent to the dht candidates, false + * otherwise. In case of true the message has to be enqueued to permit the + * fallback + */ +bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + uint16_t type = 0; + __be32 ip_dst, ip_src; + uint8_t *hw_src; + bool ret = false; + struct batadv_dat_entry *dat_entry = NULL; + struct sk_buff *skb_new; + struct batadv_hard_iface *primary_if = NULL; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + type = batadv_arp_get_type(bat_priv, skb, 0); + /* If the node gets an ARP_REQUEST it has to send a DHT_GET unicast + * message to the selected DHT candidates + */ + if (type != ARPOP_REQUEST) + goto out; + + batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REQUEST"); + + ip_src = batadv_arp_ip_src(skb, 0); + hw_src = batadv_arp_hw_src(skb, 0); + ip_dst = batadv_arp_ip_dst(skb, 0); + + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + if (dat_entry) { + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, + primary_if->soft_iface, ip_dst, hw_src, + dat_entry->mac_addr, hw_src); + if (!skb_new) + goto out; + + skb_reset_mac_header(skb_new); + skb_new->protocol = eth_type_trans(skb_new, + primary_if->soft_iface); + bat_priv->stats.rx_packets++; + bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + primary_if->soft_iface->last_rx = jiffies; + + netif_rx(skb_new); + batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); + ret = true; + } else { + /* Send the request on the DHT */ + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + BATADV_P_DAT_DHT_GET); + } +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} + +/** + * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to + * answer using the local DAT storage + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: size of the encapsulation header + * + * Returns true if the request has been answered, false otherwise + */ +bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + uint16_t type; + __be32 ip_src, ip_dst; + uint8_t *hw_src; + struct sk_buff *skb_new; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_dat_entry *dat_entry = NULL; + bool ret = false; + int err; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + type = batadv_arp_get_type(bat_priv, skb, hdr_size); + if (type != ARPOP_REQUEST) + goto out; + + hw_src = batadv_arp_hw_src(skb, hdr_size); + ip_src = batadv_arp_ip_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + + batadv_dbg_arp(bat_priv, skb, type, hdr_size, + "Parsing incoming ARP REQUEST"); + + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + if (!dat_entry) + goto out; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, + primary_if->soft_iface, ip_dst, hw_src, + dat_entry->mac_addr, hw_src); + + if (!skb_new) + goto out; + + /* to preserve backwards compatibility, here the node has to answer + * using the same packet type it received for the request. This is due + * to that if a node is not using the 4addr packet format it may not + * support it. + */ + if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) + err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, + BATADV_P_DAT_CACHE_REPLY); + else + err = batadv_unicast_send_skb(bat_priv, skb_new); + + if (!err) { + batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); + ret = true; + } +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); + if (primary_if) + batadv_hardif_free_ref(primary_if); + if (ret) + kfree_skb(skb); + return ret; +} + +/** + * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + */ +void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + uint16_t type; + __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + return; + + type = batadv_arp_get_type(bat_priv, skb, 0); + if (type != ARPOP_REPLY) + return; + + batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REPLY"); + + hw_src = batadv_arp_hw_src(skb, 0); + ip_src = batadv_arp_ip_src(skb, 0); + hw_dst = batadv_arp_hw_dst(skb, 0); + ip_dst = batadv_arp_ip_dst(skb, 0); + + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); + + /* Send the ARP reply to the candidates for both the IP addresses that + * the node got within the ARP reply + */ + batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); +} +/** + * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local + * DAT storage only + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: siaze of the encapsulation header + */ +bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + uint16_t type; + __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; + bool ret = false; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + type = batadv_arp_get_type(bat_priv, skb, hdr_size); + if (type != ARPOP_REPLY) + goto out; + + batadv_dbg_arp(bat_priv, skb, type, hdr_size, + "Parsing incoming ARP REPLY"); + + hw_src = batadv_arp_hw_src(skb, hdr_size); + ip_src = batadv_arp_ip_src(skb, hdr_size); + hw_dst = batadv_arp_hw_dst(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + + /* Update our internal cache with both the IP addresses the node got + * within the ARP reply + */ + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); + + /* if this REPLY is directed to a client of mine, let's deliver the + * packet to the interface + */ + ret = !batadv_is_my_client(bat_priv, hw_dst); +out: + /* if ret == false -> packet has to be delivered to the interface */ + return ret; +} + +/** + * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped + * (because the node has already got the reply via DAT) or not + * @bat_priv: the bat priv with all the soft interface information + * @forw_packet: the broadcast packet + * + * Returns true if the node can drop the packet, false otherwise + */ +bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, + struct batadv_forw_packet *forw_packet) +{ + uint16_t type; + __be32 ip_dst; + struct batadv_dat_entry *dat_entry = NULL; + bool ret = false; + const size_t bcast_len = sizeof(struct batadv_bcast_packet); + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + /* If this packet is an ARP_REQUEST and the node already has the + * information that it is going to ask, then the packet can be dropped + */ + if (forw_packet->num_packets) + goto out; + + type = batadv_arp_get_type(bat_priv, forw_packet->skb, bcast_len); + if (type != ARPOP_REQUEST) + goto out; + + ip_dst = batadv_arp_ip_dst(forw_packet->skb, bcast_len); + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + /* check if the node already got this entry */ + if (!dat_entry) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "ARP Request for %pI4: fallback\n", &ip_dst); + goto out; + } + + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "ARP Request for %pI4: fallback prevented\n", &ip_dst); + ret = true; + +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); + return ret; +} diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h new file mode 100644 index 000000000000..d060c033e7de --- /dev/null +++ b/net/batman-adv/distributed-arp-table.h @@ -0,0 +1,167 @@ +/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: + * + * Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_ARP_H_ +#define _NET_BATMAN_ADV_ARP_H_ + +#ifdef CONFIG_BATMAN_ADV_DAT + +#include "types.h" +#include "originator.h" + +#include <linux/if_arp.h> + +#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) + +bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb); +bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size); +void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb); +bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size); +bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, + struct batadv_forw_packet *forw_packet); + +/** + * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node + * @orig_node: the node to assign the DAT address to + */ +static inline void +batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) +{ + uint32_t addr; + + addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX); + orig_node->dat_addr = (batadv_dat_addr_t)addr; +} + +/** + * batadv_dat_init_own_addr - assign a DAT address to the node itself + * @bat_priv: the bat priv with all the soft interface information + * @primary_if: a pointer to the primary interface + */ +static inline void +batadv_dat_init_own_addr(struct batadv_priv *bat_priv, + struct batadv_hard_iface *primary_if) +{ + uint32_t addr; + + addr = batadv_choose_orig(primary_if->net_dev->dev_addr, + BATADV_DAT_ADDR_MAX); + + bat_priv->dat.addr = (batadv_dat_addr_t)addr; +} + +int batadv_dat_init(struct batadv_priv *bat_priv); +void batadv_dat_free(struct batadv_priv *bat_priv); +int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); + +/** + * batadv_dat_inc_counter - increment the correct DAT packet counter + * @bat_priv: the bat priv with all the soft interface information + * @subtype: the 4addr subtype of the packet to be counted + * + * Updates the ethtool statistics for the received packet if it is a DAT subtype + */ +static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, + uint8_t subtype) +{ + switch (subtype) { + case BATADV_P_DAT_DHT_GET: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_GET_RX); + break; + case BATADV_P_DAT_DHT_PUT: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_PUT_RX); + break; + } +} + +#else + +static inline bool +batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return false; +} + +static inline bool +batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + return false; +} + +static inline bool +batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return false; +} + +static inline bool +batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + return false; +} + +static inline bool +batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, + struct batadv_forw_packet *forw_packet) +{ + return false; +} + +static inline void +batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) +{ +} + +static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv, + struct batadv_hard_iface *iface) +{ +} + +static inline void batadv_arp_change_timeout(struct net_device *soft_iface, + const char *name) +{ +} + +static inline int batadv_dat_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_dat_free(struct batadv_priv *bat_priv) +{ +} + +static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, + uint8_t subtype) +{ +} + +#endif /* CONFIG_BATMAN_ADV_DAT */ + +#endif /* _NET_BATMAN_ADV_ARP_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 15d67abc10a4..dd07c7e3654f 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -477,22 +477,11 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hard_iface *primary_if; struct batadv_gw_node *gw_node; struct hlist_node *node; - int gw_count = 0, ret = 0; + int gw_count = 0; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); - goto out; - } seq_printf(seq, " %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", @@ -519,7 +508,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index d112fd6750b0..6b7a5d3eeb77 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -18,6 +18,7 @@ */ #include "main.h" +#include "distributed-arp-table.h" #include "hard-interface.h" #include "soft-interface.h" #include "send.h" @@ -109,6 +110,8 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, if (!primary_if) goto out; + batadv_dat_init_own_addr(bat_priv, primary_if); + skb = bat_priv->vis.my_info->skb_packet; vis_packet = (struct batadv_vis_packet *)skb->data; memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN); @@ -450,8 +453,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) /* This can't be called via a bat_priv callback because * we have no bat_priv yet. */ - atomic_set(&hard_iface->seqno, 1); - hard_iface->packet_buff = NULL; + atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); + hard_iface->bat_iv.ogm_buff = NULL; return hard_iface; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index bde3cf747507..87ca8095b011 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -42,12 +42,16 @@ static int batadv_socket_open(struct inode *inode, struct file *file) unsigned int i; struct batadv_socket_client *socket_client; + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + nonseekable_open(inode, file); socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL); - - if (!socket_client) + if (!socket_client) { + module_put(THIS_MODULE); return -ENOMEM; + } for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) { if (!batadv_socket_client_hash[i]) { @@ -59,6 +63,7 @@ static int batadv_socket_open(struct inode *inode, struct file *file) if (i == ARRAY_SIZE(batadv_socket_client_hash)) { pr_err("Error - can't add another packet client: maximum number of clients reached\n"); kfree(socket_client); + module_put(THIS_MODULE); return -EXFULL; } @@ -71,7 +76,6 @@ static int batadv_socket_open(struct inode *inode, struct file *file) file->private_data = socket_client; - batadv_inc_module_count(); return 0; } @@ -96,7 +100,7 @@ static int batadv_socket_release(struct inode *inode, struct file *file) spin_unlock_bh(&socket_client->lock); kfree(socket_client); - batadv_dec_module_count(); + module_put(THIS_MODULE); return 0; } @@ -173,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN); + skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b4aa470bc4a6..dc33a0c484a4 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -29,6 +29,7 @@ #include "hard-interface.h" #include "gateway_client.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" #include "vis.h" #include "hash.h" #include "bat_algo.h" @@ -128,6 +129,10 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; + ret = batadv_dat_init(bat_priv); + if (ret < 0) + goto err; + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -155,21 +160,13 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_bla_free(bat_priv); + batadv_dat_free(bat_priv); + free_percpu(bat_priv->bat_counters); atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -void batadv_inc_module_count(void) -{ - try_module_get(THIS_MODULE); -} - -void batadv_dec_module_count(void) -{ - module_put(THIS_MODULE); -} - int batadv_is_my_mac(const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -188,6 +185,42 @@ int batadv_is_my_mac(const uint8_t *addr) return 0; } +/** + * batadv_seq_print_text_primary_if_get - called from debugfs table printing + * function that requires the primary interface + * @seq: debugfs table seq_file struct + * + * Returns primary interface if found or NULL otherwise. + */ +struct batadv_hard_iface * +batadv_seq_print_text_primary_if_get(struct seq_file *seq) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + + if (!primary_if) { + seq_printf(seq, + "BATMAN mesh %s disabled - please specify interfaces to enable it\n", + net_dev->name); + goto out; + } + + if (primary_if->if_status == BATADV_IF_ACTIVE) + goto out; + + seq_printf(seq, + "BATMAN mesh %s disabled - primary interface not active\n", + net_dev->name); + batadv_hardif_free_ref(primary_if); + primary_if = NULL; + +out: + return primary_if; +} + static int batadv_recv_unhandled_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -274,6 +307,8 @@ static void batadv_recv_handler_init(void) /* batman icmp packet */ batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; + /* unicast with 4 addresses packet */ + batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet; /* unicast packet */ batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; /* fragmented unicast packet */ diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index d57b746219de..240c74ffeb93 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -44,6 +44,7 @@ #define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ @@ -73,6 +74,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ +#define ARP_REQ_DELAY 250 +/* numbers of originator to contact for any PUT/GET DHT operation */ +#define BATADV_DAT_CANDIDATES_NUM 3 + #define BATADV_VIS_INTERVAL 5000 /* 5 seconds */ /* how much worse secondary interfaces may be to be considered as bonding @@ -117,6 +123,9 @@ enum batadv_uev_type { #define BATADV_GW_THRESHOLD 50 +#define BATADV_DAT_CANDIDATE_NOT_FOUND 0 +#define BATADV_DAT_CANDIDATE_ORIG 1 + /* Debug Messages */ #ifdef pr_fmt #undef pr_fmt @@ -150,9 +159,9 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -void batadv_inc_module_count(void); -void batadv_dec_module_count(void); int batadv_is_my_mac(const uint8_t *addr); +struct batadv_hard_iface * +batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); @@ -165,13 +174,22 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); -/* all messages related to routing / flooding / broadcasting / etc */ +/** + * enum batadv_dbg_level - available log levels + * @BATADV_DBG_BATMAN: OGM and TQ computations related messages + * @BATADV_DBG_ROUTES: route added / changed / deleted + * @BATADV_DBG_TT: translation table messages + * @BATADV_DBG_BLA: bridge loop avoidance messages + * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_ALL: the union of all the above log levels + */ enum batadv_dbg_level { BATADV_DBG_BATMAN = BIT(0), - BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */ - BATADV_DBG_TT = BIT(2), /* translation table operations */ - BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */ - BATADV_DBG_ALL = 15, + BATADV_DBG_ROUTES = BIT(1), + BATADV_DBG_TT = BIT(2), + BATADV_DBG_BLA = BIT(3), + BATADV_DBG_DAT = BIT(4), + BATADV_DBG_ALL = 31, }; #ifdef CONFIG_BATMAN_ADV_DEBUG diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ac9bdf8f80a6..84930a4f5369 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -18,6 +18,7 @@ */ #include "main.h" +#include "distributed-arp-table.h" #include "originator.h" #include "hash.h" #include "translation-table.h" @@ -223,6 +224,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, orig_node->tt_poss_change = false; orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); + batadv_dat_init_orig_node_addr(orig_node); orig_node->router = NULL; orig_node->tt_crc = 0; atomic_set(&orig_node->last_ttvn, 0); @@ -415,23 +417,10 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) int last_seen_msecs; unsigned long last_seen_jiffies; uint32_t i; - int ret = 0; - primary_if = batadv_primary_if_get_selected(bat_priv); - - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, @@ -485,7 +474,7 @@ next: out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2d23a14c220e..df548ed196d3 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -23,14 +23,29 @@ #define BATADV_ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ enum batadv_packettype { - BATADV_IV_OGM = 0x01, - BATADV_ICMP = 0x02, - BATADV_UNICAST = 0x03, - BATADV_BCAST = 0x04, - BATADV_VIS = 0x05, - BATADV_UNICAST_FRAG = 0x06, - BATADV_TT_QUERY = 0x07, - BATADV_ROAM_ADV = 0x08, + BATADV_IV_OGM = 0x01, + BATADV_ICMP = 0x02, + BATADV_UNICAST = 0x03, + BATADV_BCAST = 0x04, + BATADV_VIS = 0x05, + BATADV_UNICAST_FRAG = 0x06, + BATADV_TT_QUERY = 0x07, + BATADV_ROAM_ADV = 0x08, + BATADV_UNICAST_4ADDR = 0x09, +}; + +/** + * enum batadv_subtype - packet subtype for unicast4addr + * @BATADV_P_DATA: user payload + * @BATADV_P_DAT_DHT_GET: DHT request message + * @BATADV_P_DAT_DHT_PUT: DHT store message + * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT + */ +enum batadv_subtype { + BATADV_P_DATA = 0x01, + BATADV_P_DAT_DHT_GET = 0x02, + BATADV_P_DAT_DHT_PUT = 0x03, + BATADV_P_DAT_CACHE_REPLY = 0x04, }; /* this file is included by batctl which needs these defines */ @@ -106,13 +121,16 @@ struct batadv_bla_claim_dst { uint8_t magic[3]; /* FF:43:05 */ uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ -} __packed; +}; struct batadv_header { uint8_t packet_type; uint8_t version; /* batman version field */ uint8_t ttl; -} __packed; + /* the parent struct has to add a byte after the header to make + * everything 4 bytes aligned again + */ +}; struct batadv_ogm_packet { struct batadv_header header; @@ -137,7 +155,7 @@ struct batadv_icmp_packet { __be16 seqno; uint8_t uid; uint8_t reserved; -} __packed; +}; #define BATADV_RR_LEN 16 @@ -153,13 +171,32 @@ struct batadv_icmp_packet_rr { uint8_t uid; uint8_t rr_cur; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; -} __packed; +}; struct batadv_unicast_packet { struct batadv_header header; uint8_t ttvn; /* destination translation table version number */ uint8_t dest[ETH_ALEN]; -} __packed; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** + * struct batadv_unicast_4addr_packet - extended unicast packet + * @u: common unicast packet header + * @src: address of the source + * @subtype: packet subtype + */ +struct batadv_unicast_4addr_packet { + struct batadv_unicast_packet u; + uint8_t src[ETH_ALEN]; + uint8_t subtype; + uint8_t reserved; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; struct batadv_unicast_frag_packet { struct batadv_header header; @@ -176,6 +213,9 @@ struct batadv_bcast_packet { uint8_t reserved; __be32 seqno; uint8_t orig[ETH_ALEN]; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ } __packed; struct batadv_vis_packet { @@ -187,7 +227,7 @@ struct batadv_vis_packet { uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */ uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */ uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */ -} __packed; +}; struct batadv_tt_query_packet { struct batadv_header header; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 376b4cc6ca82..32aa4d460e1f 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -28,6 +28,7 @@ #include "vis.h" #include "unicast.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -549,25 +550,18 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, if (tmp_neigh_node->if_incoming == recv_if) continue; - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + if (router && tmp_neigh_node->tq_avg <= router->tq_avg) continue; - /* if we don't have a router yet - * or this one is better, choose it. - */ - if ((!router) || - (tmp_neigh_node->tq_avg > router->tq_avg)) { - /* decrement refcount of - * previously selected router - */ - if (router) - batadv_neigh_node_free_ref(router); + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; - router = tmp_neigh_node; - atomic_inc_not_zero(&router->refcount); - } + /* decrement refcount of previously selected router */ + if (router) + batadv_neigh_node_free_ref(router); - batadv_neigh_node_free_ref(tmp_neigh_node); + /* we found a better router (or at least one valid router) */ + router = tmp_neigh_node; } /* use the first candidate if nothing was found. */ @@ -687,21 +681,8 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - struct ethhdr *ethhdr; - /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, - sizeof(struct batadv_roam_adv_packet)))) - goto out; - - ethhdr = (struct ethhdr *)skb_mac_header(skb); - - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; - - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) + if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); @@ -928,8 +909,12 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, bool tt_poss_change; int is_old_ttvn; - /* I could need to modify it */ - if (skb_cow(skb, sizeof(struct batadv_unicast_packet)) < 0) + /* check if there is enough data before accessing it */ + if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0) + return 0; + + /* create a copy of the skb (in case of for re-routing) to modify it. */ + if (skb_cow(skb, sizeof(*unicast_packet)) < 0) return 0; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -985,10 +970,10 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, batadv_orig_node_free_ref(orig_node); } - batadv_dbg(BATADV_DBG_ROUTES, bat_priv, - "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n", - unicast_packet->ttvn, curr_ttvn, ethhdr->h_dest, - unicast_packet->dest); + net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv, + "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n", + unicast_packet->ttvn, curr_ttvn, + ethhdr->h_dest, unicast_packet->dest); unicast_packet->ttvn = curr_ttvn; } @@ -1000,7 +985,19 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; + struct batadv_unicast_4addr_packet *unicast_4addr_packet; + uint8_t *orig_addr; + struct batadv_orig_node *orig_node = NULL; int hdr_size = sizeof(*unicast_packet); + bool is4addr; + + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + + is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR; + /* the caller function should have already pulled 2 bytes */ + if (is4addr) + hdr_size = sizeof(*unicast_4addr_packet); if (batadv_check_unicast_packet(skb, hdr_size) < 0) return NET_RX_DROP; @@ -1008,12 +1005,28 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; - unicast_packet = (struct batadv_unicast_packet *)skb->data; - /* packet for me */ if (batadv_is_my_mac(unicast_packet->dest)) { + if (is4addr) { + batadv_dat_inc_counter(bat_priv, + unicast_4addr_packet->subtype); + orig_addr = unicast_4addr_packet->src; + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + } + + if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, + hdr_size)) + goto rx_success; + if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, + hdr_size)) + goto rx_success; + batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, - NULL); + orig_node); + +rx_success: + if (orig_node) + batadv_orig_node_free_ref(orig_node); return NET_RX_SUCCESS; } @@ -1050,8 +1063,17 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, if (!new_skb) return NET_RX_SUCCESS; + if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb, + hdr_size)) + goto rx_success; + if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb, + hdr_size)) + goto rx_success; + batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if, sizeof(struct batadv_unicast_packet), NULL); + +rx_success: return NET_RX_SUCCESS; } @@ -1143,9 +1165,16 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size)) goto out; + if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size)) + goto rx_success; + if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, hdr_size)) + goto rx_success; + /* broadcast for me */ batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, orig_node); + +rx_success: ret = NET_RX_SUCCESS; goto out; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 570a8bce0364..660d9bf7d219 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -18,6 +18,7 @@ */ #include "main.h" +#include "distributed-arp-table.h" #include "send.h" #include "routing.h" #include "translation-table.h" @@ -209,6 +210,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; + if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) + goto out; + /* rebroadcast packet */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b9a28d2dd3e8..c283d87c4cce 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -20,6 +20,7 @@ #include "main.h" #include "soft-interface.h" #include "hard-interface.h" +#include "distributed-arp-table.h" #include "routing.h" #include "send.h" #include "debugfs.h" @@ -146,13 +147,16 @@ static int batadv_interface_tx(struct sk_buff *skb, struct batadv_bcast_packet *bcast_packet; struct vlan_ethhdr *vhdr; __be16 ethertype = __constant_htons(BATADV_ETH_P_BATMAN); - static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, - 0x00}; + static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, + 0x00, 0x00}; + static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, + 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; short vid __maybe_unused = -1; bool do_bcast = false; uint32_t seqno; + unsigned long brd_delay = 1; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -180,10 +184,16 @@ static int batadv_interface_tx(struct sk_buff *skb, /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... + * + * The same goes for ECTP sent at least by some Cisco Switches, + * it might confuse the mesh when used with bridge loop avoidance. */ if (batadv_compare_eth(ethhdr->h_dest, stp_addr)) goto dropped; + if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) + goto dropped; + if (is_multicast_ether_addr(ethhdr->h_dest)) { do_bcast = true; @@ -216,6 +226,13 @@ static int batadv_interface_tx(struct sk_buff *skb, if (!primary_if) goto dropped; + /* in case of ARP request, we do not immediately broadcasti the + * packet, instead we first wait for DAT to try to retrieve the + * correct ARP entry + */ + if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) + brd_delay = msecs_to_jiffies(ARP_REQ_DELAY); + if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0) goto dropped; @@ -237,7 +254,7 @@ static int batadv_interface_tx(struct sk_buff *skb, seqno = atomic_inc_return(&bat_priv->bcast_seqno); bcast_packet->seqno = htonl(seqno); - batadv_add_bcast_packet_to_list(bat_priv, skb, 1); + batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay); /* a copy is stored in the bcast list, therefore removing * the original skb. @@ -252,7 +269,12 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; } - ret = batadv_unicast_send_skb(skb, bat_priv); + if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) + goto dropped; + + batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); + + ret = batadv_unicast_send_skb(bat_priv, skb); if (ret != 0) goto dropped_freed; } @@ -347,7 +369,51 @@ out: return; } +/* batman-adv network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key batadv_netdev_xmit_lock_key; +static struct lock_class_key batadv_netdev_addr_lock_key; + +/** + * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue + * @dev: device which owns the tx queue + * @txq: tx queue to modify + * @_unused: always NULL + */ +static void batadv_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); +} + +/** + * batadv_set_lockdep_class - Set txq and addr_list lockdep class + * @dev: network device to modify + */ +static void batadv_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); +} + +/** + * batadv_softif_init - Late stage initialization of soft interface + * @dev: registered network device to modify + * + * Returns error code on failures + */ +static int batadv_softif_init(struct net_device *dev) +{ + batadv_set_lockdep_class(dev); + + return 0; +} + static const struct net_device_ops batadv_netdev_ops = { + .ndo_init = batadv_softif_init, .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, @@ -414,6 +480,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); atomic_set(&bat_priv->bridge_loop_avoidance, 0); +#ifdef CONFIG_BATMAN_ADV_DAT + atomic_set(&bat_priv->distributed_arp_table, 1); +#endif atomic_set(&bat_priv->ap_isolation, 0); atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE); atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); @@ -556,6 +625,13 @@ static const struct { { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_DAT + { "dat_get_tx" }, + { "dat_get_rx" }, + { "dat_put_tx" }, + { "dat_put_rx" }, + { "dat_cached_reply_tx" }, +#endif }; static void batadv_get_strings(struct net_device *dev, uint32_t stringset, diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 66518c75c217..fa3cc1af0918 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -20,6 +20,7 @@ #include "main.h" #include "sysfs.h" #include "translation-table.h" +#include "distributed-arp-table.h" #include "originator.h" #include "hard-interface.h" #include "gateway_common.h" @@ -122,55 +123,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ batadv_store_##_name) -#define BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func) \ -ssize_t batadv_store_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff, \ - size_t count) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_hard_iface *hard_iface; \ - ssize_t length; \ - \ - hard_iface = batadv_hardif_get_by_netdev(net_dev); \ - if (!hard_iface) \ - return 0; \ - \ - length = __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &hard_iface->_name, net_dev); \ - \ - batadv_hardif_free_ref(hard_iface); \ - return length; \ -} - -#define BATADV_ATTR_HIF_SHOW_UINT(_name) \ -ssize_t batadv_show_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_hard_iface *hard_iface; \ - ssize_t length; \ - \ - hard_iface = batadv_hardif_get_by_netdev(net_dev); \ - if (!hard_iface) \ - return 0; \ - \ - length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_name));\ - \ - batadv_hardif_free_ref(hard_iface); \ - return length; \ -} - -/* Use this, if you are going to set [name] in hard_iface to an - * unsigned integer value - */ -#define BATADV_ATTR_HIF_UINT(_name, _mode, _min, _max, _post_func) \ - static BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func)\ - static BATADV_ATTR_HIF_SHOW_UINT(_name) \ - static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ - batadv_store_##_name) - - static int batadv_store_bool_attr(char *buff, size_t count, struct net_device *net_dev, const char *attr_name, atomic_t *attr) @@ -469,6 +421,9 @@ BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); #ifdef CONFIG_BATMAN_ADV_BLA BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL); #endif +#ifdef CONFIG_BATMAN_ADV_DAT +BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, NULL); +#endif BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode, @@ -494,6 +449,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_BLA &batadv_attr_bridge_loop_avoidance, #endif +#ifdef CONFIG_BATMAN_ADV_DAT + &batadv_attr_distributed_arp_table, +#endif &batadv_attr_fragmentation, &batadv_attr_ap_isolation, &batadv_attr_vis_mode, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 112edd371b2f..f8b9c32c29a5 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -434,22 +434,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct hlist_node *node; struct hlist_head *head; uint32_t i; - int ret = 0; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", @@ -479,7 +467,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } static void @@ -501,24 +489,39 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, tt_local_entry->common.addr, message); } -void batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, - const char *message, bool roaming) +/** + * batadv_tt_local_remove - logically remove an entry from the local table + * @bat_priv: the bat priv with all the soft interface information + * @addr: the MAC address of the client to remove + * @message: message to append to the log on deletion + * @roaming: true if the deletion is due to a roaming event + * + * Returns the flags assigned to the local entry before being deleted + */ +uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, + const uint8_t *addr, const char *message, + bool roaming) { struct batadv_tt_local_entry *tt_local_entry = NULL; - uint16_t flags; + uint16_t flags, curr_flags = BATADV_NO_FLAGS; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr); if (!tt_local_entry) goto out; + curr_flags = tt_local_entry->common.flags; + flags = BATADV_TT_CLIENT_DEL; if (roaming) flags |= BATADV_TT_CLIENT_ROAM; batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags, message); + out: if (tt_local_entry) batadv_tt_local_entry_free_ref(tt_local_entry); + + return curr_flags; } static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, @@ -725,6 +728,7 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, int ret = 0; int hash_added; struct batadv_tt_common_entry *common; + uint16_t local_flags; tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr); @@ -738,6 +742,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, common->flags = flags; tt_global_entry->roam_at = 0; + /* node must store current time in case of roaming. This is + * needed to purge this entry out on timeout (if nobody claims + * it) + */ + if (flags & BATADV_TT_CLIENT_ROAM) + tt_global_entry->roam_at = jiffies; atomic_set(&common->refcount, 2); common->added_at = jiffies; @@ -788,13 +798,16 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", tt_global_entry->common.addr, orig_node->orig); + ret = 1; out_remove: + /* remove address from local hash if present */ - batadv_tt_local_remove(bat_priv, tt_global_entry->common.addr, - "global tt received", - flags & BATADV_TT_CLIENT_ROAM); - ret = 1; + local_flags = batadv_tt_local_remove(bat_priv, tt_addr, + "global tt received", + flags & BATADV_TT_CLIENT_ROAM); + tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; + out: if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); @@ -842,22 +855,10 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct hlist_node *node; struct hlist_head *head; uint32_t i; - int ret = 0; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", @@ -881,7 +882,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } /* deletes the orig list of a tt_global_entry */ @@ -1471,11 +1472,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN); + skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1537,11 +1538,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN); + skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1652,11 +1653,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN); + skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1779,11 +1780,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN); + skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -2117,11 +2118,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN); + skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); @@ -2438,7 +2439,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, if (!tt_global_entry) goto out; - ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; + ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); batadv_tt_global_entry_free_ref(tt_global_entry); out: return ret; diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 811fffd4760c..9fa4fe41c868 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -24,9 +24,9 @@ int batadv_tt_len(int changes_num); int batadv_tt_init(struct batadv_priv *bat_priv); void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, int ifindex); -void batadv_tt_local_remove(struct batadv_priv *bat_priv, - const uint8_t *addr, const char *message, - bool roaming); +uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, + const uint8_t *addr, const char *message, + bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ac1e07a80454..8ce16c1cbafb 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -28,20 +28,41 @@ (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ sizeof(struct batadv_bcast_packet))) +#ifdef CONFIG_BATMAN_ADV_DAT + +/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed, + * BATADV_DAT_ADDR_MAX is changed as well. + * + * *Please be careful: batadv_dat_addr_t must be UNSIGNED* + */ +#define batadv_dat_addr_t uint16_t + +#endif /* CONFIG_BATMAN_ADV_DAT */ + +/** + * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data + * @ogm_buff: buffer holding the OGM packet + * @ogm_buff_len: length of the OGM packet buffer + * @ogm_seqno: OGM sequence number - used to identify each OGM + */ +struct batadv_hard_iface_bat_iv { + unsigned char *ogm_buff; + int ogm_buff_len; + atomic_t ogm_seqno; +}; + struct batadv_hard_iface { struct list_head list; int16_t if_num; char if_status; struct net_device *net_dev; - atomic_t seqno; atomic_t frag_seqno; - unsigned char *packet_buff; - int packet_len; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; struct net_device *soft_iface; struct rcu_head rcu; + struct batadv_hard_iface_bat_iv bat_iv; }; /** @@ -63,6 +84,9 @@ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; uint8_t primary_addr[ETH_ALEN]; struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ +#ifdef CONFIG_BATMAN_ADV_DAT + batadv_dat_addr_t dat_addr; +#endif unsigned long *bcast_own; uint8_t *bcast_own_sum; unsigned long last_seen; @@ -162,6 +186,13 @@ enum batadv_counters { BATADV_CNT_TT_RESPONSE_RX, BATADV_CNT_TT_ROAM_ADV_TX, BATADV_CNT_TT_ROAM_ADV_RX, +#ifdef CONFIG_BATMAN_ADV_DAT + BATADV_CNT_DAT_GET_TX, + BATADV_CNT_DAT_GET_RX, + BATADV_CNT_DAT_PUT_TX, + BATADV_CNT_DAT_PUT_RX, + BATADV_CNT_DAT_CACHED_REPLY_TX, +#endif BATADV_CNT_NUM, }; @@ -228,6 +259,20 @@ struct batadv_priv_vis { struct batadv_vis_info *my_info; }; +/** + * struct batadv_priv_dat - per mesh interface DAT private data + * @addr: node DAT address + * @hash: hashtable representing the local ARP cache + * @work: work queue callback item for cache purging + */ +#ifdef CONFIG_BATMAN_ADV_DAT +struct batadv_priv_dat { + batadv_dat_addr_t addr; + struct batadv_hashtable *hash; + struct delayed_work work; +}; +#endif + struct batadv_priv { atomic_t mesh_state; struct net_device_stats stats; @@ -237,6 +282,9 @@ struct batadv_priv { atomic_t fragmentation; /* boolean */ atomic_t ap_isolation; /* boolean */ atomic_t bridge_loop_avoidance; /* boolean */ +#ifdef CONFIG_BATMAN_ADV_DAT + atomic_t distributed_arp_table; /* boolean */ +#endif atomic_t vis_mode; /* VIS_TYPE_* */ atomic_t gw_mode; /* GW_MODE_* */ atomic_t gw_sel_class; /* uint */ @@ -265,6 +313,9 @@ struct batadv_priv { struct batadv_priv_gw gw; struct batadv_priv_tt tt; struct batadv_priv_vis vis; +#ifdef CONFIG_BATMAN_ADV_DAT + struct batadv_priv_dat dat; +#endif }; struct batadv_socket_client { @@ -437,4 +488,36 @@ struct batadv_algo_ops { void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); }; +/** + * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It + * is used to stored ARP entries needed for the global DAT cache + * @ip: the IPv4 corresponding to this DAT/ARP entry + * @mac_addr: the MAC address associated to the stored IPv4 + * @last_update: time in jiffies when this entry was refreshed last time + * @hash_entry: hlist node for batadv_priv_dat::hash + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_dat_entry { + __be32 ip; + uint8_t mac_addr[ETH_ALEN]; + unsigned long last_update; + struct hlist_node hash_entry; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_dat_candidate - candidate destination for DAT operations + * @type: the type of the selected candidate. It can one of the following: + * - BATADV_DAT_CANDIDATE_NOT_FOUND + * - BATADV_DAT_CANDIDATE_ORIG + * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the + * corresponding originator node structure + */ +struct batadv_dat_candidate { + int type; + struct batadv_orig_node *orig_node; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index f39723281ca1..c9a1f6523c36 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -291,7 +291,111 @@ out: return ret; } -int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv) +/** + * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the + * common fields for unicast packets + * @skb: packet + * @hdr_size: amount of bytes to push at the beginning of the skb + * @orig_node: the destination node + * + * Returns false if the buffer extension was not possible or true otherwise + */ +static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size, + struct batadv_orig_node *orig_node) +{ + struct batadv_unicast_packet *unicast_packet; + uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + + if (batadv_skb_head_push(skb, hdr_size) < 0) + return false; + + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_packet->header.version = BATADV_COMPAT_VERSION; + /* batman packet type: unicast */ + unicast_packet->header.packet_type = BATADV_UNICAST; + /* set unicast ttl */ + unicast_packet->header.ttl = BATADV_TTL; + /* copy the destination for faster routing */ + memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + /* set the destination tt version number */ + unicast_packet->ttvn = ttvn; + + return true; +} + +/** + * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header + * @skb: the skb containing the payload to encapsulate + * @orig_node: the destination node + * + * Returns false if the payload could not be encapsulated or true otherwise + */ +static bool batadv_unicast_prepare_skb(struct sk_buff *skb, + struct batadv_orig_node *orig_node) +{ + size_t uni_size = sizeof(struct batadv_unicast_packet); + return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node); +} + +/** + * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr + * header + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb containing the payload to encapsulate + * @orig_node: the destination node + * @packet_subtype: the batman 4addr packet subtype to use + * + * Returns false if the payload could not be encapsulated or true otherwise + */ +bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct batadv_orig_node *orig, + int packet_subtype) +{ + struct batadv_hard_iface *primary_if; + struct batadv_unicast_4addr_packet *unicast_4addr_packet; + bool ret = false; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* pull the header space and fill the unicast_packet substructure. + * We can do that because the first member of the unicast_4addr_packet + * is of type struct unicast_packet + */ + if (!batadv_unicast_push_and_fill_skb(skb, + sizeof(*unicast_4addr_packet), + orig)) + goto out; + + unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr, + ETH_ALEN); + unicast_4addr_packet->subtype = packet_subtype; + unicast_4addr_packet->reserved = 0; + + ret = true; +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} + +/** + * batadv_unicast_generic_send_skb - send an skb as unicast + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @packet_type: the batman unicast packet type to use + * @packet_subtype: the batman packet subtype. It is ignored if packet_type is + * not BATADV_UNICAT_4ADDR + * + * Returns 1 in case of error or 0 otherwise + */ +int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_unicast_packet *unicast_packet; @@ -324,21 +428,23 @@ find_router: if (!neigh_node) goto out; - if (batadv_skb_head_push(skb, sizeof(*unicast_packet)) < 0) + switch (packet_type) { + case BATADV_UNICAST: + batadv_unicast_prepare_skb(skb, orig_node); + break; + case BATADV_UNICAST_4ADDR: + batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, + packet_subtype); + break; + default: + /* this function supports UNICAST and UNICAST_4ADDR only. It + * should never be invoked with any other packet type + */ goto out; + } unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; - /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; - /* copy the destination for faster routing */ - memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); - /* set the destination tt version number */ - unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); - /* inform the destination node that we are still missing a correct route * for this client. The destination will receive this packet and will * try to reroute it because the ttvn contained in the header is less @@ -348,7 +454,9 @@ find_router: unicast_packet->ttvn = unicast_packet->ttvn - 1; dev_mtu = neigh_node->if_incoming->net_dev->mtu; - if (atomic_read(&bat_priv->fragmentation) && + /* fragmentation mechanism only works for UNICAST (now) */ + if (packet_type == BATADV_UNICAST && + atomic_read(&bat_priv->fragmentation) && data_len + sizeof(*unicast_packet) > dev_mtu) { /* send frag skb decreases ttl */ unicast_packet->header.ttl++; @@ -360,7 +468,6 @@ find_router: batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = 0; - goto out; out: if (neigh_node) diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 1c46e2eb1ef9..61abba58bd8f 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -29,10 +29,44 @@ int batadv_frag_reassemble_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, struct sk_buff **new_skb); void batadv_frag_list_free(struct list_head *head); -int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv); int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface, const uint8_t dstaddr[]); +bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct batadv_orig_node *orig_node, + int packet_subtype); +int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype); + + +/** + * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the payload to send + */ +static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST, + 0); +} + +/** + * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the payload to send + * @packet_subtype: the batman 4addr packet subtype to use + */ +static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, + int packet_subtype) +{ + return batadv_unicast_generic_send_skb(bat_priv, skb, + BATADV_UNICAST_4ADDR, + packet_subtype); +} static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu) { diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 5abd1454fb07..ad14a6c91d6a 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -396,12 +396,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN); + info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN); + skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -873,12 +873,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; + len += ETH_HLEN + NET_IP_ALIGN; bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 070e8a68cfc6..7c78e2640190 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -313,6 +313,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, + .ndo_bridge_getlink = br_getlink, + .ndo_bridge_setlink = br_setlink, }; static void br_dev_free(struct net_device *dev) @@ -356,7 +358,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_id.prio[0] = 0x80; br->bridge_id.prio[1] = 0x00; - memcpy(br->group_addr, br_group_address, ETH_ALEN); + memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN); br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 76f15fda0212..4b34207419b1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -19,9 +19,6 @@ #include <linux/export.h> #include "br_private.h" -/* Bridge group multicast address 802.1d (pg 51). */ -const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; - /* Hook for brouter */ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; EXPORT_SYMBOL(br_should_route_hook); @@ -127,18 +124,6 @@ static int br_handle_local_finish(struct sk_buff *skb) return 0; /* process further */ } -/* Does address match the link local multicast address. - * 01:80:c2:00:00:0X - */ -static inline int is_link_local(const unsigned char *dest) -{ - __be16 *a = (__be16 *)dest; - static const __be16 *b = (const __be16 *)br_group_address; - static const __be16 m = cpu_to_be16(0xfff0); - - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; -} - /* * Return NULL if skb is handled * note: already called with rcu_read_lock @@ -162,7 +147,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) p = br_port_get_rcu(skb->dev); - if (unlikely(is_link_local(dest))) { + if (unlikely(is_link_local_ether_addr(dest))) { /* * See IEEE 802.1D Table 7-10 Reserved addresses * diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 093f527276a3..14b065cbd214 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -111,54 +111,33 @@ errout: /* * Dump information about all ports, in response to GETLINK */ -static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev) { - struct net *net = sock_net(skb->sk); - struct net_device *dev; - int idx; - - idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - struct net_bridge_port *port = br_port_get_rcu(dev); - - /* not a bridge port */ - if (!port || idx < cb->args[0]) - goto skip; - - if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, - NLM_F_MULTI) < 0) - break; -skip: - ++idx; - } - rcu_read_unlock(); - cb->args[0] = idx; + int err = 0; + struct net_bridge_port *port = br_port_get_rcu(dev); + + /* not a bridge port */ + if (!port) + goto out; - return skb->len; + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); +out: + return err; } /* * Change state of port (ie from forwarding to blocking etc) * Used by spanning tree in user space. */ -static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { - struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct nlattr *protinfo; - struct net_device *dev; struct net_bridge_port *p; u8 new_state; - if (nlmsg_len(nlh) < sizeof(*ifm)) - return -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_family != AF_BRIDGE) - return -EPFNOSUPPORT; protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); if (!protinfo || nla_len(protinfo) < sizeof(u8)) @@ -168,10 +147,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(net, ifm->ifi_index); - if (!dev) - return -ENODEV; - p = br_port_get_rtnl(dev); if (!p) return -EINVAL; @@ -191,8 +166,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) br_port_state_selection(p->br); spin_unlock_bh(&p->br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); - return 0; } @@ -218,29 +191,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { int __init br_netlink_init(void) { - int err; - - err = rtnl_link_register(&br_link_ops); - if (err < 0) - goto err1; - - err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, - br_dump_ifinfo, NULL); - if (err) - goto err2; - err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, - br_rtm_setlink, NULL, NULL); - if (err) - goto err3; - - return 0; - -err3: - rtnl_unregister_all(PF_BRIDGE); -err2: - rtnl_link_unregister(&br_link_ops); -err1: - return err; + return rtnl_link_register(&br_link_ops); } void __exit br_netlink_fini(void) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9b278c4ebee1..22111ffd68df 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,7 +158,9 @@ struct net_bridge_port static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + struct net_bridge_port *port = + rcu_dereference_rtnl(dev->rx_handler_data); + return br_port_exists(dev) ? port : NULL; } @@ -288,7 +290,6 @@ struct br_input_skb_cb { pr_debug("%s: " format, (br)->dev->name, ##args) extern struct notifier_block br_device_notifier; -extern const u8 br_group_address[ETH_ALEN]; /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) @@ -553,6 +554,9 @@ extern struct rtnl_link_ops br_link_ops; extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); +extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ @@ -566,10 +570,10 @@ extern void br_sysfs_delbr(struct net_device *dev); #else -#define br_sysfs_addif(p) (0) -#define br_sysfs_renameif(p) (0) -#define br_sysfs_addbr(dev) (0) -#define br_sysfs_delbr(dev) do { } while(0) +static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } +static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ #endif diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index c5c059333eab..cffb76e2161c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -14,6 +14,7 @@ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> @@ -297,23 +298,18 @@ static ssize_t store_group_addr(struct device *d, const char *buf, size_t len) { struct net_bridge *br = to_bridge(d); - unsigned int new_addr[6]; + u8 new_addr[6]; int i; if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (sscanf(buf, "%x:%x:%x:%x:%x:%x", + if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &new_addr[0], &new_addr[1], &new_addr[2], &new_addr[3], &new_addr[4], &new_addr[5]) != 6) return -EINVAL; - /* Must be 01:80:c2:00:00:0X */ - for (i = 0; i < 5; i++) - if (new_addr[i] != br_group_address[i]) - return -EINVAL; - - if (new_addr[5] & ~0xf) + if (!is_link_local_ether_addr(new_addr)) return -EINVAL; if (new_addr[5] == 1 || /* 802.3x Pause address */ diff --git a/net/core/dev.c b/net/core/dev.c index bda6d004f9f0..83232a1be1e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,7 +6264,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char goto out; /* Ensure the device has been registrered */ - err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) goto out; diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb7fbcf..c23543cba132 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,6 +39,7 @@ #include <linux/reciprocal_div.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> +#include <linux/if_vlan.h> /* No hurry in this branch * @@ -341,6 +342,12 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_VLAN_TAG: + A = vlan_tx_tag_get(skb); + continue; + case BPF_S_ANC_VLAN_TAG_PRESENT: + A = !!vlan_tx_tag_present(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(RXHASH); ANCILLARY(CPU); ANCILLARY(ALU_XOR_X); + ANCILLARY(VLAN_TAG); + ANCILLARY(VLAN_TAG_PRESENT); } } ftest->code = code; @@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk) return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); + +static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) +{ + static const u16 decodes[] = { + [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, + [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, + [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, + [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, + [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, + [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, + [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, + [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, + [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, + [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, + [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, + [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, + [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, + [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, + [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, + [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, + [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, + [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, + [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, + [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, + [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, + [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, + [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, + [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, + [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, + [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, + [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, + [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, + [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, + [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, + [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, + [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, + [BPF_S_RET_K] = BPF_RET|BPF_K, + [BPF_S_RET_A] = BPF_RET|BPF_A, + [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, + [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, + [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, + [BPF_S_ST] = BPF_ST, + [BPF_S_STX] = BPF_STX, + [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, + [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, + [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, + [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, + [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, + [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, + [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, + [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, + [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, + }; + u16 code; + + code = filt->code; + + to->code = decodes[code]; + to->jt = filt->jt; + to->jf = filt->jf; + + if (code == BPF_S_ALU_DIV_K) { + /* + * When loaded this rule user gave us X, which was + * translated into R = r(X). Now we calculate the + * RR = r(R) and report it back. If next time this + * value is loaded and RRR = r(RR) is calculated + * then the R == RRR will be true. + * + * One exception. X == 1 translates into R == 0 and + * we can't calculate RR out of it with r(). + */ + + if (filt->k == 0) + to->k = 1; + else + to->k = reciprocal_value(filt->k); + + BUG_ON(reciprocal_value(to->k) != filt->k); + } else + to->k = filt->k; +} + +int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) +{ + struct sk_filter *filter; + int i, ret; + + lock_sock(sk); + filter = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + ret = 0; + if (!filter) + goto out; + ret = filter->len; + if (!len) + goto out; + ret = -EINVAL; + if (len < filter->len) + goto out; + + ret = -EFAULT; + for (i = 0; i < filter->len; i++) { + struct sock_filter fb; + + sk_decode_filter(&filter->insns[i], &fb); + if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) + goto out; + } + + ret = filter->len; +out: + release_sock(sk); + return ret; +} diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 79285a36035f..847c02b197b0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n) return 0; } -void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; void *v; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d1dc14c2aac4..b29dacf900f9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -419,20 +419,6 @@ struct pktgen_thread { #define REMOVE 1 #define FIND 0 -static inline ktime_t ktime_now(void) -{ - struct timespec ts; - ktime_get_ts(&ts); - - return timespec_to_ktime(ts); -} - -/* This works even if 32 bit because of careful byte order choice */ -static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) -{ - return cmp1.tv64 < cmp2.tv64; -} - static const char version[] = "Packet Generator for packet performance testing. " "Version: " VERSION "\n"; @@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, "\n"); /* not really stopped, more like last-running-at */ - stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; + stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at; idle = pkt_dev->idle_acc; do_div(idle, NSEC_PER_USEC); @@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) return; } - start_time = ktime_now(); + start_time = ktime_get(); if (remaining < 100000) { /* for small delays (<100us), just loop until limit is reached */ do { - end_time = ktime_now(); - } while (ktime_lt(end_time, spin_until)); + end_time = ktime_get(); + } while (ktime_compare(end_time, spin_until) < 0); } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); @@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); - end_time = ktime_now(); + end_time = ktime_get(); } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); @@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } else { /* IPV6 * */ - if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ; - else { + if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) { int i; /* Only random destinations yet */ @@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t) pktgen_clear_counters(pkt_dev); pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; - pkt_dev->started_at = - pkt_dev->next_tx = ktime_now(); + pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); @@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; - pkt_dev->stopped_at = ktime_now(); + pkt_dev->stopped_at = ktime_get(); pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) continue; if (best == NULL) best = pkt_dev; - else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) + else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } if_unlock(t); @@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static void pktgen_resched(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); schedule(); - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); while (atomic_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) @@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) else cpu_relax(); } - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_xmit(struct pktgen_dev *pkt_dev) @@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) * "never transmit" */ if (unlikely(pkt_dev->delay == ULLONG_MAX)) { - pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); + pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX); return; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fad649ae4dec..a810f6a61372 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].doit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].doit : NULL; + return tab[msgindex].doit; } static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) @@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].dumpit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].dumpit : NULL; + return tab[msgindex].dumpit; } static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) @@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].calcit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].calcit : NULL; + return tab[msgindex].calcit; } /** @@ -2253,6 +2253,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u16 mode) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + struct nlattr *br_afspec; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifi_family = AF_BRIDGE; + ifm->__ifi_pad = 0; + ifm->ifi_type = dev->type; + ifm->ifi_index = dev->ifindex; + ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_change = 0; + + + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u8(skb, IFLA_OPERSTATE, operstate) || + (dev->master && + nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink))) + goto nla_put_failure; + + br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!br_afspec) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) || + nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { + nla_nest_cancel(skb, br_afspec); + goto nla_put_failure; + } + nla_nest_end(skb, br_afspec); + + return nlmsg_end(skb, nlh); +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} +EXPORT_SYMBOL(ndo_dflt_bridge_getlink); + +static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx = 0; + u32 portid = NETLINK_CB(cb->skb).portid; + u32 seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *master = dev->master; + + if (master && master->netdev_ops->ndo_bridge_getlink) { + if (idx >= cb->args[0] && + master->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev) < 0) + break; + idx++; + } + + if (ops->ndo_bridge_getlink) { + if (idx >= cb->args[0] && + ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + break; + idx++; + } + } + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && + master && master->netdev_ops->ndo_bridge_getlink) { + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } + + if ((flags & BRIDGE_FLAGS_SELF) && + dev->netdev_ops->ndo_bridge_getlink) { + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2434,5 +2639,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4007c1437fda..880722e22cc5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb) uarg = skb_shinfo(skb)->destructor_arg; if (uarg->callback) - uarg->callback(uarg); + uarg->callback(uarg, true); } if (skb_has_frag_list(skb)) @@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(kfree_skb); /** + * skb_tx_error - report an sk_buff xmit error + * @skb: buffer that triggered an error + * + * Report xmit error if a device callback is tracking this skb. + * skb must be freed afterwards. + */ +void skb_tx_error(struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + struct ubuf_info *uarg; + + uarg = skb_shinfo(skb)->destructor_arg; + if (uarg->callback) + uarg->callback(uarg, false); + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } +} +EXPORT_SYMBOL(skb_tx_error); + +/** * consume_skb - free an skbuff * @skb: buffer to free * @@ -797,7 +817,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); - uarg->callback(uarg); + uarg->callback(uarg, false); /* skb frags point to kernel buffers */ for (i = num_frags - 1; i >= 0; i--) { diff --git a/net/core/sock.c b/net/core/sock.c index 8a146cfcc366..06286006a2cc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1074,6 +1074,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; + case SO_BINDTODEVICE: + v.val = sk->sk_bound_dev_if; + break; + case SO_GET_FILTER: + len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); + if (len < 0) + return len; + + goto lenout; default: return -ENOPROTOOPT; } @@ -1214,13 +1223,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) #ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) +void sock_update_classid(struct sock *sk, struct task_struct *task) { u32 classid; - rcu_read_lock(); /* doing current task, which cannot vanish. */ - classid = task_cls_classid(current); - rcu_read_unlock(); + classid = task_cls_classid(task); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1263,7 +1270,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); + sock_update_classid(sk, current); sock_update_netprioidx(sk, current); } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ea850ce35d4a..662071b249cc 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -174,8 +174,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, * To protect against Request floods, increment retrans * counter (backoff, monitored by dccp_response_timer). */ - req->retrans++; - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ return NULL; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2a6abc163ed2..f6db227c1fd9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -55,6 +55,7 @@ #include <linux/sysctl.h> #endif #include <linux/kmod.h> +#include <linux/netconf.h> #include <net/arp.h> #include <net/ip.h> @@ -1442,6 +1443,149 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return 0; } +static int inet_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + /* type -1 is used for ALL */ + if (type == -1 || type == NETCONFA_FORWARDING) + size += nla_total_size(4); + if (type == -1 || type == NETCONFA_RP_FILTER) + size += nla_total_size(4); + + return size; +} + +static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv4_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + /* type -1 is used for ALL */ + if ((type == -1 || type == NETCONFA_FORWARDING) && + nla_put_s32(skb, NETCONFA_FORWARDING, + IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + goto nla_put_failure; + if ((type == -1 || type == NETCONFA_RP_FILTER) && + nla_put_s32(skb, NETCONFA_RP_FILTER, + IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); +} + +static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, + [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, +}; + +static int inet_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX+1]; + struct netconfmsg *ncm; + struct sk_buff *skb; + struct ipv4_devconf *devconf; + struct in_device *in_dev; + struct net_device *dev; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_ipv4_policy); + if (err < 0) + goto errout; + + err = EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + switch (ifindex) { + case NETCONFA_IFINDEX_ALL: + devconf = net->ipv4.devconf_all; + break; + case NETCONFA_IFINDEX_DEFAULT: + devconf = net->ipv4.devconf_dflt; + break; + default: + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + goto errout; + in_dev = __in_dev_get_rtnl(dev); + if (in_dev == NULL) + goto errout; + devconf = &in_dev->cnf; + break; + } + + err = -ENOBUFS; + skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + -1); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1467,6 +1611,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; @@ -1474,8 +1624,11 @@ static void inet_forward_change(struct net *net) dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) + if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + dev->ifindex, &in_dev->cnf); + } rcu_read_unlock(); } } @@ -1501,6 +1654,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && + new_value != old_value) { + int ifindex; + + if (cnf == net->ipv4.devconf_dflt) + ifindex = NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + ifindex = NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev = + container_of(cnf, struct in_device, + cnf); + ifindex = idev->dev->ifindex; + } + inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + ifindex, cnf); + } } return ret; @@ -1527,15 +1697,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); - } else if (*valp) { + } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); - dev_disable_lro(idev->dev); + if (*valp) + dev_disable_lro(idev->dev); + inet_netconf_notify_devconf(net, + NETCONFA_FORWARDING, + idev->dev->ifindex, + cnf); } rtnl_unlock(); rt_cache_flush(net); - } + } else + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); } return ret; @@ -1809,5 +1987,7 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); + rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, + NULL, NULL); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 71b125cd5db1..4797a800faf8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) unsigned int bytes; if (!new_size) - new_size = 1; + new_size = 16; bytes = new_size * sizeof(struct hlist_head *); new_info_hash = fib_info_hash_alloc(bytes); new_laddrhash = fib_info_hash_alloc(bytes); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d34ce2972c8f..2026542d6836 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int *expire, int *resend) { if (!rskq_defer_accept) { - *expire = req->retrans >= thresh; + *expire = req->num_timeout >= thresh; *resend = 1; return; } - *expire = req->retrans >= thresh && - (!inet_rsk(req)->acked || req->retrans >= max_retries); + *expire = req->num_timeout >= thresh && + (!inet_rsk(req)->acked || req->num_timeout >= max_retries); /* * Do not resend while waiting for data after ACK, * start to resend on end of deferring period to give * last chance for data or ACK to create established socket. */ *resend = !inet_rsk(req)->acked || - req->retrans >= rskq_defer_accept - 1; + req->num_timeout >= rskq_defer_accept - 1; } +int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) +{ + int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + + if (!err) + req->num_retrans++; + return err; +} +EXPORT_SYMBOL(inet_rtx_syn_ack); + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || - !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || + !inet_rtx_syn_ack(parent, req) || inet_rsk(req)->acked)) { unsigned long timeo; - if (req->retrans++ == 0) + if (req->num_timeout++ == 0) lopt->qlen_young--; - timeo = min((timeout << req->retrans), max_rto); + timeo = min(timeout << req->num_timeout, + max_rto); req->expires = now + timeo; reqp = &req->dl_next; continue; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 0c34bfabc11f..cb98cbed1973 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,6 +105,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. */ @@ -617,7 +620,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_family = sk->sk_family; r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; - r->idiag_retrans = req->retrans; + r->idiag_retrans = req->num_retrans; r->id.idiag_if = sk->sk_bound_dev_if; sock_diag_save_cookie(req, r->id.idiag_cookie); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 798358b10717..d763701cff1b 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1500,8 +1500,10 @@ static int __init ip_auto_config(void) * Clue in the operator. */ pr_info("IP-Config: Complete:\n"); - pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway); + + pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", + ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname); pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s", diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e15b45297c09..720855e41100 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -138,6 +138,7 @@ struct ipip_net { static int ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static void ipip_dev_free(struct net_device *dev); +static struct rtnl_link_ops ipip_link_ops __read_mostly; /* * Locking : hash tables are protected by RCU and RTNL @@ -305,6 +306,7 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &ipip_link_ops; dev_hold(dev); ipip_tunnel_link(ipn, nt); @@ -479,6 +481,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->protocol != htons(ETH_P_IP)) goto tx_error; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + goto tx_error; + if (tos & 1) tos = old_iph->tos; @@ -773,6 +779,11 @@ static void ipip_dev_free(struct net_device *dev) free_netdev(dev); } +#define IPIP_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; @@ -787,6 +798,9 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + + dev->features |= IPIP_FEATURES; + dev->hw_features |= IPIP_FEATURES; } static int ipip_tunnel_init(struct net_device *dev) @@ -829,6 +843,47 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t ipip_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ipip_link_ops __read_mostly = { + .kind = "ipip", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = ipip_get_size, + .fill_info = ipip_fill_info, +}; + static struct xfrm_tunnel ipip_handler __read_mostly = { .handler = ipip_rcv, .err_handler = ipip_err, @@ -925,14 +980,26 @@ static int __init ipip_init(void) return err; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { - unregister_pernet_device(&ipip_net_ops); pr_info("%s: can't register tunnel\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&ipip_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + unregister_pernet_device(&ipip_net_ops); + goto out; } static void __exit ipip_fini(void) { + rtnl_link_unregister(&ipip_link_ops); if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) pr_info("%s: can't deregister tunnel\n", __func__); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ba48e799b031..b236ef04914f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } req->expires = 0UL; - req->retrans = 0; + req->num_retrans = 0; /* * We need to lookup the route here to get at the correct diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 197c0008503c..733f48593ec3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct tcp_sock *tp = tcp_sk(sk); int answ; + bool slow; switch (cmd) { case SIOCINQ: if (sk->sk_state == TCP_LISTEN) return -EINVAL; - lock_sock(sk); + slow = lock_sock_fast(sk); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else if (sock_flag(sk, SOCK_URGINLINE) || @@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) answ--; } else answ = tp->urg_seq - tp->copied_seq; - release_sock(sk); + unlock_sock_fast(sk, slow); break; case SIOCATMARK: answ = tp->urg_data && tp->urg_seq == tp->copied_seq; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2c2b13a999ea..7839d51fb65b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3552,6 +3552,24 @@ static bool tcp_process_frto(struct sock *sk, int flag) return false; } +/* RFC 5961 7 [ACK Throttling] */ +static void tcp_send_challenge_ack(struct sock *sk) +{ + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; + u32 now = jiffies / HZ; + + if (now != challenge_timestamp) { + challenge_timestamp = now; + challenge_count = 0; + } + if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3571,8 +3589,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* If the ack is older than previous acks * then we can probably ignore it. */ - if (before(ack, prior_snd_una)) + if (before(ack, prior_snd_una)) { + /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ + if (before(ack, prior_snd_una - tp->max_window)) { + tcp_send_challenge_ack(sk); + return -1; + } goto old_ack; + } /* If the ack includes data we haven't sent yet, discard * this segment (RFC793 Section 3.9). @@ -5244,23 +5268,6 @@ out: } #endif /* CONFIG_NET_DMA */ -static void tcp_send_challenge_ack(struct sock *sk) -{ - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; - u32 now = jiffies / HZ; - - if (now != challenge_timestamp) { - challenge_timestamp = now; - challenge_count = 0; - } - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); - tcp_send_ack(sk); - } -} - /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5988,7 +5995,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (req) { tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->retrans; + tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c4a64355603..9dd5b34eb112 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,10 +877,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, } static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + + if (!res) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return res; } /* @@ -1070,7 +1073,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) } EXPORT_SYMBOL(tcp_md5_do_del); -void tcp_clear_md5_list(struct sock *sk) +static void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1386,7 +1389,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sock *child; int err; - req->retrans = 0; + req->num_retrans = 0; + req->num_timeout = 0; req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); @@ -1741,7 +1745,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); tcp_synack_rtt_meas(newsk, req); - newtp->total_retrans = req->retrans; + newtp->total_retrans = req->num_retrans; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1919,7 +1923,6 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); void tcp_v4_early_demux(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; @@ -1927,16 +1930,16 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) return; - if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) return; iph = ip_hdr(skb); - th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); + th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) return; - sk = __inet_lookup_established(net, &tcp_hashinfo, + sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif); @@ -2640,7 +2643,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_delta_to_clock_t(delta), - req->retrans, + req->num_timeout, from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a7302d974f32..f35f2dfb6401 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -553,7 +553,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); + tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout); paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -582,7 +582,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Note that even if there is new data in the SYN packet * they will be thrown away too. */ - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + inet_rtx_syn_ack(sk, req); return NULL; } @@ -696,7 +696,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; - else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */ tcp_rsk(req)->snt_synack = 0; /* For Fast Open no more processing is needed (sk is the @@ -706,7 +706,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return sk; /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ - if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d47c1b4421a3..b78aac30c498 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) req = tcp_sk(sk)->fastopen_rsk; req->rsk_ops->syn_ack_timeout(sk, req); - if (req->retrans >= max_retries) { + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; } @@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk) * regular retransmit because if the child socket has been accepted * it's not good to give up too easily. */ - req->rsk_ops->rtx_syn_ack(sk, req, NULL); - req->retrans++; + inet_rtx_syn_ack(sk, req); + req->num_timeout++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); + TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } /* diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0424e4e27414..fab23db8ee73 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include <net/pkt_sched.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> +#include <linux/netconf.h> #ifdef CONFIG_IPV6_PRIVACY #include <linux/random.h> @@ -401,7 +402,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) ndev->cnf.accept_dad = -1; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { pr_info("%s: Disabled Multicast RS\n", dev->name); ndev->cnf.rtr_solicits = 0; @@ -460,6 +461,141 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) return idev; } +static int inet6_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + /* type -1 is used for ALL */ + if (type == -1 || type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv6_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET6; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + /* type -1 is used for ALL */ + if ((type == -1 || type == NETCONFA_FORWARDING) && + nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); +} + +static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, + [NETCONFA_FORWARDING] = { .len = sizeof(int) }, +}; + +static int inet6_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX+1]; + struct netconfmsg *ncm; + struct sk_buff *skb; + struct ipv6_devconf *devconf; + struct inet6_dev *in6_dev; + struct net_device *dev; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_ipv6_policy); + if (err < 0) + goto errout; + + err = EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + switch (ifindex) { + case NETCONFA_IFINDEX_ALL: + devconf = net->ipv6.devconf_all; + break; + case NETCONFA_IFINDEX_DEFAULT: + devconf = net->ipv6.devconf_dflt; + break; + default: + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + goto errout; + in6_dev = __in6_dev_get(dev); + if (in6_dev == NULL) + goto errout; + devconf = &in6_dev->cnf; + break; + } + + err = -ENOBUFS; + skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + -1); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -471,7 +607,7 @@ static void dev_forward_change(struct inet6_dev *idev) dev = idev->dev; if (idev->cnf.forwarding) dev_disable_lro(dev); - if (dev && (dev->flags & IFF_MULTICAST)) { + if (dev->flags & IFF_MULTICAST) { if (idev->cnf.forwarding) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); else @@ -486,6 +622,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + dev->ifindex, &idev->cnf); } @@ -518,6 +656,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) *p = newf; if (p == &net->ipv6.devconf_dflt->forwarding) { + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } @@ -525,6 +667,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); @@ -553,7 +699,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warn("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->dst); + ip6_rt_put(ifp->rt); kfree_rcu(ifp, rcu); } @@ -805,7 +951,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt6_set_expires(rt, expires); } } - dst_release(&rt->dst); + ip6_rt_put(rt); } /* clean up prefsrc entries */ @@ -1692,7 +1838,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) cfg.fc_flags |= RTF_NONEXTHOP; #endif @@ -1752,7 +1898,7 @@ static void addrconf_add_mroute(struct net_device *dev) ip6_route_add(&cfg); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void sit_route_add(struct net_device *dev) { struct fib6_config cfg = { @@ -1881,8 +2027,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, dev, expires, flags); } - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); } /* Try to figure out our local address for this prefix */ @@ -2104,7 +2249,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) if (dev == NULL) goto err_exit; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT) { const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; @@ -2315,7 +2460,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, } } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void sit_add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; @@ -2434,7 +2579,7 @@ static void addrconf_dev_config(struct net_device *dev) addrconf_add_linklocal(idev, &addr); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2471,7 +2616,7 @@ static void addrconf_sit_config(struct net_device *dev) } #endif -#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +#if IS_ENABLED(CONFIG_NET_IPGRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2601,12 +2746,12 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } switch (dev->type) { -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: addrconf_sit_config(dev); break; #endif -#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +#if IS_ENABLED(CONFIG_NET_IPGRE) case ARPHRD_IPGRE: addrconf_gre_config(dev); break; @@ -3194,7 +3339,7 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { @@ -4784,6 +4929,8 @@ int __init addrconf_init(void) inet6_dump_ifmcaddr, NULL); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, + NULL, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7e6139508ee7..ecc35b93314b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -44,7 +44,7 @@ #define IPV6HDR_BASELEN 8 struct tmp_ext { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) struct in6_addr saddr; #endif struct in6_addr daddr; @@ -152,7 +152,7 @@ bad: return false; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) /** * ipv6_rearrange_destopt - rearrange IPv6 destination options header * @iph: IPv6 header @@ -320,7 +320,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err) memcpy(top_iph, iph_base, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(&top_iph->saddr, iph_ext, extlen); #else memcpy(&top_iph->daddr, iph_ext, extlen); @@ -385,7 +385,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(iph_base, top_iph, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(iph_ext, &top_iph->saddr, extlen); #else memcpy(iph_ext, &top_iph->daddr, extlen); @@ -434,7 +434,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, iph_base, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(&top_iph->saddr, iph_ext, extlen); #else memcpy(&top_iph->daddr, iph_ext, extlen); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index cdf02be5f191..4963c769a13f 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index be2b67d631e5..93cbad2c0aa7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -769,7 +769,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); switch (rthdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index fa3d9c328092..f005acc58b2a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,7 +43,7 @@ #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif @@ -224,7 +224,7 @@ bad: Destination options header. *****************************/ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) { struct ipv6_destopt_hao *hao; @@ -288,7 +288,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) #endif static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, @@ -300,7 +300,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = { static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) __u16 dstbuf; #endif struct dst_entry *dst = skb_dst(skb); @@ -315,14 +315,14 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) } opt->lastopt = opt->dst1 = skb_network_header_len(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) dstbuf = opt->dst1; #endif if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; @@ -378,7 +378,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own @@ -404,7 +404,7 @@ looped_back: } switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (accept_source_route < 0) goto unknown_rh; @@ -461,7 +461,7 @@ looped_back: addr += i - 1; switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index d9fb9110f607..2e1a432867c0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -100,7 +100,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->dst); + ip6_rt_put(rt); rt = NULL; goto out; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 24d69dbca4d6..b4a9fd51dae7 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -280,7 +280,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) static void mip6_addr_swap(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 24995a93ef8c..710cafd2e1a9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -672,6 +672,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, iter->rt6i_idev == rt->rt6i_idev && ipv6_addr_equal(&iter->rt6i_gateway, &rt->rt6i_gateway)) { + if (rt->rt6i_nsiblings) + rt->rt6i_nsiblings = 0; if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->rt6i_flags & RTF_EXPIRES)) @@ -680,6 +682,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt6_set_expires(iter, rt->dst.expires); return -EEXIST; } + /* If we have the same destination and the same metric, + * but not the same gateway, then the route we try to + * add is sibling to this route, increment our counter + * of siblings, and later we will add our route to the + * list. + * Only static routes (which don't have flag + * RTF_EXPIRES) are used for ECMPv6. + * + * To avoid long list, we only had siblings if the + * route have a gateway. + */ + if (rt->rt6i_flags & RTF_GATEWAY && + !(rt->rt6i_flags & RTF_EXPIRES) && + !(iter->rt6i_flags & RTF_EXPIRES)) + rt->rt6i_nsiblings++; } if (iter->rt6i_metric > rt->rt6i_metric) @@ -692,6 +709,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (ins == &fn->leaf) fn->rr_ptr = NULL; + /* Link this route to others same route. */ + if (rt->rt6i_nsiblings) { + unsigned int rt6i_nsiblings; + struct rt6_info *sibling, *temp_sibling; + + /* Find the first route that have the same metric */ + sibling = fn->leaf; + while (sibling) { + if (sibling->rt6i_metric == rt->rt6i_metric) { + list_add_tail(&rt->rt6i_siblings, + &sibling->rt6i_siblings); + break; + } + sibling = sibling->dst.rt6_next; + } + /* For each sibling in the list, increment the counter of + * siblings. BUG() if counters does not match, list of siblings + * is broken! + */ + rt6i_nsiblings = 0; + list_for_each_entry_safe(sibling, temp_sibling, + &rt->rt6i_siblings, rt6i_siblings) { + sibling->rt6i_nsiblings++; + BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); + rt6i_nsiblings++; + } + BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + } + /* * insert node */ @@ -1193,6 +1239,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (fn->rr_ptr == rt) fn->rr_ptr = NULL; + /* Remove this entry from other siblings */ + if (rt->rt6i_nsiblings) { + struct rt6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, rt6i_siblings) + sibling->rt6i_nsiblings--; + rt->rt6i_nsiblings = 0; + list_del_init(&rt->rt6i_siblings); + } + /* Adjust walkers */ read_lock(&fib6_walker_lock); FOR_WALKERS(w) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d5cb3c4e66f8..12aa473e9793 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1069,7 +1069,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->mtu = IPV6_MIN_MTU; } } - dst_release(&rt->dst); + ip6_rt_put(rt); } t->hlen = addend; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index aece3e792f84..3deaa4e2e8e2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -538,8 +538,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) to->nf_trace = from->nf_trace; #endif skb_copy_secmark(to, from); @@ -564,7 +563,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) found_rhdr = 1; break; case NEXTHDR_DEST: -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif @@ -756,7 +755,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->dst); + ip6_rt_put(rt); return 0; } @@ -768,7 +767,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->dst); + ip6_rt_put(rt); return err; slow_path_clean: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cb7e2ded6f08..424ed45ef122 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); +static struct rtnl_link_ops ip6_link_ops __read_mostly; static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { @@ -299,6 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) goto failed_free; strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -663,8 +665,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); kfree_skb(skb2); } @@ -1208,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->dst); + ip6_rt_put(rt); } } @@ -1505,6 +1506,55 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } +static size_t ip6_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_IPTUN_FLOWINFO */ + nla_total_size(4) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), + &parm->raddr) || + nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), + &parm->laddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || + nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || + nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ip6_link_ops __read_mostly = { + .kind = "ip6tnl", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip6_tnl), + .get_size = ip6_get_size, + .fill_info = ip6_fill_info, +}; + static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { .handler = ip4ip6_rcv, .err_handler = ip4ip6_err, @@ -1613,9 +1663,14 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + err = rtnl_link_register(&ip6_link_ops); + if (err < 0) + goto rtnl_link_failed; return 0; +rtnl_link_failed: + xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: @@ -1630,6 +1685,7 @@ out_pernet: static void __exit ip6_tunnel_cleanup(void) { + rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ba6d13d1f1e1..a7bee6a91335 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -397,7 +397,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 92f8e48e4ba4..b19ed51a45bb 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -163,7 +163,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } } else dev = dev_get_by_index_rcu(net, ifindex); @@ -260,7 +260,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } } else dev = dev_get_by_index_rcu(net, ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2edce30ef733..4f47aa5183ae 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -905,7 +905,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { - /* XXX: idev->cnf.prixy_ndp */ + /* XXX: idev->cnf.proxy_ndp */ goto out; } @@ -1144,7 +1144,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - dst_release(&rt->dst); + ip6_rt_put(rt); return; } } @@ -1169,7 +1169,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - dst_release(&rt->dst); + ip6_rt_put(rt); return; } neigh->flags |= NTF_ROUTER; @@ -1325,8 +1325,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); if (neigh) neigh_release(neigh); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d7cb04506c3d..10ce76a2cb94 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -207,8 +207,7 @@ ip6t_get_target_c(const struct ip6t_entry *e) return ip6t_get_target((struct ip6t_entry *)e); } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* This cries for unification! */ static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", @@ -381,8 +380,7 @@ ip6t_do_table(struct sk_buff *skb, t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(skb, hook, in, out, diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5d1d8b04d694..5060d54199ab 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -67,7 +67,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) ret = true; out: - dst_release(&rt->dst); + ip6_rt_put(rt); return ret; } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 8860d23e61cf..ccb5cbe93549 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -295,7 +295,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -346,7 +346,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, .get_l4proto = ipv6_get_l4proto, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv6_tuple_to_nlattr, .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 2d54b2061d68..24df3dde0076 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -375,7 +375,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .get_timeouts = icmpv6_get_timeouts, .new = icmpv6_new, .error = icmpv6_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, .nlattr_to_tuple = icmpv6_nlattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index cdd6d045e42e..aacd121fe8c5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -19,7 +19,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> @@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif @@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, { struct sk_buff *reasm; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c index 5d6da784305b..61aaf70f376e 100644 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -84,7 +84,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { .manip_pkt = icmpv6_manip_pkt, .in_range = icmpv6_in_range, .unique_tuple = icmpv6_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d8e95c77db99..6cd29b1e8b92 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,7 +50,7 @@ #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif #include <linux/mroute6.h> @@ -123,7 +123,7 @@ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) return 1; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); static mh_filter_t __rcu *mh_filter __read_mostly; @@ -184,7 +184,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) filtered = icmpv6_filter(sk, skb); break; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: { /* XXX: To validate MH only once for each packet, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b1e6cf0b95fd..30458726accf 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -57,6 +57,7 @@ #include <net/xfrm.h> #include <net/netevent.h> #include <net/netlink.h> +#include <net/nexthop.h> #include <asm/uaccess.h> @@ -289,6 +290,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); rt->rt6i_genid = rt_genid(net); + INIT_LIST_HEAD(&rt->rt6i_siblings); + rt->rt6i_nsiblings = 0; } return rt; } @@ -318,13 +321,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); - -static u32 rt6_peer_genid(void) -{ - return atomic_read(&__rt6_peer_genid); -} - void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer_base *base; @@ -338,8 +334,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create) if (peer) { if (!rt6_set_peer(rt, peer)) inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); } } @@ -385,6 +379,69 @@ static bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* Multipath route selection: + * Hash based function using packet header and flowlabel. + * Adapted from fib_info_hashfn() + */ +static int rt6_info_hash_nhsfn(unsigned int candidate_count, + const struct flowi6 *fl6) +{ + unsigned int val = fl6->flowi6_proto; + + val ^= (__force u32)fl6->daddr.s6_addr32[0]; + val ^= (__force u32)fl6->daddr.s6_addr32[1]; + val ^= (__force u32)fl6->daddr.s6_addr32[2]; + val ^= (__force u32)fl6->daddr.s6_addr32[3]; + + val ^= (__force u32)fl6->saddr.s6_addr32[0]; + val ^= (__force u32)fl6->saddr.s6_addr32[1]; + val ^= (__force u32)fl6->saddr.s6_addr32[2]; + val ^= (__force u32)fl6->saddr.s6_addr32[3]; + + /* Work only if this not encapsulated */ + switch (fl6->flowi6_proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_SCTP: + val ^= (__force u16)fl6->fl6_sport; + val ^= (__force u16)fl6->fl6_dport; + break; + + case IPPROTO_ICMPV6: + val ^= (__force u16)fl6->fl6_icmp_type; + val ^= (__force u16)fl6->fl6_icmp_code; + break; + } + /* RFC6438 recommands to use flowlabel */ + val ^= (__force u32)fl6->flowlabel; + + /* Perhaps, we need to tune, this function? */ + val = val ^ (val >> 7) ^ (val >> 12); + return val % candidate_count; +} + +static struct rt6_info *rt6_multipath_select(struct rt6_info *match, + struct flowi6 *fl6) +{ + struct rt6_info *sibling, *next_sibling; + int route_choosen; + + route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); + /* Don't change the route, if route_choosen == 0 + * (siblings does not include ourself) + */ + if (route_choosen) + list_for_each_entry_safe(sibling, next_sibling, + &match->rt6i_siblings, rt6i_siblings) { + route_choosen--; + if (route_choosen == 0) { + match = sibling; + break; + } + } + return match; +} + /* * Route lookup. Any table->tb6_lock is implied. */ @@ -666,7 +723,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, else rt6_set_expires(rt, jiffies + HZ * lifetime); - dst_release(&rt->dst); + ip6_rt_put(rt); } return 0; } @@ -702,6 +759,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -863,7 +922,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - + if (rt->rt6i_nsiblings && oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -879,7 +939,7 @@ restart: else goto out2; - dst_release(&rt->dst); + ip6_rt_put(rt); rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->dst); @@ -896,7 +956,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ - dst_release(&rt->dst); + ip6_rt_put(rt); goto relookup; out: @@ -1030,14 +1090,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { - if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt6_has_peer(rt)) - rt6_bind_peer(rt, 0); - rt->rt6i_peer_genid = rt6_peer_genid(); - } + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; - } + return NULL; } @@ -1507,7 +1562,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->dst.dev) { - dst_release(&grt->dst); + ip6_rt_put(grt); goto out; } } else { @@ -1518,7 +1573,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags & RTF_GATEWAY)) err = 0; - dst_release(&grt->dst); + ip6_rt_put(grt); if (err) goto out; @@ -1604,7 +1659,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_unlock_bh(&table->tb6_lock); out: - dst_release(&rt->dst); + ip6_rt_put(rt); return err; } @@ -2249,6 +2304,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2326,11 +2382,71 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_TABLE]) cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + if (tb[RTA_MULTIPATH]) { + cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); + cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + } + err = 0; errout: return err; } +static int ip6_route_multipath(struct fib6_config *cfg, int add) +{ + struct fib6_config r_cfg; + struct rtnexthop *rtnh; + int remaining; + int attrlen; + int err = 0, last_err = 0; + +beginning: + rtnh = (struct rtnexthop *)cfg->fc_mp; + remaining = cfg->fc_mp_len; + + /* Parse a Multipath Entry */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_flags |= RTF_GATEWAY; + } + } + err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); + if (err) { + last_err = err; + /* If we are trying to remove a route, do not stop the + * loop when ip6_route_del() fails (because next hop is + * already gone), we should try to remove all next hops. + */ + if (add) { + /* If add fails, we should try to delete all + * next hops that have been already added. + */ + add = 0; + goto beginning; + } + } + /* Because each route is added like a single route we remove + * this flag after the first nexthop (if there is a collision, + * we have already fail to add the first nexthop: + * fib6_add_rt2node() has reject it). + */ + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; + rtnh = rtnh_next(rtnh, &remaining); + } + + return last_err; +} + static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct fib6_config cfg; @@ -2340,7 +2456,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_del(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 0); + else + return ip6_route_del(&cfg); } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -2352,7 +2471,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_add(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 1); + else + return ip6_route_add(&cfg); } static inline size_t rt6_nlmsg_size(void) @@ -2596,7 +2718,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { - dst_release(&rt->dst); + ip6_rt_put(rt); err = -ENOBUFS; goto errout; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3ed54ffd8d50..b543c56cad28 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -68,6 +68,7 @@ static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { @@ -282,6 +283,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -1216,6 +1218,47 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t sit_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int sit_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops sit_link_ops __read_mostly = { + .kind = "sit", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = sit_get_size, + .fill_info = sit_fill_info, +}; + static struct xfrm_tunnel sit_handler __read_mostly = { .handler = ipip6_rcv, .err_handler = ipip6_err, @@ -1302,6 +1345,7 @@ static struct pernet_operations sit_net_ops = { static void __exit sit_cleanup(void) { + rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); unregister_pernet_device(&sit_net_ops); @@ -1319,10 +1363,21 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - unregister_pernet_device(&sit_net_ops); pr_info("%s: can't add protocol\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&sit_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&sit_handler, AF_INET6); +xfrm_tunnel_failed: + unregister_pernet_device(&sit_net_ops); + goto out; } module_init(sit_init); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 182ab9a85d6c..40161977f7cf 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq6->iif = inet6_iif(skb); req->expires = 0UL; - req->retrans = 0; + req->num_retrans = 0; ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 26175bffbaa0..c73d0ebde9c8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -495,9 +495,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { struct flowi6 fl6; + int res; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + if (!res) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return res; } static void tcp_v6_reqsk_destructor(struct request_sock *req) @@ -1364,7 +1367,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); tcp_synack_rtt_meas(newsk, req); - newtp->total_retrans = req->retrans; + newtp->total_retrans = req->num_retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; @@ -1741,11 +1744,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; - struct inet_sock *icsk = inet_sk(sk); + if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == skb->skb_iif) + inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1866,7 +1869,7 @@ static void get_openreq6(struct seq_file *seq, 0,0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), - req->retrans, + req->num_timeout, from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f8c4c08ffb60..f3ed8ca59b94 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->flowi6_proto = nexthdr; return; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 3f2f7c4ab721..d8c70b8efc24 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -101,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p) return 1; else return 3; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: return 2; @@ -134,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p) switch (v->mode) { case XFRM_MODE_TRANSPORT: return 1; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: return 2; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 6c4cc12c7414..bbba3a19e944 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -632,7 +632,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl nla_put_u16(skb, L2TP_ATTR_MRU, session->mru))) goto nla_put_failure; - if ((session->ifname && session->ifname[0] && + if ((session->ifname[0] && nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || (session->cookie_len && nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 8b2cffdfdd99..0c3b1670b0d1 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -28,12 +28,11 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 + select IP6_NF_IPTABLES ---help--- - Add IPv6 support to IPVS. This is incomplete and might be dangerous. + Add IPv6 support to IPVS. - See http://www.mindbasket.com/ipvs for more information. - - Say N if unsure. + Say Y if unsure. config IP_VS_DEBUG bool "IP virtual server debugging" diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 1548df9a7524..30e764ad021f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) static int ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse, - struct ip_vs_conn_param *p) + int inverse, struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; struct net *net = skb_net(skb); - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return 1; @@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse) + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) + if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) return NULL; return ip_vs_conn_in_get(&p); @@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse) + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) + if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) return NULL; return ip_vs_conn_out_get(&p); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 58918e20f9d5..fb45640dc1fb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, */ static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, - struct sk_buff *skb, - __be16 src_port, __be16 dst_port, int *ignored) + struct sk_buff *skb, __be16 src_port, __be16 dst_port, + int *ignored, struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp = NULL; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport = 0; /* destination port to forward */ @@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc, union nf_inet_addr snet; /* source network of the client, after masking */ - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); + ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask); else #endif - snet.ip = iph.saddr.ip & svc->netmask; + snet.ip = iph->saddr.ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), - IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), + IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * is created for other persistent services. */ { - int protocol = iph.protocol; - const union nf_inet_addr *vaddr = &iph.daddr; + int protocol = iph->protocol; + const union nf_inet_addr *vaddr = &iph->daddr; __be16 vport = 0; if (dst_port == svc->port) { @@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = dest->port; flags = (svc->flags & IP_VS_SVC_F_ONEPACKET - && iph.protocol == IPPROTO_UDP)? + && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, - src_port, &iph.daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, + src_port, &iph->daddr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { @@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_proto_data *pd, int *ignored) + struct ip_vs_proto_data *pd, int *ignored, + struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; *ignored = 1; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); + /* + * IPv6 frags, only the first hit here. + */ + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return NULL; @@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { + (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); @@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); + return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, + iph); *ignored = 0; @@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } flags = (svc->flags & IP_VS_SVC_F_ONEPACKET - && iph.protocol == IPPROTO_UDP)? + && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* @@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, - &iph.saddr, pptr[0], &iph.daddr, pptr[1], - &p); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + &iph->saddr, pptr[0], &iph->daddr, + pptr[1], &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); @@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_proto_data *pd) + struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { __be16 _ports[2], *pptr; - struct ip_vs_iphdr iph; #ifdef CONFIG_SYSCTL struct net *net; struct netns_ipvs *ipvs; int unicast; #endif - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - - pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; @@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; + unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST; else #endif - unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); + unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST); /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create @@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && - iph.protocol == IPPROTO_UDP)? + iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; @@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, - &iph.saddr, pptr[0], - &iph.daddr, pptr[1], &p); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, NULL, skb->mark); @@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ - ret = cp->packet_xmit(skb, cp, pd->pp); + ret = cp->packet_xmit(skb, cp, pd->pp, iph); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); @@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -#ifdef CONFIG_IP_VS_IPV6 -static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) -{ - /* TODO IPv6: Find out what to do here for IPv6 */ - return 0; -} -#endif - static int ip_vs_route_me_harder(int af, struct sk_buff *skb) { #ifdef CONFIG_IP_VS_IPV6 @@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int inout) { struct ipv6hdr *iph = ipv6_hdr(skb); - unsigned int icmp_offset = sizeof(struct ipv6hdr); - struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + - icmp_offset); - struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); + unsigned int icmp_offset = 0; + unsigned int offs = 0; /* header offset*/ + int protocol; + struct icmp6hdr *icmph; + struct ipv6hdr *ciph; + unsigned short fragoffs; + + ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL); + icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset); + offs = icmp_offset + sizeof(struct icmp6hdr); + ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs); + + protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL); if (inout) { iph->saddr = cp->vaddr.in6; @@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, } /* the TCP/UDP/SCTP port */ - if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || - IPPROTO_SCTP == ciph->nexthdr) { - __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); + if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || + IPPROTO_SCTP == protocol)) { + __be16 *ports = (void *)(skb_network_header(skb) + offs); + IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__, + ntohs(inout ? ports[1] : ports[0]), + ntohs(inout ? cp->vport : cp->dport)); if (inout) ports[1] = cp->vport; else @@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - offset += cih->ihl * 4; - - ip_vs_fill_iphdr(AF_INET, cih, &ciph); + ip_vs_fill_ip4hdr(cih, &ciph); + ciph.len += offset; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); if (!cp) return NF_ACCEPT; snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, - pp, offset, ihl); + pp, ciph.len, ihl); } #ifdef CONFIG_IP_VS_IPV6 static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum) + unsigned int hooknum, struct ip_vs_iphdr *ipvsh) { - struct ipv6hdr *iph; struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ciph, *cih; /* The ip header contained - within the ICMP */ - struct ip_vs_iphdr ciph; + struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */ + struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; - unsigned int offset; union nf_inet_addr snet; + unsigned int writable; *related = 1; - - /* reassemble IP fragments */ - if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { - if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum))) - return NF_STOLEN; - } - - iph = ipv6_hdr(skb); - offset = sizeof(struct ipv6hdr); - ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n", - ic->icmp6_type, ntohs(icmpv6_id(ic)), - &iph->saddr, &iph->daddr); - /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy @@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && - (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && - (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } + /* Fragment header that is before ICMP header tells us that: + * it's not an error message since they can't be fragmented. + */ + if (ipvsh->flags & IP6T_FH_F_FRAG) + return NF_DROP; + + IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + &ipvsh->saddr, &ipvsh->daddr); /* Now find the contained IP header */ - offset += sizeof(_icmph); - cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); - if (cih == NULL) + ciph.len = ipvsh->len + sizeof(_icmph); + ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); + if (ip6h == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - - pp = ip_vs_proto_get(cih->nexthdr); + ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */ + ciph.daddr.in6 = ip6h->daddr; + /* skip possible IPv6 exthdrs of contained IPv6 packet */ + ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); + if (ciph.protocol < 0) + return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ + + pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; - /* Is the embedded protocol header present? */ - /* TODO: we don't support fragmentation at the moment anyways */ - if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) - return NF_ACCEPT; - - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, - "Checking outgoing ICMPv6 for"); - - offset += sizeof(struct ipv6hdr); - - ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); if (!cp) return NF_ACCEPT; - snet.in6 = iph->saddr; - return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, - pp, offset, sizeof(struct ipv6hdr)); + snet.in6 = ciph.saddr.in6; + writable = ciph.len; + return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, + pp, writable, sizeof(struct ipv6hdr)); } #endif @@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - struct ip_vs_conn *cp, int ihl) + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); - if (!skb_make_writable(skb, ihl)) + if (!skb_make_writable(skb, iph->len)) goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph)) goto drop; #ifdef CONFIG_IP_VS_IPV6 @@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (!net_ipvs(net)->enable) return NF_ACCEPT; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { + if (!iph.fragoffs && skb_nfct_reasm(skb)) { + struct sk_buff *reasm = skb_nfct_reasm(skb); + /* Save fw mark for coming frags */ + reasm->ipvs_property = 1; + reasm->mark = skb->mark; + } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, - hooknum); + hooknum, &iph); if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } } else #endif @@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } pd = ip_vs_proto_data_get(net, iph.protocol); @@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { - if (ip_vs_gather_frags_v6(skb, - ip_vs_defrag_user(hooknum))) - return NF_STOLEN; - } - - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - } else + if (af == AF_INET) #endif if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); + cp = pp->conn_out_get(af, skb, &iph, 0); if (likely(cp)) - return handle_response(af, skb, pd, cp, iph.len); + return handle_response(af, skb, pd, cp, &iph); if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, iph.len, - sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph.len, + sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(net, af, iph.protocol, @@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - offset += cih->ihl * 4; - - ip_vs_fill_iphdr(AF_INET, cih, &ciph); + ip_vs_fill_ip4hdr(cih, &ciph); + ciph.len += offset; + offset = ciph.len; /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); if (!cp) return NF_ACCEPT; @@ -1450,7 +1434,7 @@ ignore_ipip: ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); + verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: __ip_vs_conn_put(cp); @@ -1459,38 +1443,24 @@ out: } #ifdef CONFIG_IP_VS_IPV6 -static int -ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) +static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, + unsigned int hooknum, struct ip_vs_iphdr *iph) { struct net *net = NULL; - struct ipv6hdr *iph; + struct ipv6hdr _ip6h, *ip6h; struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ciph, *cih; /* The ip header contained - within the ICMP */ - struct ip_vs_iphdr ciph; + struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offset, verdict; + unsigned int offs_ciph, writable, verdict; *related = 1; - /* reassemble IP fragments */ - if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { - if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum))) - return NF_STOLEN; - } - - iph = ipv6_hdr(skb); - offset = sizeof(struct ipv6hdr); - ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph); if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n", - ic->icmp6_type, ntohs(icmpv6_id(ic)), - &iph->saddr, &iph->daddr); - /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy @@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && - (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && - (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } + /* Fragment header that is before ICMP header tells us that: + * it's not an error message since they can't be fragmented. + */ + if (iph->flags & IP6T_FH_F_FRAG) + return NF_DROP; + + IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + &iph->saddr, &iph->daddr); /* Now find the contained IP header */ - offset += sizeof(_icmph); - cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); - if (cih == NULL) + ciph.len = iph->len + sizeof(_icmph); + offs_ciph = ciph.len; /* Save ip header offset */ + ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); + if (ip6h == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ + ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */ + ciph.daddr.in6 = ip6h->daddr; + /* skip possible IPv6 exthdrs of contained IPv6 packet */ + ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); + if (ciph.protocol < 0) + return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ net = skb_net(skb); - pd = ip_vs_proto_data_get(net, cih->nexthdr); + pd = ip_vs_proto_data_get(net, ciph.protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; - /* Is the embedded protocol header present? */ - /* TODO: we don't support fragmentation at the moment anyways */ - if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) + /* Cannot handle fragmented embedded protocol */ + if (ciph.fragoffs) return NF_ACCEPT; - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, "Checking incoming ICMPv6 for"); - offset += sizeof(struct ipv6hdr); + /* The embedded headers contain source and dest in reverse order + * if not from localhost + */ + cp = pp->conn_in_get(AF_INET6, skb, &ciph, + (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1); - ip_vs_fill_iphdr(AF_INET6, cih, &ciph); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; + /* VS/TUN, VS/DR and LOCALNODE just let it go */ + if ((hooknum == NF_INET_LOCAL_OUT) && + (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { + __ip_vs_conn_put(cp); + return NF_ACCEPT; + } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || - IPPROTO_SCTP == cih->nexthdr) - offset += 2 * sizeof(__u16); - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); + + /* Need to mangle contained IPv6 header in ICMPv6 packet */ + writable = ciph.len; + if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || + IPPROTO_SCTP == ciph.protocol) + writable += 2 * sizeof(__u16); /* Also mangle ports */ + + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); __ip_vs_conn_put(cp); @@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(af, skb, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, @@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (!net_ipvs(net)->enable) return NF_ACCEPT; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(af, skb, &iph); /* Bad... Do not break raw sockets */ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && @@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { + if (!iph.fragoffs && skb_nfct_reasm(skb)) { + struct sk_buff *reasm = skb_nfct_reasm(skb); + /* Save fw mark for coming frags. */ + reasm->ipvs_property = 1; + reasm->mark = skb->mark; + } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); + int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, + &iph); if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } } else #endif @@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } /* Protocol supported? */ @@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); - - if (unlikely(!cp)) { + cp = pp->conn_in_get(af, skb, &iph, 0); + if (unlikely(!cp) && !iph.fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ int v; - if (!pp->conn_schedule(af, skb, pd, &v, &cp)) + /* Schedule and create new connection entry into &cp */ + if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph)) return v; } @@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); + if (iph.fragoffs && !skb_nfct_reasm(skb)) { + /* Fragment that couldn't be mapped to a conn entry + * and don't have any pointer to a reasm skb + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); + } return NF_ACCEPT; } @@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_in_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) - ret = cp->packet_xmit(skb, cp, pp); + ret = cp->packet_xmit(skb, cp, pp, &iph); /* do not touch skb anymore */ else { IP_VS_DBG_RL("warning: packet_xmit is null"); @@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 /* + * AF_INET6 fragment handling + * Copy info from first fragment, to the rest of them. + */ +static unsigned int +ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm = skb_nfct_reasm(skb); + struct net *net; + + /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. + * ipvs_property is set when checking first fragment + * in ip_vs_in() and ip_vs_out(). + */ + if (reasm) + IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); + if (!reasm || !reasm->ipvs_property) + return NF_ACCEPT; + + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + + /* Copy stored fw mark, saved in ip_vs_{in,out} */ + skb->mark = reasm->mark; + + return NF_ACCEPT; +} + +/* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients */ @@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct ip_vs_iphdr iphdr; - if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); + if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; /* ipvs enabled in this netns ? */ @@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (!net_ipvs(net)->enable) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum); + return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); } #endif @@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 + /* After mangle & nat fetch 2:nd fragment and following */ + { + .hook = ip_vs_preroute_frag6, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST + 1, + }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 8b7dca9ea422..7f3b0cc00b7a 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dh_bucket *tbl; struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index df646ccf08a7..cbd37489ac77 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 570e31ea427a..161b67972e3f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest = NULL; struct ip_vs_lblcr_entry *en; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 1aa5cac748c4..12475ef88daf 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { + struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; int retc; - ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(p->af, skb, &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) return -EINVAL; + /* todo: IPv6 fragments: + * I think this only should be done for the first fragment. /HS + */ + if (reasm) { + skb = reasm; + dataoff = iph.thoff_reasm + sizeof(struct udphdr); + } else + dataoff = iph.len + sizeof(struct udphdr); - /* No Data ? */ - dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - - if ((retc=skb_linearize(skb)) < 0) + /* todo: Check if this will mess-up the reasm skb !!! /HS */ + retc = skb_linearize(skb); + if (retc < 0) return retc; dptr = skb->data + dataoff; datalen = skb->len - dataoff; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 50d82186da87..939f7fbe9b46 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, if (ih == NULL) sprintf(buf, "TRUNCATED"); else if (ih->nexthdr == IPPROTO_FRAGMENT) - sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); + sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "TRUNCATED %pI6->%pI6", + sprintf(buf, "TRUNCATED %pI6c->%pI6c", &ih->saddr, &ih->daddr); else - sprintf(buf, "%pI6:%u->%pI6:%u", + sprintf(buf, "%pI6c:%u->%pI6c:%u", &ih->saddr, ntohs(pptr[0]), &ih->daddr, ntohs(pptr[1])); } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5b8eb8b12c3e..5de3dd312c0f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af, static struct ip_vs_conn * ah_esp_conn_in_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, unsigned int proto_off, + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn *cp; @@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; @@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { /* * AH/ESP is only related traffic. Pass the packet to IP stack. diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 9f3fb751c491..746048b13ef3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -10,28 +10,26 @@ static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; - struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - - sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph); + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); if (sh == NULL) return 0; - sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), + sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, - &iph.daddr, sh->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pd, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pd); + *verdict = ip_vs_leave(svc, skb, pd, iph); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } static int -sctp_snat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; - unsigned int sctphoff; + unsigned int sctphoff = iph->len; struct sk_buff *iter; __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - sctphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - sctphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) @@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb, } static int -sctp_dnat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; - unsigned int sctphoff; + unsigned int sctphoff = iph->len; struct sk_buff *iter; __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - sctphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - sctphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index cd609cc62721..9af653a75825 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -33,16 +33,14 @@ static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; - struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - - th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; return 0; @@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, - &iph.daddr, th->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pd, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pd); + *verdict = ip_vs_leave(svc, skb, pd, iph); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph, static int -tcp_snat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct tcphdr *tcph; - unsigned int tcphoff; + unsigned int tcphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - tcphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - tcphoff = ip_hdrlen(skb); oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ @@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb, static int -tcp_dnat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct tcphdr *tcph; - unsigned int tcphoff; + unsigned int tcphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - tcphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - tcphoff = ip_hdrlen(skb); oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 2fedb2dcb3d1..503a842c90d2 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -30,23 +30,22 @@ static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; - struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - - uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); + /* IPv6 fragments, only first fragment will hit this */ + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); if (uh == NULL) { *verdict = NF_DROP; return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, - &iph.daddr, uh->dest); + svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pd, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pd); + *verdict = ip_vs_leave(svc, skb, pd, iph); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr, static int -udp_snat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; - unsigned int udphoff; + unsigned int udphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - udphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - udphoff = ip_hdrlen(skb); oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ @@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb, static int -udp_dnat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; - unsigned int udphoff; + unsigned int udphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - udphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - udphoff = ip_hdrlen(skb); oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 08dbdd5bc18f..d6bf20d6cdbe 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { - IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", + IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", svc->scheduler->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 05126521743e..e33126994628 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_sh_bucket *tbl; struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index cc4c8095681a..12008b47e5ca 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -338,7 +338,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, local = __ip_vs_is_local_route6(rt); if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & rt_mode)) { - IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", + IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", local ? "local":"non-local", daddr); dst_release(&rt->dst); return NULL; @@ -346,8 +346,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && !((ort = (struct rt6_info *) skb_dst(skb)) && __ip_vs_is_local_route6(ort))) { - IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " - "requires NAT method, dest: %pI6\n", + IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " + "requires NAT method, dest: %pI6c\n", &ipv6_hdr(skb)->daddr, daddr); dst_release(&rt->dst); return NULL; @@ -355,8 +355,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LOOPBACK)) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " - "to non-local address, dest: %pI6\n", + IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " + "to non-local address, dest: %pI6c\n", &ipv6_hdr(skb)->saddr, daddr); dst_release(&rt->dst); return NULL; @@ -427,7 +427,7 @@ do { \ */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { /* we do not touch skb and do not need pskb ptr */ IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); @@ -441,7 +441,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, */ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); @@ -496,16 +496,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ - struct ipv6hdr *iph = ipv6_hdr(skb); int mtu; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, - IP_VS_RT_MODE_NON_LOCAL))) + rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, + IP_VS_RT_MODE_NON_LOCAL); + if (!rt) goto tx_error_icmp; /* MTU checking */ @@ -516,7 +516,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -559,7 +561,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ int mtu; @@ -629,7 +631,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) goto tx_error_put; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); @@ -677,7 +679,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ int mtu; @@ -686,10 +688,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); /* check if it is a connection of no-client-port */ - if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { __be16 _pt, *p; - p = skb_header_pointer(skb, sizeof(struct ipv6hdr), - sizeof(_pt), &_pt); + p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); @@ -737,7 +738,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error_put; @@ -751,7 +754,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; @@ -812,7 +815,7 @@ tx_error_put: */ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); struct rtable *rt; /* Route to the other host */ @@ -932,7 +935,7 @@ tx_error_put: #ifdef CONFIG_IP_VS_IPV6 int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ @@ -972,7 +975,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!ipvsh->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } @@ -1053,7 +1058,7 @@ tx_error_put: */ int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); @@ -1115,7 +1120,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ int mtu; @@ -1139,7 +1144,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -1183,7 +1190,8 @@ tx_error: */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, int offset, unsigned int hooknum) + struct ip_vs_protocol *pp, int offset, unsigned int hooknum, + struct ip_vs_iphdr *iph) { struct rtable *rt; /* Route to the other host */ int mtu; @@ -1198,7 +1206,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp); + rc = cp->packet_xmit(skb, cp, pp, iph); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1304,7 +1312,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, int offset, unsigned int hooknum) + struct ip_vs_protocol *pp, int offset, unsigned int hooknum, + struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ int mtu; @@ -1319,7 +1328,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp); + rc = cp->packet_xmit(skb, cp, pp, iph); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1375,7 +1384,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index bb10b0717f1b..8d47c3780fda 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) goto out; } - ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(family, skb, &iph); if (data->bitmask & XT_IPVS_PROTO) if ((iph.protocol == data->l4proto) ^ @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); if (unlikely(cp == NULL)) { match = false; goto out; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 94060edbbd70..f262dbfc7f06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + if (po->tp_tx_has_off) { + int off_min, off_max, off; + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (sock->type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + data = ph.raw + off; + } else { + data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -1907,7 +1935,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, to_write -= dev->hard_header_len; } - err = -EFAULT; offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); @@ -1957,7 +1984,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - err = -EBUSY; if (saddr == NULL) { dev = po->prot_hook.dev; proto = po->num; @@ -3111,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_add(sk, val & 0xffff, val >> 16); } + case PACKET_TX_HAS_OFF: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_tx_has_off = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -3202,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->type << 16)) : 0); break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; + break; default: return -ENOPROTOOPT; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 44945f6b7252..e84cab8cb7a9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -109,6 +109,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 2ecde225ae60..709b0fb38a18 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -17,6 +17,7 @@ #include <linux/skbuff.h> #include <linux/cgroup.h> #include <linux/rcupdate.h> +#include <linux/fdtable.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> @@ -53,6 +54,28 @@ static void cgrp_destroy(struct cgroup *cgrp) kfree(cgrp_cls_state(cgrp)); } +static int update_classid(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_classid = (u32)(unsigned long)v; + return 0; +} + +static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + struct task_struct *p; + void *v; + + cgroup_taskset_for_each(p, cgrp, tset) { + task_lock(p); + v = (void *)(unsigned long)task_cls_classid(p); + iterate_fd(p->files, 0, update_classid, v); + task_unlock(p); + } +} + static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) { return cgrp_cls_state(cgrp)->classid; @@ -77,6 +100,7 @@ struct cgroup_subsys net_cls_subsys = { .name = "net_cls", .create = cgrp_create, .destroy = cgrp_destroy, + .attach = cgrp_attach, .subsys_id = net_cls_subsys_id, .base_cftypes = ss_files, .module = THIS_MODULE, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a18d975db59c..13cc744a2498 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -495,16 +495,15 @@ EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { - ktime_t time; - if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) return; qdisc_throttled(wd->qdisc); - time = ktime_set(0, 0); - time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + + hrtimer_start(&wd->timer, + ns_to_ktime(PSCHED_TICKS2NS(expires)), + HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 564b9fc8efd3..0e19948470b8 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -509,8 +509,7 @@ static void cbq_ovl_delay(struct cbq_class *cl) cl->cpriority = TC_CBQ_MAXPRIO; q->pmask |= (1<<TC_CBQ_MAXPRIO); - expires = ktime_set(0, 0); - expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched)); + expires = ns_to_ktime(PSCHED_TICKS2NS(sched)); if (hrtimer_try_to_cancel(&q->delay_timer) && ktime_to_ns(ktime_sub( hrtimer_get_expires(&q->delay_timer), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9d75b7761313..d2922c0ef57a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -71,6 +71,12 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; +struct htb_rate_cfg { + u64 rate_bps; + u32 mult; + u32 shift; +}; + /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { struct Qdisc_class_common common; @@ -118,11 +124,11 @@ struct htb_class { int filter_cnt; /* token bucket parameters */ - struct qdisc_rate_table *rate; /* rate table of the class itself */ - struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ - long buffer, cbuffer; /* token bucket depth/rate */ + struct htb_rate_cfg rate; + struct htb_rate_cfg ceil; + s64 buffer, cbuffer; /* token bucket depth/rate */ psched_tdiff_t mbuffer; /* max wait time */ - long tokens, ctokens; /* current number of tokens */ + s64 tokens, ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ }; @@ -162,6 +168,45 @@ struct htb_sched { struct work_struct work; }; +static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len) +{ + return ((u64)len * r->mult) >> r->shift; +} + +static void htb_precompute_ratedata(struct htb_rate_cfg *r) +{ + u64 factor; + u64 mult; + int shift; + + r->shift = 0; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate + * for smallest packet size (64 bytes). Token (time in ns) is + * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will + * work as long as the smallest packet transfer time can be + * accurately represented in nanosec. + */ + if (r->rate_bps > 0) { + /* + * Higher shift gives better accuracy. Find the largest + * shift such that mult fits in 32 bits. + */ + for (shift = 0; shift < 16; shift++) { + r->shift = shift; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + mult = div64_u64(factor, r->rate_bps); + if (mult > UINT_MAX) + break; + } + + r->shift = shift - 1; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + r->mult = div64_u64(factor, r->rate_bps); + } +} + /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { @@ -273,7 +318,7 @@ static void htb_add_to_id_tree(struct rb_root *root, * already in the queue. */ static void htb_add_to_wait_tree(struct htb_sched *q, - struct htb_class *cl, long delay) + struct htb_class *cl, s64 delay) { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; @@ -441,14 +486,14 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) htb_remove_class_from_row(q, cl, mask); } -static inline long htb_lowater(const struct htb_class *cl) +static inline s64 htb_lowater(const struct htb_class *cl) { if (htb_hysteresis) return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; else return 0; } -static inline long htb_hiwater(const struct htb_class *cl) +static inline s64 htb_hiwater(const struct htb_class *cl) { if (htb_hysteresis) return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; @@ -469,9 +514,9 @@ static inline long htb_hiwater(const struct htb_class *cl) * mode transitions per time unit. The speed gain is about 1/6. */ static inline enum htb_cmode -htb_class_mode(struct htb_class *cl, long *diff) +htb_class_mode(struct htb_class *cl, s64 *diff) { - long toks; + s64 toks; if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { *diff = -toks; @@ -495,7 +540,7 @@ htb_class_mode(struct htb_class *cl, long *diff) * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). */ static void -htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) +htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) { enum htb_cmode new_mode = htb_class_mode(cl, diff); @@ -581,26 +626,26 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff) +static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) { - long toks = diff + cl->tokens; + s64 toks = diff + cl->tokens; if (toks > cl->buffer) toks = cl->buffer; - toks -= (long) qdisc_l2t(cl->rate, bytes); + toks -= (s64) l2t_ns(&cl->rate, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; cl->tokens = toks; } -static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff) +static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) { - long toks = diff + cl->ctokens; + s64 toks = diff + cl->ctokens; if (toks > cl->cbuffer) toks = cl->cbuffer; - toks -= (long) qdisc_l2t(cl->ceil, bytes); + toks -= (s64) l2t_ns(&cl->ceil, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -623,10 +668,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, { int bytes = qdisc_pkt_len(skb); enum htb_cmode old_mode; - long diff; + s64 diff; while (cl) { - diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); + diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); if (cl->level >= level) { if (cl->level == level) cl->xstats.lends++; @@ -673,7 +718,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; - long diff; + s64 diff; struct rb_node *p = rb_first(&q->wait_pq[level]); if (!p) @@ -684,7 +729,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, return cl->pq_key; htb_safe_rb_erase(p, q->wait_pq + level); - diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); + diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); @@ -871,10 +916,10 @@ ok: if (!sch->q.qlen) goto fin; - q->now = psched_get_time(); + q->now = ktime_to_ns(ktime_get()); start_at = jiffies; - next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; + next_event = q->now + 5 * NSEC_PER_SEC; for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ @@ -884,7 +929,7 @@ ok: if (q->now >= q->near_ev_cache[level]) { event = htb_do_events(q, level, start_at); if (!event) - event = q->now + PSCHED_TICKS_PER_SEC; + event = q->now + NSEC_PER_SEC; q->near_ev_cache[level] = event; } else event = q->near_ev_cache[level]; @@ -903,10 +948,17 @@ ok: } } sch->qstats.overlimits++; - if (likely(next_event > q->now)) - qdisc_watchdog_schedule(&q->watchdog, next_event); - else + if (likely(next_event > q->now)) { + if (!test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(q->watchdog.qdisc)->state)) { + ktime_t time = ns_to_ktime(next_event); + qdisc_throttled(q->watchdog.qdisc); + hrtimer_start(&q->watchdog.timer, time, + HRTIMER_MODE_ABS); + } + } else { schedule_work(&q->work); + } fin: return skb; } @@ -1082,9 +1134,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate = cl->rate->rate; + opt.rate.rate = cl->rate.rate_bps >> 3; opt.buffer = cl->buffer; - opt.ceil = cl->ceil->rate; + opt.ceil.rate = cl->ceil.rate_bps >> 3; opt.cbuffer = cl->cbuffer; opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1203,9 +1255,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_destroy(cl->un.leaf.q); } gen_kill_estimator(&cl->bstats, &cl->rate_est); - qdisc_put_rtab(cl->rate); - qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(&cl->filter_list); kfree(cl); } @@ -1307,7 +1356,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[__TCA_HTB_MAX]; struct tc_htb_opt *hopt; @@ -1326,10 +1374,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); hopt = nla_data(tb[TCA_HTB_PARMS]); - - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); - if (!rtab || !ctab) + if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; if (!cl) { /* new class */ @@ -1439,7 +1484,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, * is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->quantum = rtab->rate.rate / q->rate2quantum; + cl->quantum = hopt->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->quantum < 1000) { pr_warning( "HTB: quantum of class %X is small. Consider r2q change.\n", @@ -1460,12 +1505,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->buffer = hopt->buffer; cl->cbuffer = hopt->cbuffer; - if (cl->rate) - qdisc_put_rtab(cl->rate); - cl->rate = rtab; - if (cl->ceil) - qdisc_put_rtab(cl->ceil); - cl->ceil = ctab; + + cl->rate.rate_bps = (u64)hopt->rate.rate << 3; + cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3; + + htb_precompute_ratedata(&cl->rate); + htb_precompute_ratedata(&cl->ceil); + + cl->buffer = hopt->buffer << PSCHED_SHIFT; + cl->cbuffer = hopt->buffer << PSCHED_SHIFT; + sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); @@ -1474,10 +1523,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, return 0; failure: - if (rtab) - qdisc_put_rtab(rtab); - if (ctab) - qdisc_put_rtab(ctab); return err; } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 126b014eb79b..a9edd2e205f4 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -9,7 +9,6 @@ menuconfig IP_SCTP select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 - select CRYPTO_MD5 if SCTP_HMAC_MD5 select LIBCRC32C ---help--- Stream Control Transmission Protocol @@ -68,33 +67,21 @@ config SCTP_DBG_OBJCNT If unsure, say N -choice - prompt "SCTP: Cookie HMAC Algorithm" - default SCTP_HMAC_MD5 +config SCTP_COOKIE_HMAC_MD5 + bool "Enable optional MD5 hmac cookie generation" help - HMAC algorithm to be used during association initialization. It - is strongly recommended to use HMAC-SHA1 or HMAC-MD5. See - configuration for Cryptographic API and enable those algorithms - to make usable by SCTP. - -config SCTP_HMAC_NONE - bool "None" - help - Choosing this disables the use of an HMAC during association - establishment. It is advised to use either HMAC-MD5 or HMAC-SHA1. - -config SCTP_HMAC_SHA1 - bool "HMAC-SHA1" - help - Enable the use of HMAC-SHA1 during association establishment. It - is advised to use either HMAC-MD5 or HMAC-SHA1. - -config SCTP_HMAC_MD5 - bool "HMAC-MD5" + Enable optional MD5 hmac based SCTP cookie generation + default y + select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5 + select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5 + +config SCTP_COOKIE_HMAC_SHA1 + bool "Enable optional SHA1 hmac cookie generation" help - Enable the use of HMAC-MD5 during association establishment. It is - advised to use either HMAC-MD5 or HMAC-SHA1. + Enable optional SHA1 hmac based SCTP cookie generation + default y + select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 + select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 -endchoice endif # IP_SCTP diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2d518425d598..456bc3dbdd51 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1190,6 +1190,15 @@ static int sctp_net_init(struct net *net) /* Whether Cookie Preservative is enabled(1) or not(0) */ net->sctp.cookie_preserve_enable = 1; + /* Default sctp sockets to use md5 as their hmac alg */ +#if defined (CONFIG_CRYPTO_MD5) + net->sctp.sctp_hmac_alg = "md5"; +#elif defined (CONFIG_CRYPTO_SHA1) + net->sctp.sctp_hmac_alg = "sha1"; +#else + net->sctp.sctp_hmac_alg = NULL; +#endif + /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6773d7803627..6eecf7e6338d 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1268,14 +1268,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_uncork(&asoc->outqueue); local_cork = 0; } - asoc = cmd->obj.ptr; + asoc = cmd->obj.asoc; /* Register with the endpoint. */ sctp_endpoint_add_asoc(ep, asoc); sctp_hash_established(asoc); break; case SCTP_CMD_UPDATE_ASSOC: - sctp_assoc_update(asoc, cmd->obj.ptr); + sctp_assoc_update(asoc, cmd->obj.asoc); break; case SCTP_CMD_PURGE_OUTQUEUE: @@ -1315,7 +1315,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_PROCESS_FWDTSN: - sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.ptr); + sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk); break; case SCTP_CMD_GEN_SACK: @@ -1331,7 +1331,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PROCESS_SACK: /* Process an inbound SACK. */ error = sctp_cmd_process_sack(commands, asoc, - cmd->obj.ptr); + cmd->obj.chunk); break; case SCTP_CMD_GEN_INIT_ACK: @@ -1352,15 +1352,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * layer which will bail. */ error = sctp_cmd_process_init(commands, asoc, chunk, - cmd->obj.ptr, gfp); + cmd->obj.init, gfp); break; case SCTP_CMD_GEN_COOKIE_ECHO: /* Generate a COOKIE ECHO chunk. */ new_obj = sctp_make_cookie_echo(asoc, chunk); if (!new_obj) { - if (cmd->obj.ptr) - sctp_chunk_free(cmd->obj.ptr); + if (cmd->obj.chunk) + sctp_chunk_free(cmd->obj.chunk); goto nomem; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -1369,9 +1369,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, /* If there is an ERROR chunk to be sent along with * the COOKIE_ECHO, send it, too. */ - if (cmd->obj.ptr) + if (cmd->obj.chunk) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, - SCTP_CHUNK(cmd->obj.ptr)); + SCTP_CHUNK(cmd->obj.chunk)); if (new_obj->transport) { new_obj->transport->init_sent_count++; @@ -1417,18 +1417,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_CHUNK_ULP: /* Send a chunk to the sockets layer. */ SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "chunk_up:", cmd->obj.ptr, + "chunk_up:", cmd->obj.chunk, "ulpq:", &asoc->ulpq); - sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.ptr, + sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: /* Send a notification to the sockets layer. */ SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "event_up:",cmd->obj.ptr, + "event_up:",cmd->obj.ulpevent, "ulpq:",&asoc->ulpq); - sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ptr); + sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); break; case SCTP_CMD_REPLY: @@ -1438,12 +1438,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, local_cork = 1; } /* Send a chunk to our peer. */ - error = sctp_outq_tail(&asoc->outqueue, cmd->obj.ptr); + error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk); break; case SCTP_CMD_SEND_PKT: /* Send a full packet to our peer. */ - packet = cmd->obj.ptr; + packet = cmd->obj.packet; sctp_packet_transmit(packet); sctp_ootb_pkt_free(packet); break; @@ -1480,7 +1480,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_SETUP_T2: - sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr); + sctp_cmd_setup_t2(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_TIMER_START_ONCE: @@ -1514,7 +1514,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_INIT_CHOOSE_TRANSPORT: - chunk = cmd->obj.ptr; + chunk = cmd->obj.chunk; t = sctp_assoc_choose_alter_transport(asoc, asoc->init_last_sent_to); asoc->init_last_sent_to = t; @@ -1665,17 +1665,16 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_PART_DELIVER: - sctp_ulpq_partial_delivery(&asoc->ulpq, cmd->obj.ptr, - GFP_ATOMIC); + sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC); break; case SCTP_CMD_RENEGE: - sctp_ulpq_renege(&asoc->ulpq, cmd->obj.ptr, + sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_SETUP_T4: - sctp_cmd_setup_t4(commands, asoc, cmd->obj.ptr); + sctp_cmd_setup_t4(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_PROCESS_OPERR: @@ -1734,8 +1733,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; default: - pr_warn("Impossible command: %u, %p\n", - cmd->verb, cmd->obj.ptr); + pr_warn("Impossible command: %u\n", + cmd->verb); break; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a60d1f8b41c5..15379acd9c08 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -110,7 +110,6 @@ static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; extern struct kmem_cache *sctp_bucket_cachep; extern long sysctl_sctp_mem[3]; @@ -3890,6 +3889,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_rcv_context = 0; sp->max_burst = net->sctp.max_burst; + sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg; + /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or * overridden by the SCTP_INIT CMSG. @@ -5981,13 +5982,15 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct crypto_hash *tfm = NULL; + char alg[32]; /* Allocate HMAC for generating cookie. */ - if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { - tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); + if (!sp->hmac && sp->sctp_hmac_alg) { + sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg); + tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { net_info_ratelimited("failed to load transform for %s: %ld\n", - sctp_hmac_alg, PTR_ERR(tfm)); + sp->sctp_hmac_alg, PTR_ERR(tfm)); return -ENOSYS; } sctp_sk(sk)->hmac = tfm; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 70e3ba5cb50b..043889ac86c0 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -62,6 +62,11 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; +static int proc_sctp_do_hmac_alg(ctl_table *ctl, + int write, + void __user *buffer, size_t *lenp, + + loff_t *ppos); static ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -147,6 +152,12 @@ static ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "cookie_hmac_alg", + .maxlen = 8, + .mode = 0644, + .proc_handler = proc_sctp_do_hmac_alg, + }, + { .procname = "valid_cookie_life", .data = &init_net.sctp.valid_cookie_life, .maxlen = sizeof(unsigned int), @@ -289,6 +300,54 @@ static ctl_table sctp_net_table[] = { { /* sentinel */ } }; +static int proc_sctp_do_hmac_alg(ctl_table *ctl, + int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + char tmp[8]; + ctl_table tbl; + int ret; + int changed = 0; + char *none = "none"; + + memset(&tbl, 0, sizeof(struct ctl_table)); + + if (write) { + tbl.data = tmp; + tbl.maxlen = 8; + } else { + tbl.data = net->sctp.sctp_hmac_alg ? : none; + tbl.maxlen = strlen(tbl.data); + } + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (write) { +#ifdef CONFIG_CRYPTO_MD5 + if (!strncmp(tmp, "md5", 3)) { + net->sctp.sctp_hmac_alg = "md5"; + changed = 1; + } +#endif +#ifdef CONFIG_CRYPTO_SHA1 + if (!strncmp(tmp, "sha1", 4)) { + net->sctp.sctp_hmac_alg = "sha1"; + changed = 1; + } +#endif + if (!strncmp(tmp, "none", 4)) { + net->sctp.sctp_hmac_alg = NULL; + changed = 1; + } + + if (!changed) + ret = -EINVAL; + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 360d8697b95c..ada17464b65b 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -997,7 +997,6 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, - struct sctp_chunk *chunk, gfp_t gfp) { struct sctp_ulpevent *event; @@ -1060,7 +1059,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); sctp_ulpq_tail_data(ulpq, chunk, gfp); - sctp_ulpq_partial_delivery(ulpq, chunk, gfp); + sctp_ulpq_partial_delivery(ulpq, gfp); } sk_mem_reclaim(asoc->base.sk); diff --git a/net/socket.c b/net/socket.c index d92c490e66fa..2ca51c719ef9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -620,8 +620,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *si = kiocb_to_siocb(iocb); - sock_update_classid(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; @@ -784,8 +782,6 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *si = kiocb_to_siocb(iocb); - sock_update_classid(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; @@ -896,8 +892,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, if (unlikely(!sock->ops->splice_read)) return -EINVAL; - sock_update_classid(sock->sk); - return sock->ops->splice_read(sock, ppos, pipe, len, flags); } @@ -3437,8 +3431,6 @@ EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { - sock_update_classid(sock->sk); - if (sock->ops->sendpage) return sock->ops->sendpage(sock, page, offset, size, flags); diff --git a/net/unix/diag.c b/net/unix/diag.c index 06748f108a57..5ac19dc1d5e4 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -151,6 +151,9 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; + if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: |