diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/atm/mpc.h | 4 | ||||
-rw-r--r-- | net/atm/mpoa_caches.c | 4 | ||||
-rw-r--r-- | net/core/dev.c | 120 | ||||
-rw-r--r-- | net/core/net-traces.c | 4 | ||||
-rw-r--r-- | net/dsa/dsa.c | 18 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 19 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 597 | ||||
-rw-r--r-- | net/ipv6/route.c | 10 | ||||
-rw-r--r-- | net/tipc/bcast.c | 126 | ||||
-rw-r--r-- | net/tipc/bcast.h | 1 | ||||
-rw-r--r-- | net/tipc/bearer.c | 140 | ||||
-rw-r--r-- | net/tipc/bearer.h | 8 | ||||
-rw-r--r-- | net/tipc/core.h | 5 | ||||
-rw-r--r-- | net/tipc/discover.c | 38 | ||||
-rw-r--r-- | net/tipc/link.c | 626 | ||||
-rw-r--r-- | net/tipc/link.h | 175 | ||||
-rw-r--r-- | net/tipc/name_distr.c | 68 | ||||
-rw-r--r-- | net/tipc/name_distr.h | 1 | ||||
-rw-r--r-- | net/tipc/name_table.c | 5 | ||||
-rw-r--r-- | net/tipc/netlink.c | 8 | ||||
-rw-r--r-- | net/tipc/netlink_compat.c | 8 | ||||
-rw-r--r-- | net/tipc/node.c | 884 | ||||
-rw-r--r-- | net/tipc/node.h | 127 | ||||
-rw-r--r-- | net/tipc/udp_media.c | 5 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport.h | 2 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport_notify.c | 2 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport_notify.h | 5 | ||||
-rw-r--r-- | net/vmw_vsock/vmci_transport_notify_qstate.c | 2 |
28 files changed, 1565 insertions, 1447 deletions
diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 0919a88bbc70..cfc7b745aa91 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -21,11 +21,11 @@ struct mpoa_client { uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */ rwlock_t ingress_lock; - struct in_cache_ops *in_ops; /* ingress cache operations */ + const struct in_cache_ops *in_ops; /* ingress cache operations */ in_cache_entry *in_cache; /* the ingress cache of this MPC */ rwlock_t egress_lock; - struct eg_cache_ops *eg_ops; /* egress cache operations */ + const struct eg_cache_ops *eg_ops; /* egress cache operations */ eg_cache_entry *eg_cache; /* the egress cache of this MPC */ uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index d1b2d9a03144..9e60e74c807d 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -534,7 +534,7 @@ static void eg_destroy_cache(struct mpoa_client *mpc) } -static struct in_cache_ops ingress_ops = { +static const struct in_cache_ops ingress_ops = { in_cache_add_entry, /* add_entry */ in_cache_get, /* get */ in_cache_get_with_mask, /* get_with_mask */ @@ -548,7 +548,7 @@ static struct in_cache_ops ingress_ops = { in_destroy_cache /* destroy_cache */ }; -static struct eg_cache_ops egress_ops = { +static const struct eg_cache_ops egress_ops = { eg_cache_add_entry, /* add_entry */ eg_cache_get_by_cache_id, /* get_by_cache_id */ eg_cache_get_by_tag, /* get_by_tag */ diff --git a/net/core/dev.c b/net/core/dev.c index ae00b894e675..5df6cbce727c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -96,6 +96,7 @@ #include <linux/skbuff.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/busy_poll.h> #include <linux/rtnetlink.h> #include <linux/stat.h> #include <net/dst.h> @@ -182,8 +183,8 @@ EXPORT_SYMBOL(dev_base_lock); /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); -static unsigned int napi_gen_id; -static DEFINE_HASHTABLE(napi_hash, 8); +static unsigned int napi_gen_id = NR_CPUS; +static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -3021,7 +3022,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, int queue_index = 0; #ifdef CONFIG_XPS - if (skb->sender_cpu == 0) + u32 sender_cpu = skb->sender_cpu - 1; + + if (sender_cpu >= (u32)NR_CPUS) skb->sender_cpu = raw_smp_processor_id() + 1; #endif @@ -4353,6 +4356,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + skb_mark_napi_id(skb, napi); trace_napi_gro_receive_entry(skb); skb_gro_reset_offset(skb); @@ -4386,7 +4390,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); - napi->skb = skb; + if (skb) { + napi->skb = skb; + skb_mark_napi_id(skb, napi); + } } return skb; } @@ -4661,7 +4668,7 @@ void napi_complete_done(struct napi_struct *n, int work_done) EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *napi_by_id(unsigned int napi_id) +static struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; @@ -4672,43 +4679,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id) return NULL; } -EXPORT_SYMBOL_GPL(napi_by_id); -void napi_hash_add(struct napi_struct *napi) +#if defined(CONFIG_NET_RX_BUSY_POLL) +#define BUSY_POLL_BUDGET 8 +bool sk_busy_loop(struct sock *sk, int nonblock) { - if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + int (*busy_poll)(struct napi_struct *dev); + struct napi_struct *napi; + int rc = false; - spin_lock(&napi_hash_lock); + rcu_read_lock(); - /* 0 is not a valid id, we also skip an id that is taken - * we expect both events to be extremely rare - */ - napi->napi_id = 0; - while (!napi->napi_id) { - napi->napi_id = ++napi_gen_id; - if (napi_by_id(napi->napi_id)) - napi->napi_id = 0; + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + /* Note: ndo_busy_poll method is optional in linux-4.5 */ + busy_poll = napi->dev->netdev_ops->ndo_busy_poll; + + do { + rc = 0; + local_bh_disable(); + if (busy_poll) { + rc = busy_poll(napi); + } else if (napi_schedule_prep(napi)) { + void *have = netpoll_poll_lock(napi); + + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + rc = napi->poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi); + if (rc == BUSY_POLL_BUDGET) { + napi_complete_done(napi, rc); + napi_schedule(napi); + } + } + netpoll_poll_unlock(have); } + if (rc > 0) + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + local_bh_enable(); - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ - spin_unlock(&napi_hash_lock); - } + cpu_relax(); + } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && + !need_resched() && !busy_loop_timeout(end_time)); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock(); + return rc; +} +EXPORT_SYMBOL(sk_busy_loop); + +#endif /* CONFIG_NET_RX_BUSY_POLL */ + +void napi_hash_add(struct napi_struct *napi) +{ + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || + test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + return; + + spin_lock(&napi_hash_lock); + + /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + do { + if (unlikely(++napi_gen_id < NR_CPUS + 1)) + napi_gen_id = NR_CPUS + 1; + } while (napi_by_id(napi_gen_id)); + napi->napi_id = napi_gen_id; + + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + + spin_unlock(&napi_hash_lock); } EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ -void napi_hash_del(struct napi_struct *napi) +bool napi_hash_del(struct napi_struct *napi) { + bool rcu_sync_needed = false; + spin_lock(&napi_hash_lock); - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { + rcu_sync_needed = true; hlist_del_rcu(&napi->napi_hash_node); - + } spin_unlock(&napi_hash_lock); + return rcu_sync_needed; } EXPORT_SYMBOL_GPL(napi_hash_del); @@ -4744,6 +4809,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); @@ -4763,8 +4829,12 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); +/* Must be called in process context */ void netif_napi_del(struct napi_struct *napi) { + might_sleep(); + if (napi_hash_del(napi)) + synchronize_net(); list_del_init(&napi->dev_list); napi_free_frags(napi); @@ -7164,11 +7234,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs); * This function does the last stage of destroying an allocated device * interface. The reference to the device object is released. * If this is the last reference then it will be freed. + * Must be called in process context. */ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + might_sleep(); netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index adef015b2f41..92da5e4ceb4f 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -32,6 +32,10 @@ #include <trace/events/sock.h> #include <trace/events/udp.h> #include <trace/events/fib.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <trace/events/fib6.h> +EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1eba07feb34a..b7448c8490ac 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -21,8 +21,10 @@ #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/of_net.h> +#include <linux/of_gpio.h> #include <linux/sysfs.h> #include <linux/phy_fixed.h> +#include <linux/gpio/consumer.h> #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -688,6 +690,9 @@ static int dsa_of_probe(struct device *dev) const char *port_name; int chip_index, port_index; const unsigned int *sw_addr, *port_reg; + int gpio; + enum of_gpio_flags of_flags; + unsigned long flags; u32 eeprom_len; int ret; @@ -766,6 +771,19 @@ static int dsa_of_probe(struct device *dev) put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } + gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, + &of_flags); + if (gpio_is_valid(gpio)) { + flags = (of_flags == OF_GPIO_ACTIVE_LOW ? + GPIOF_ACTIVE_LOW : 0); + ret = devm_gpio_request_one(dev, gpio, flags, + "switch_reset"); + if (ret) + goto out_free_chip; + + cd->reset = gpio_to_desc(gpio); + gpiod_direction_output(cd->reset, 0); + } for_each_available_child_of_node(child, port) { port_reg = of_get_property(port, "reg", NULL); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e86e8a9738ea..67f7c9de0b16 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1239,13 +1239,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - pr_notice(","); + pr_cont(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - pr_notice(" OK\n"); + pr_cont(" OK\n"); break; } @@ -1253,7 +1253,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - pr_notice(" timed out!\n"); + pr_cont(" timed out!\n"); break; } @@ -1263,7 +1263,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - pr_notice("."); + pr_cont("."); } #ifdef IPCONFIG_BOOTP @@ -1280,11 +1280,10 @@ static int __init ic_dynamic(void) return -1; } - pr_info("IP-Config: Got %s answer from %pI4, ", + pr_info("IP-Config: Got %s answer from %pI4, my address is %pI4\n", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_addrservaddr); - pr_info("my address is %pI4\n", &ic_myaddr); + &ic_addrservaddr, &ic_myaddr); return 0; } @@ -1527,14 +1526,14 @@ static int __init ip_auto_config(void) pr_cont(", mtu=%d", ic_dev_mtu); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) { - pr_info(" nameserver%u=%pI4", + pr_cont(" nameserver%u=%pI4", i, &ic_nameservers[i]); break; } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_info(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_info("\n"); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b74d513..a2d248d9c35c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,10 +67,6 @@ #include <net/fib_rules.h> #include <linux/netconf.h> -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) -#define CONFIG_IP_PIMSM 1 -#endif - struct mr_table { struct list_head list; possible_net_t net; @@ -84,9 +80,7 @@ struct mr_table { atomic_t cache_resolve_queue_len; bool mroute_do_assert; bool mroute_do_pim; -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) int mroute_reg_vif_num; -#endif }; struct ipmr_rule { @@ -97,15 +91,18 @@ struct ipmr_result { struct mr_table *mrt; }; +static inline bool pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ +/* Multicast router control variables */ #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -252,8 +249,8 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (!mrt) { - err = -ENOMEM; + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); goto err1; } @@ -301,8 +298,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, static int __net_init ipmr_rules_init(struct net *net) { - net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - return net->ipv4.mrt ? 0 : -ENOMEM; + struct mr_table *mrt; + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) + return PTR_ERR(mrt); + net->ipv4.mrt = mrt; + return 0; } static void __net_exit ipmr_rules_exit(struct net *net) @@ -319,13 +321,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) struct mr_table *mrt; unsigned int i; + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (id != RT_TABLE_DEFAULT && id >= 1000000000) + return ERR_PTR(-EINVAL); + mrt = ipmr_get_table(net, id); if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); if (!mrt) - return NULL; + return ERR_PTR(-ENOMEM); write_pnet(&mrt->net, net); mrt->id = id; @@ -338,9 +344,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, (unsigned long)mrt); -#ifdef CONFIG_IP_PIMSM mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); #endif @@ -387,8 +391,24 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } -static -struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) +/* Initialize ipmr pimreg/tunnel in_device */ +static bool ipmr_init_vif_indev(const struct net_device *dev) +{ + struct in_device *in_dev; + + ASSERT_RTNL(); + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return false; + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + + return true; +} + +static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -399,7 +419,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) int err; struct ifreq ifr; struct ip_tunnel_parm p; - struct in_device *in_dev; memset(&p, 0, sizeof(p)); p.iph.daddr = v->vifc_rmt_addr.s_addr; @@ -424,15 +443,8 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) + if (!ipmr_init_vif_indev(dev)) goto failure; - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - if (dev_open(dev)) goto failure; dev_hold(dev); @@ -449,8 +461,7 @@ failure: return NULL; } -#ifdef CONFIG_IP_PIMSM - +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); @@ -500,7 +511,6 @@ static void reg_vif_setup(struct net_device *dev) static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; - struct in_device *in_dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) @@ -520,18 +530,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) { - rcu_read_unlock(); + if (!ipmr_init_vif_indev(dev)) goto failure; - } - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - rcu_read_unlock(); - if (dev_open(dev)) goto failure; @@ -547,13 +547,56 @@ failure: unregister_netdevice(dev); return NULL; } + +/* called with rcu_read_lock() */ +static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, + unsigned int pimlen) +{ + struct net_device *reg_dev = NULL; + struct iphdr *encap; + + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); + /* Check that: + * a. packet is really sent to a multicast group + * b. packet is not a NULL-REGISTER + * c. packet is not truncated + */ + if (!ipv4_is_multicast(encap->daddr) || + encap->tot_len == 0 || + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; + + read_lock(&mrt_lock); + if (mrt->mroute_reg_vif_num >= 0) + reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + read_unlock(&mrt_lock); + + if (!reg_dev) + return 1; + + skb->mac_header = skb->network_header; + skb_pull(skb, (u8 *)encap - skb->data); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + skb->ip_summed = CHECKSUM_NONE; + + skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); + + netif_rx(skb); + + return NET_RX_SUCCESS; +} +#else +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) +{ + return NULL; +} #endif /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call */ - static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { @@ -575,10 +618,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, return -EADDRNOTAVAIL; } -#ifdef CONFIG_IP_PIMSM if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; -#endif if (vifi + 1 == mrt->maxvif) { int tmp; @@ -625,7 +666,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c) /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. */ - static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); @@ -653,9 +693,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) ipmr_cache_free(c); } - /* Timer process for the unresolved queue. */ - static void ipmr_expire_process(unsigned long arg) { struct mr_table *mrt = (struct mr_table *)arg; @@ -695,7 +733,6 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ - static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, unsigned char *ttls) { @@ -731,10 +768,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRINUSE; switch (vifc->vifc_flags) { -#ifdef CONFIG_IP_PIMSM case VIFF_REGISTER: - /* - * Special Purpose VIF in PIM + if (!pimsm_enabled()) + return -EINVAL; + /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) @@ -749,7 +786,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; -#endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (!dev) @@ -761,7 +797,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; - case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { @@ -815,10 +850,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); v->dev = dev; -#ifdef CONFIG_IP_PIMSM if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; -#endif if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); @@ -883,9 +916,7 @@ skip: return ipmr_cache_find_any_parent(mrt, vifi); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); @@ -906,10 +937,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) return c; } -/* - * A cache entry has gone into a resolved state from queued - */ - +/* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { @@ -917,7 +945,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -941,34 +968,29 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* - * Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme. * - * Called under mrt_lock. + * Called under mrt_lock. */ - static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff *skb; const int ihl = ip_hdrlen(pkt); + struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; - struct sock *mroute_sk; + struct sk_buff *skb; int ret; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else -#endif skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. @@ -986,28 +1008,23 @@ static int ipmr_cache_report(struct mr_table *mrt, ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); - } else -#endif - { - - /* Copy the IP header */ - - skb_set_network_header(skb, skb->len); - skb_put(skb, ihl); - skb_copy_to_linear_data(skb, pkt->data, ihl); - ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ - msg = (struct igmpmsg *)skb_network_header(skb); - msg->im_vif = vifi; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); - - /* Add our header */ - - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - igmp->type = - msg->im_msgtype = assert; - igmp->code = 0; - ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ - skb->transport_header = skb->network_header; + } else { + /* Copy the IP header */ + skb_set_network_header(skb, skb->len); + skb_put(skb, ihl); + skb_copy_to_linear_data(skb, pkt->data, ihl); + /* Flag to the kernel this is a route add */ + ip_hdr(skb)->protocol = 0; + msg = (struct igmpmsg *)skb_network_header(skb); + msg->im_vif = vifi; + skb_dst_set(skb, dst_clone(skb_dst(pkt))); + /* Add our header */ + igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp->type = assert; + msg->im_msgtype = assert; + igmp->code = 0; + ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ + skb->transport_header = skb->network_header; } rcu_read_lock(); @@ -1019,7 +1036,6 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Deliver to mrouted */ - ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); if (ret < 0) { @@ -1030,12 +1046,9 @@ static int ipmr_cache_report(struct mr_table *mrt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) +/* Queue a packet for resolution. It gets locked cache entry! */ +static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, + struct sk_buff *skb) { bool found = false; int err; @@ -1053,7 +1066,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -1063,13 +1075,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* Fill in the new cache entry */ - c->mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ - err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry @@ -1091,7 +1101,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; @@ -1104,9 +1113,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) return err; } -/* - * MFC cache manipulation by user space mroute daemon - */ +/* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { @@ -1177,9 +1184,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); @@ -1204,10 +1210,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } -/* - * Close the multicast socket, and clear the vif tables etc - */ - +/* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt) { int i; @@ -1215,7 +1218,6 @@ static void mroute_clean_tables(struct mr_table *mrt) struct mfc_cache *c, *next; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { if (!(mrt->vif_table[i].flags & VIFF_STATIC)) vif_delete(mrt, i, 0, &list); @@ -1223,7 +1225,6 @@ static void mroute_clean_tables(struct mr_table *mrt) unregister_netdevice_many(&list); /* Wipe the cache */ - for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { if (c->mfc_flags & MFC_STATIC) @@ -1267,45 +1268,51 @@ static void mrtsock_destruct(struct sock *sk) rtnl_unlock(); } -/* - * Socket options and virtual interface manipulation. The whole - * virtual interface system is a complete heap, but unfortunately - * that's how BSD mrouted happens to think. Maybe one day with a proper - * MOSPF/PIM router set up we can clean this up. +/* Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) { - int ret, parent = 0; - struct vifctl vif; - struct mfcctl mfc; struct net *net = sock_net(sk); + int val, ret = 0, parent = 0; struct mr_table *mrt; + struct vifctl vif; + struct mfcctl mfc; + u32 uval; + /* There's one exception to the lock - MRT_DONE which needs to unlock */ + rtnl_lock(); if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_IGMP) - return -EOPNOTSUPP; + inet_sk(sk)->inet_num != IPPROTO_IGMP) { + ret = -EOPNOTSUPP; + goto out_unlock; + } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (!mrt) - return -ENOENT; - + if (!mrt) { + ret = -ENOENT; + goto out_unlock; + } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && - !ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EACCES; + !ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EACCES; + goto out_unlock; + } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) - return -EINVAL; - - rtnl_lock(); - if (rtnl_dereference(mrt->mroute_sk)) { - rtnl_unlock(); - return -EADDRINUSE; - } + ret = -EINVAL; + if (rtnl_dereference(mrt->mroute_sk)) + ret = -EADDRINUSE; + if (ret) + break; ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { @@ -1315,129 +1322,133 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } - rtnl_unlock(); - return ret; + break; case MRT_DONE: - if (sk != rcu_access_pointer(mrt->mroute_sk)) - return -EACCES; - return ip_ra_control(sk, 0, NULL); + if (sk != rcu_access_pointer(mrt->mroute_sk)) { + ret = -EACCES; + } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); + ret = ip_ra_control(sk, 0, NULL); + goto out; + } + break; case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen != sizeof(vif)) - return -EINVAL; - if (copy_from_user(&vif, optval, sizeof(vif))) - return -EFAULT; - if (vif.vifc_vifi >= MAXVIFS) - return -ENFILE; - rtnl_lock(); + if (optlen != sizeof(vif)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&vif, optval, sizeof(vif))) { + ret = -EFAULT; + break; + } + if (vif.vifc_vifi >= MAXVIFS) { + ret = -ENFILE; + break; + } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } - rtnl_unlock(); - return ret; - - /* - * Manipulate the forwarding caches. These live - * in a sort of kernel/user symbiosis. - */ + break; + /* Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. + */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: - if (optlen != sizeof(mfc)) - return -EINVAL; - if (copy_from_user(&mfc, optval, sizeof(mfc))) - return -EFAULT; + if (optlen != sizeof(mfc)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&mfc, optval, sizeof(mfc))) { + ret = -EFAULT; + break; + } if (parent == 0) parent = mfc.mfcc_parent; - rtnl_lock(); if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); - rtnl_unlock(); - return ret; - /* - * Control PIM assert. - */ + break; + /* Control PIM assert. */ case MRT_ASSERT: - { - int v; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - mrt->mroute_do_assert = v; - return 0; - } -#ifdef CONFIG_IP_PIMSM + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + mrt->mroute_do_assert = val; + break; case MRT_PIM: - { - int v; - - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - v = !!v; + if (!pimsm_enabled()) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; - if (v != mrt->mroute_do_pim) { - mrt->mroute_do_pim = v; - mrt->mroute_do_assert = v; + val = !!val; + if (val != mrt->mroute_do_pim) { + mrt->mroute_do_pim = val; + mrt->mroute_do_assert = val; } - rtnl_unlock(); - return ret; - } -#endif -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + break; case MRT_TABLE: - { - u32 v; - - if (optlen != sizeof(u32)) - return -EINVAL; - if (get_user(v, (u32 __user *)optval)) - return -EFAULT; - - /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ - if (v != RT_TABLE_DEFAULT && v >= 1000000000) - return -EINVAL; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(uval)) { + ret = -EINVAL; + break; + } + if (get_user(uval, (u32 __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - if (!ipmr_new_table(net, v)) - ret = -ENOMEM; + mrt = ipmr_new_table(net, uval); + if (IS_ERR(mrt)) + ret = PTR_ERR(mrt); else - raw_sk(sk)->ipmr_table = v; + raw_sk(sk)->ipmr_table = uval; } - rtnl_unlock(); - return ret; - } -#endif - /* - * Spurious command, or MRT_VERSION which you cannot - * set. - */ + break; + /* Spurious command, or MRT_VERSION which you cannot set. */ default: - return -ENOPROTOOPT; + ret = -ENOPROTOOPT; } +out_unlock: + rtnl_unlock(); +out: + return ret; } -/* - * Getsock opt support for the multicast routing system. - */ - +/* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; @@ -1453,39 +1464,35 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (!mrt) return -ENOENT; - if (optname != MRT_VERSION && -#ifdef CONFIG_IP_PIMSM - optname != MRT_PIM && -#endif - optname != MRT_ASSERT) + switch (optname) { + case MRT_VERSION: + val = 0x0305; + break; + case MRT_PIM: + if (!pimsm_enabled()) + return -ENOPROTOOPT; + val = mrt->mroute_do_pim; + break; + case MRT_ASSERT: + val = mrt->mroute_do_assert; + break; + default: return -ENOPROTOOPT; + } if (get_user(olr, optlen)) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; - if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) - val = 0x0305; -#ifdef CONFIG_IP_PIMSM - else if (optname == MRT_PIM) - val = mrt->mroute_do_pim; -#endif - else - val = mrt->mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; } -/* - * The IP multicast ioctl support routines. - */ - +/* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req sr; @@ -1618,7 +1625,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif - static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -1640,17 +1646,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v return NOTIFY_DONE; } - static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; -/* - * Encapsulate a packet by attaching a valid IPIP header to it. - * This avoids tunnel drivers and other mess and gives us the speed so - * important for multicast video. +/* Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. */ - static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { @@ -1692,9 +1695,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -/* - * Processing handlers for ipmr_forward - */ +/* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, int vifi) @@ -1709,7 +1710,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (!vif->dev) goto out_free; -#ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; @@ -1718,7 +1718,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } -#endif if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, @@ -1745,7 +1744,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * allow to send ICMP, so that packets will disappear * to blackhole. */ - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; @@ -1777,8 +1775,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPCB(skb)->flags |= IPSKB_FORWARDED; - /* - * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface @@ -1809,7 +1806,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ - static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local) @@ -1834,9 +1830,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto forward; } - /* - * Wrong interface: drop packet and (maybe) send PIM assert. - */ + /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. @@ -1875,9 +1869,7 @@ forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; - /* - * Forward the frame - */ + /* Forward the frame */ if (cache->mfc_origin == htonl(INADDR_ANY) && cache->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && @@ -1951,11 +1943,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) return mrt; } -/* - * Multicast packets for forwarding arrive here - * Called with rcu_read_lock(); +/* Multicast packets for forwarding arrive here + * Called with rcu_read_lock(); */ - int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; @@ -2006,9 +1996,7 @@ int ip_mr_input(struct sk_buff *skb) vif); } - /* - * No usable cache entry - */ + /* No usable cache entry */ if (!cache) { int vif; @@ -2049,53 +2037,8 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM -/* called with rcu_read_lock() */ -static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, - unsigned int pimlen) -{ - struct net_device *reg_dev = NULL; - struct iphdr *encap; - - encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: - * a. packet is really sent to a multicast group - * b. packet is not a NULL-REGISTER - * c. packet is not truncated - */ - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + pimlen > skb->len) - return 1; - - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; - read_unlock(&mrt_lock); - - if (!reg_dev) - return 1; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8 *)encap - skb->data); - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = CHECKSUM_NONE; - - skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); - - netif_rx(skb); - - return NET_RX_SUCCESS; -} -#endif - #ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - +/* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; @@ -2420,9 +2363,8 @@ done: } #ifdef CONFIG_PROC_FS -/* - * The /proc interfaces to multicast routing : - * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif +/* The /proc interfaces to multicast routing : + * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ struct ipmr_vif_iter { struct seq_net_private p; @@ -2706,10 +2648,7 @@ static const struct net_protocol pim_protocol = { }; #endif - -/* - * Setup for IP multicast routing - */ +/* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; @@ -2759,8 +2698,6 @@ int __init ip_mr_init(void) sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - if (!mrt_cachep) - return -ENOMEM; err = register_pernet_subsys(&ipmr_net_ops); if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f01fe122abd..89758be9c6a6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -62,6 +62,7 @@ #include <net/lwtunnel.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <trace/events/fib6.h> #include <asm/uaccess.h> @@ -865,6 +866,9 @@ restart: } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + return rt; } @@ -1078,6 +1082,8 @@ redo_rt6_select: read_unlock_bh(&table->tb6_lock); rt6_dst_from_metrics_check(rt); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { @@ -1101,6 +1107,8 @@ redo_rt6_select: uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); + + trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); return uncached_rt; } else { @@ -1125,6 +1133,7 @@ redo_rt6_select: dst_release(&rt->dst); } + trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); return pcpu_rt; } @@ -1474,6 +1483,7 @@ out: read_unlock_bh(&table->tb6_lock); + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; }; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9dc239dfe192..e401108360a2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -332,131 +332,15 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, - struct tipc_stats *stats) -{ - int i; - struct nlattr *nest; - - struct nla_map { - __u32 key; - __u32 val; - }; - - struct nla_map map[] = { - {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, - {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, - {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, - {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, - {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, - {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, - {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, - {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, - {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, - {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, - {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, - {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, - {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, - {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, - {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, - {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, - {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, - {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, - {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? - (stats->accu_queue_sz / stats->queue_sz_counts) : 0} - }; - - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); - if (!nest) - return -EMSGSIZE; - - for (i = 0; i < ARRAY_SIZE(map); i++) - if (nla_put_u32(skb, map[i].key, map[i].val)) - goto msg_full; - - nla_nest_end(skb, nest); - - return 0; -msg_full: - nla_nest_cancel(skb, nest); - - return -EMSGSIZE; -} - -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) -{ - int err; - void *hdr; - struct nlattr *attrs; - struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bcast_lock(net); - - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) - return -EMSGSIZE; - - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); - if (!attrs) - goto msg_full; - - /* The broadcast link is always up */ - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) - goto attr_msg_full; - - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) - goto attr_msg_full; - if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) - goto attr_msg_full; - - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); - if (!prop) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) - goto prop_msg_full; - nla_nest_end(msg->skb, prop); - - err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); - if (err) - goto attr_msg_full; - - tipc_bcast_unlock(net); - nla_nest_end(msg->skb, attrs); - genlmsg_end(msg->skb, hdr); - - return 0; - -prop_msg_full: - nla_nest_cancel(msg->skb, prop); -attr_msg_full: - nla_nest_cancel(msg->skb, attrs); -msg_full: - tipc_bcast_unlock(net); - genlmsg_cancel(msg->skb, hdr); - - return -EMSGSIZE; -} - int tipc_bclink_reset_stats(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; tipc_bcast_lock(net); - memset(&bcl->stats, 0, sizeof(bcl->stats)); + tipc_link_reset_stats(l); tipc_bcast_unlock(net); return 0; } @@ -530,9 +414,7 @@ enomem: void tipc_bcast_reinit(struct net *net) { - struct tipc_bc_base *b = tipc_bc_base(net); - - msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); + tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net)); } void tipc_bcast_stop(struct net *net) diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2855b9356a15..1944c6c00bb9 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -43,6 +43,7 @@ struct tipc_node; struct tipc_msg; struct tipc_nl_msg; struct tipc_node_map; +extern const char tipc_bclink_name[]; int tipc_bcast_init(struct net *net); void tipc_bcast_reinit(struct net *net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 648f2a67f314..802ffad3200d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -71,7 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr); +static void bearer_disable(struct net *net, struct tipc_bearer *b); /** * tipc_media_find - locates specified media object by name @@ -107,13 +107,13 @@ static struct tipc_media *media_find_id(u8 type) void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; - struct tipc_media *m_ptr; + struct tipc_media *m; int ret; - m_ptr = media_find_id(a->media_id); + m = media_find_id(a->media_id); - if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; @@ -175,13 +175,13 @@ static int bearer_name_validate(const char *name, struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr && (!strcmp(b_ptr->name, name))) - return b_ptr; + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (!strcmp(b->name, name))) + return b; } return NULL; } @@ -189,24 +189,24 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_add_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_add_dest(b->link_req); rcu_read_unlock(); } void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_remove_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_remove_dest(b->link_req); rcu_read_unlock(); } @@ -218,8 +218,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; + struct tipc_bearer *b; + struct tipc_media *m; struct tipc_bearer_names b_names; char addr_string[16]; u32 bearer_id; @@ -255,31 +255,31 @@ static int tipc_enable_bearer(struct net *net, const char *name, return -EINVAL; } - m_ptr = tipc_media_find(b_names.media_name); - if (!m_ptr) { + m = tipc_media_find(b_names.media_name); + if (!m) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) - priority = m_ptr->priority; + priority = m->priority; restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (!b_ptr) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { bearer_id = i; continue; } - if (!strcmp(name, b_ptr->name)) { + if (!strcmp(name, b->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); return -EINVAL; } - if ((b_ptr->priority == priority) && + if ((b->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", @@ -297,35 +297,35 @@ restart: return -EINVAL; } - b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) + b = kzalloc(sizeof(*b), GFP_ATOMIC); + if (!b) return -ENOMEM; - strcpy(b_ptr->name, name); - b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr, attr); + strcpy(b->name, name); + b->media = m; + res = m->enable_media(net, b, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); return -EINVAL; } - b_ptr->identity = bearer_id; - b_ptr->tolerance = m_ptr->tolerance; - b_ptr->window = m_ptr->window; - b_ptr->domain = disc_domain; - b_ptr->net_plane = bearer_id + 'A'; - b_ptr->priority = priority; + b->identity = bearer_id; + b->tolerance = m->tolerance; + b->window = m->window; + b->domain = disc_domain; + b->net_plane = bearer_id + 'A'; + b->priority = priority; - res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr); if (res) { - bearer_disable(net, b_ptr); + bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -336,11 +336,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { - pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_node_delete_links(net, b_ptr->identity); - tipc_disc_reset(net, b_ptr); + pr_info("Resetting bearer <%s>\n", b->name); + tipc_node_delete_links(net, b->identity); + tipc_disc_reset(net, b); return 0; } @@ -349,26 +349,26 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. */ -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) +static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; - pr_info("Disabling bearer <%s>\n", b_ptr->name); - b_ptr->media->disable_media(b_ptr); + pr_info("Disabling bearer <%s>\n", b->name); + b->media->disable_media(b); - tipc_node_delete_links(net, b_ptr->identity); - RCU_INIT_POINTER(b_ptr->media_ptr, NULL); - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); + tipc_node_delete_links(net, b->identity); + RCU_INIT_POINTER(b->media_ptr, NULL); + if (b->link_req) + tipc_disc_delete(b->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + if (b == rtnl_dereference(tn->bearer_list[i])) { RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } - kfree_rcu(b_ptr, rcu); + kfree_rcu(b, rcu); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, @@ -411,7 +411,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent - * @b_ptr: the bearer through which the packet is to be sent + * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, @@ -532,14 +532,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b_ptr)) { + b = rcu_dereference_rtnl(dev->tipc_ptr); + if (likely(b)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -564,13 +564,13 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; - b_ptr = rtnl_dereference(dev->tipc_ptr); - if (!b_ptr) + b = rtnl_dereference(dev->tipc_ptr); + if (!b) return NOTIFY_DONE; - b_ptr->mtu = dev->mtu; + b->mtu = dev->mtu; switch (evt) { case NETDEV_CHANGE: @@ -578,16 +578,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_GOING_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: - b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(dev_net(dev), b_ptr); + bearer_disable(dev_net(dev), b); break; } return NOTIFY_OK; @@ -623,13 +623,13 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr) { - bearer_disable(net, b_ptr); + b = rtnl_dereference(tn->bearer_list[i]); + if (b) { + bearer_disable(net, b); tn->bearer_list[i] = NULL; } } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 552185bc4773..e31820516774 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -103,11 +103,11 @@ struct tipc_bearer; */ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, - struct tipc_bearer *b_ptr, + struct tipc_bearer *b, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + int (*enable_media)(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]); - void (*disable_media)(struct tipc_bearer *b_ptr); + void (*disable_media)(struct tipc_bearer *b); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, int bufsz); @@ -176,7 +176,7 @@ struct tipc_bearer_names { * TIPC routines available to supported media types */ -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b); /* * Routines made available to TIPC by supported media types diff --git a/net/tipc/core.h b/net/tipc/core.h index 18e95a8020cd..5504d63503df 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -118,6 +118,11 @@ static inline int tipc_netid(struct net *net) return tipc_net(net)->net_id; } +static inline struct list_head *tipc_nodes(struct net *net) +{ + return &tipc_net(net)->node_list; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index afe8c47c4085..f1e738e80535 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -75,14 +75,14 @@ struct tipc_link_req { * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace * @type: message type (request or response) - * @b_ptr: ptr to bearer issuing message + * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, - struct tipc_bearer *b_ptr) + struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; - u32 dest_domain = b_ptr->domain; + u32 dest_domain = b->domain; msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, @@ -92,16 +92,16 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); - b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); + b->media->addr2msg(msg_media_addr(msg), &b->addr); } /** * disc_dupl_alert - issue node address duplication alert - * @b_ptr: pointer to bearer detecting duplication + * @b: pointer to bearer detecting duplication * @node_addr: duplicated node address * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, +static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, struct tipc_media_addr *media_addr) { char node_addr_str[16]; @@ -111,7 +111,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), media_addr); pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, - media_addr_str, b_ptr->name); + media_addr_str, b->name); } /** @@ -261,13 +261,13 @@ exit: /** * tipc_disc_create - create object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, +int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -282,17 +282,17 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, return -ENOMEM; } - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); memcpy(&req->dest, dest, sizeof(*dest)); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); - b_ptr->link_req = req; + b->link_req = req; skb = skb_clone(req->buf, GFP_ATOMIC); if (skb) tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); @@ -313,19 +313,19 @@ void tipc_disc_delete(struct tipc_link_req *req) /** * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { - struct tipc_link_req *req = b_ptr->link_req; + struct tipc_link_req *req = b->link_req; struct sk_buff *skb; spin_lock_bh(&req->lock); - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); diff --git a/net/tipc/link.c b/net/tipc/link.c index 9efbdbde2b08..b11afe71dfc1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -45,28 +45,156 @@ #include <linux/pkt_sched.h> +struct tipc_stats { + u32 sent_info; /* used in counting # sent packets */ + u32 recv_info; /* used in counting # recv'd packets */ + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. length profiling */ +}; + +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @media_addr: media address to use when sending messages over link + * @timer: link timer + * @net: pointer to namespace struct + * @refcnt: reference counter for permanent references (owner node & timer) + * @peer_session: link session # being used by peer end of link + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @keepalive_intv: link keepalive timer interval + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node + * @silent_intv_cnt: # of timer intervals without any reception from peer + * @proto_msg: template for control messages generated by link + * @pmsg: convenience pointer to "proto_msg" field + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) + * @exp_msg_count: # of tunnelled messages expected during link changeover + * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent + * @snt_nxt: next sequence number to use for outbound messages + * @last_retransmitted: sequence number of most recently retransmitted message + * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. + * @rcv_nxt: next sequence number to expect for inbound messages + * @deferred_queue: deferred queue saved OOS b'cast message received from node + * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @next_out: ptr to first unsent outbound message in queue + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @long_msg_seq_no: next identifier to use for outbound fragmented messages + * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link + * @stats: collects statistics regarding link activity + */ +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct tipc_media_addr *media_addr; + struct net *net; + + /* Management and link supervision data */ + u32 peer_session; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + unsigned long keepalive_intv; + u32 abort_limit; + u32 state; + u16 peer_caps; + bool active; + u32 silent_intv_cnt; + struct { + unchar hdr[INT_H_SIZE]; + unchar body[TIPC_MAX_IF_NAME]; + } proto_msg; + struct tipc_msg *pmsg; + u32 priority; + char net_plane; + + /* Failover/synch */ + u16 drop_point; + struct sk_buff *failover_reasm_skb; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; + u16 snd_nxt; + u16 last_retransm; + u16 window; + u32 stale_count; + + /* Reception */ + u16 rcv_nxt; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + int nack_state; + bool bc_peer_is_up; + + /* Statistics */ + struct tipc_stats stats; +}; + /* * Error message prefixes */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; -static const char tipc_bclink_name[] = "broadcast-link"; - -static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { - [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { - .type = NLA_STRING, - .len = TIPC_MAX_LINK_NAME - }, - [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } -}; /* Properties valid for media, bearar and link */ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { @@ -117,8 +245,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); -static void link_reset_statistics(struct tipc_link *l_ptr); -static void link_print(struct tipc_link *l_ptr, const char *str); +static void link_print(struct tipc_link *l, const char *str); static void tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, @@ -183,6 +310,36 @@ void tipc_link_set_active(struct tipc_link *l, bool active) l->active = active; } +u32 tipc_link_id(struct tipc_link *l) +{ + return l->peer_bearer_id << 16 | l->bearer_id; +} + +int tipc_link_window(struct tipc_link *l) +{ + return l->window; +} + +int tipc_link_prio(struct tipc_link *l) +{ + return l->priority; +} + +unsigned long tipc_link_tolerance(struct tipc_link *l) +{ + return l->tolerance; +} + +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) +{ + return l->inputq; +} + +char tipc_link_plane(struct tipc_link *l) +{ + return l->net_plane; +} + void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq) @@ -225,11 +382,31 @@ int tipc_link_mtu(struct tipc_link *l) return l->mtu; } +u16 tipc_link_rcv_nxt(struct tipc_link *l) +{ + return l->rcv_nxt; +} + +u16 tipc_link_acked(struct tipc_link *l) +{ + return l->acked; +} + +char *tipc_link_name(struct tipc_link *l) +{ + return l->name; +} + static u32 link_own_addr(struct tipc_link *l) { return msg_prevnode(l->pmsg); } +void tipc_link_reinit(struct tipc_link *l, u32 addr) +{ + msg_set_prevnode(l->pmsg, addr); +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -692,7 +869,7 @@ void tipc_link_reset(struct tipc_link *l) l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; - link_reset_statistics(l); + tipc_link_reset_stats(l); } /** @@ -1085,8 +1262,9 @@ drop: /* * Send protocol message to the other endpoint. */ -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority) +static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, + int probe_msg, u32 gap, u32 tolerance, + u32 priority) { struct sk_buff *skb = NULL; struct sk_buff_head xmitq; @@ -1260,6 +1438,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* fall thru' */ case ACTIVATE_MSG: + skb_linearize(skb); + hdr = buf_msg(skb); /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; @@ -1525,53 +1705,17 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } -/* tipc_link_find_owner - locate owner node of link by link's name - * @net: the applicable net namespace - * @name: pointer to link name string - * @bearer_id: pointer to index in 'node->links' array where the link was found. - * - * Returns pointer to node owning the link, or 0 if no matching link is found. - */ -static struct tipc_node *tipc_link_find_owner(struct net *net, - const char *link_name, - unsigned int *bearer_id) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - struct tipc_node *found_node = NULL; - int i; - - *bearer_id = 0; - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i].link; - if (l_ptr && !strcmp(l_ptr->name, link_name)) { - *bearer_id = i; - found_node = n_ptr; - break; - } - } - tipc_node_unlock(n_ptr); - if (found_node) - break; - } - rcu_read_unlock(); - - return found_node; -} - /** - * link_reset_statistics - reset link statistics - * @l_ptr: pointer to link + * link_reset_stats - reset link statistics + * @l: pointer to link */ -static void link_reset_statistics(struct tipc_link *l_ptr) +void tipc_link_reset_stats(struct tipc_link *l) { - memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); - l_ptr->stats.sent_info = l_ptr->snd_nxt; - l_ptr->stats.recv_info = l_ptr->rcv_nxt; + memset(&l->stats, 0, sizeof(l->stats)); + if (!link_is_bc_sndlink(l)) { + l->stats.sent_info = l->snd_nxt; + l->stats.recv_info = l->rcv_nxt; + } } static void link_print(struct tipc_link *l, const char *str) @@ -1624,84 +1768,6 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) return 0; } -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) -{ - int err; - int res = 0; - int bearer_id; - char *name; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); - if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - - if (strcmp(name, tipc_bclink_name) == 0) - return tipc_nl_bc_link_set(net, attrs); - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - - tipc_node_lock(node); - - link = node->links[bearer_id].link; - if (!link) { - res = -EINVAL; - goto out; - } - - if (attrs[TIPC_NLA_LINK_PROP]) { - struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - - err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], - props); - if (err) { - res = err; - goto out; - } - - if (props[TIPC_NLA_PROP_TOL]) { - u32 tol; - - tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); - } - if (props[TIPC_NLA_PROP_PRIO]) { - u32 prio; - - prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); - } - if (props[TIPC_NLA_PROP_WIN]) { - u32 win; - - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); - } - } - -out: - tipc_node_unlock(node); - - return res; -} - static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; @@ -1768,8 +1834,8 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, - struct tipc_link *link, int nlflags) +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) { int err; void *hdr; @@ -1838,198 +1904,134 @@ msg_full: return -EMSGSIZE; } -/* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) { - u32 i; - int err; - - for (i = *prev_link; i < MAX_BEARERS; i++) { - *prev_link = i; - - if (!node->links[i].link) - continue; + int i; + struct nlattr *nest; - err = __tipc_nl_add_link(net, msg, - node->links[i].link, NLM_F_MULTI); - if (err) - return err; - } - *prev_link = 0; + struct nla_map { + __u32 key; + __u32 val; + }; - return 0; -} + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? + (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node; - struct tipc_nl_msg msg; - u32 prev_node = cb->args[0]; - u32 prev_link = cb->args[1]; - int done = cb->args[2]; - int err; + nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; - if (done) - return 0; + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; - msg.skb = skb; - msg.portid = NETLINK_CB(cb->skb).portid; - msg.seq = cb->nlh->nlmsg_seq; - - rcu_read_lock(); - if (prev_node) { - node = tipc_node_find(net, prev_node); - if (!node) { - /* We never set seq or call nl_dump_check_consistent() - * this means that setting prev_seq here will cause the - * consistence check to fail in the netlink callback - * handler. Resulting in the last NLMSG_DONE message - * having the NLM_F_DUMP_INTR flag set. - */ - cb->prev_seq = 1; - goto out; - } - tipc_node_put(node); - - list_for_each_entry_continue_rcu(node, &tn->node_list, - list) { - tipc_node_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } else { - err = tipc_nl_add_bc_link(net, &msg); - if (err) - goto out; - - list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } - done = 1; -out: - rcu_read_unlock(); + nla_nest_end(skb, nest); - cb->args[0] = prev_node; - cb->args[1] = prev_link; - cb->args[2] = done; + return 0; +msg_full: + nla_nest_cancel(skb, nest); - return skb->len; + return -EMSGSIZE; } -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { - struct net *net = genl_info_net(info); - struct tipc_nl_msg msg; - char *name; int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; - msg.portid = info->snd_portid; - msg.seq = info->snd_seq; - - if (!info->attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + if (!bcl) + return 0; - msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!msg.skb) - return -ENOMEM; + tipc_bcast_lock(net); - if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } else { - int bearer_id; - struct tipc_node *node; - struct tipc_link *link; + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; - tipc_node_lock(node); - link = node->links[bearer_id].link; - if (!link) { - tipc_node_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; - } + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; - err = __tipc_nl_add_link(net, &msg, link, 0); - tipc_node_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) + goto attr_msg_full; - return genlmsg_reply(msg.skb, info); -} + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) -{ - int err; - char *link_name; - unsigned int bearer_id; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + goto attr_msg_full; - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); - if (err) - return err; - return 0; - } + tipc_bcast_unlock(net); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); - node = tipc_link_find_owner(net, link_name, &bearer_id); - if (!node) - return -EINVAL; + return 0; - tipc_node_lock(node); +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bcast_unlock(net); + genlmsg_cancel(msg->skb, hdr); - link = node->links[bearer_id].link; - if (!link) { - tipc_node_unlock(node); - return -EINVAL; - } + return -EMSGSIZE; +} - link_reset_statistics(link); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) +{ + l->tolerance = tol; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); +} - tipc_node_unlock(node); +void tipc_link_set_prio(struct tipc_link *l, u32 prio) +{ + l->priority = prio; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); +} - return 0; +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) +{ + l->abort_limit = limit; } diff --git a/net/tipc/link.h b/net/tipc/link.h index 66d859b66c84..b2ae0f4276af 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -45,10 +45,6 @@ */ #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ -/* Out-of-range value for link sequence numbers - */ -#define INVALID_LINK_SEQ 0x10000 - /* Link FSM events: */ enum { @@ -75,151 +71,6 @@ enum { */ #define MAX_PKT_DEFAULT 1500 -struct tipc_stats { - u32 sent_info; /* used in counting # sent packets */ - u32 recv_info; /* used in counting # recv'd packets */ - u32 sent_states; - u32 recv_states; - u32 sent_probes; - u32 recv_probes; - u32 sent_nacks; - u32 recv_nacks; - u32 sent_acks; - u32 sent_bundled; - u32 sent_bundles; - u32 recv_bundled; - u32 recv_bundles; - u32 retransmitted; - u32 sent_fragmented; - u32 sent_fragments; - u32 recv_fragmented; - u32 recv_fragments; - u32 link_congs; /* # port sends blocked by congestion */ - u32 deferred_recv; - u32 duplicates; - u32 max_queue_sz; /* send queue size high water mark */ - u32 accu_queue_sz; /* used for send queue size profiling */ - u32 queue_sz_counts; /* used for send queue size profiling */ - u32 msg_length_counts; /* used for message length profiling */ - u32 msg_lengths_total; /* used for message length profiling */ - u32 msg_length_profile[7]; /* used for msg. length profiling */ -}; - -/** - * struct tipc_link - TIPC link data structure - * @addr: network address of link's peer node - * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer - * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) - * @peer_session: link session # being used by peer end of link - * @peer_bearer_id: bearer id used by link's peer endpoint - * @bearer_id: local bearer id used by link - * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @keepalive_intv: link keepalive timer interval - * @abort_limit: # of unacknowledged continuity probes needed to reset link - * @state: current state of link FSM - * @peer_caps: bitmap describing capabilities of peer node - * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field - * @priority: current link priority - * @net_plane: current link network plane ('A' through 'H') - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset - * @mtu: current maximum packet size for this link - * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages - * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages - * @last_retransmitted: sequence number of most recently retransmitted message - * @stale_count: # of identical retransmit requests made by peer - * @ackers: # of peers that needs to ack each packet before it can be released - * @acked: # last packet acked by a certain peer. Used for broadcast. - * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer - * @inputq: buffer queue for messages to be delivered upwards - * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue - * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link - * @stats: collects statistics regarding link activity - */ -struct tipc_link { - u32 addr; - char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr *media_addr; - struct net *net; - - /* Management and link supervision data */ - u32 peer_session; - u32 peer_bearer_id; - u32 bearer_id; - u32 tolerance; - unsigned long keepalive_intv; - u32 abort_limit; - u32 state; - u16 peer_caps; - bool active; - u32 silent_intv_cnt; - struct { - unchar hdr[INT_H_SIZE]; - unchar body[TIPC_MAX_IF_NAME]; - } proto_msg; - struct tipc_msg *pmsg; - u32 priority; - char net_plane; - - /* Failover/synch */ - u16 drop_point; - struct sk_buff *failover_reasm_skb; - - /* Max packet negotiation */ - u16 mtu; - u16 advertised_mtu; - - /* Sending */ - struct sk_buff_head transmq; - struct sk_buff_head backlogq; - struct { - u16 len; - u16 limit; - } backlog[5]; - u16 snd_nxt; - u16 last_retransm; - u16 window; - u32 stale_count; - - /* Reception */ - u16 rcv_nxt; - u32 rcv_unacked; - struct sk_buff_head deferdq; - struct sk_buff_head *inputq; - struct sk_buff_head *namedq; - - /* Congestion handling */ - struct sk_buff_head wakeupq; - - /* Fragmentation/reassembly */ - struct sk_buff *reasm_buf; - - /* Broadcast */ - u16 ackers; - u16 acked; - struct tipc_link *bc_rcvlink; - struct tipc_link *bc_sndlink; - int nack_state; - bool bc_peer_is_up; - - /* Statistics */ - struct tipc_stats stats; -}; - bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, int window, u32 session, u32 ownnode, u32 peer, @@ -235,11 +86,11 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, struct tipc_link **link); +void tipc_link_reinit(struct tipc_link *l, u32 addr); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); -void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); bool tipc_link_peer_is_down(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); @@ -248,15 +99,25 @@ bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); -void tipc_link_reset(struct tipc_link *l_ptr); -int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, +void tipc_link_reset(struct tipc_link *l); +void tipc_link_reset_stats(struct tipc_link *l); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l); +u16 tipc_link_rcv_nxt(struct tipc_link *l); +u16 tipc_link_acked(struct tipc_link *l); +u32 tipc_link_id(struct tipc_link *l); +char *tipc_link_name(struct tipc_link *l); +char tipc_link_plane(struct tipc_link *l); +int tipc_link_prio(struct tipc_link *l); +int tipc_link_window(struct tipc_link *l); +unsigned long tipc_link_tolerance(struct tipc_link *l); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol); +void tipc_link_set_prio(struct tipc_link *l, u32 prio); +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); - -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c07612bab95c..ebe9d0ff6e9e 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -84,31 +84,6 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, return buf; } -void named_cluster_distribute(struct net *net, struct sk_buff *skb) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *oskb; - struct tipc_node *node; - u32 dnode; - - rcu_read_lock(); - list_for_each_entry_rcu(node, &tn->node_list, list) { - dnode = node->addr; - if (in_own_node(net, dnode)) - continue; - if (!tipc_node_is_up(node)) - continue; - oskb = pskb_copy(skb, GFP_ATOMIC); - if (!oskb) - break; - msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, 0); - } - rcu_read_unlock(); - - kfree_skb(skb); -} - /** * tipc_named_publish - tell other nodes about a new publication by this node */ @@ -226,42 +201,6 @@ void tipc_named_node_up(struct net *net, u32 dnode) tipc_node_xmit(net, &head, dnode, 0); } -static void tipc_publ_subscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - if (in_own_node(net, addr)) - return; - - node = tipc_node_find(net, addr); - if (!node) { - pr_warn("Node subscription rejected, unknown node 0x%x\n", - addr); - return; - } - - tipc_node_lock(node); - list_add_tail(&publ->nodesub_list, &node->publ_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - -static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - node = tipc_node_find(net, addr); - if (!node) - return; - - tipc_node_lock(node); - list_del_init(&publ->nodesub_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - /** * tipc_publ_purge - remove publication associated with a failed node * @@ -277,7 +216,7 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(net, p, addr); + tipc_node_unsubscribe(net, &p->nodesub_list, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { @@ -317,7 +256,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(net, publ, node); + tipc_node_subscribe(net, &publ->nodesub_list, node); return true; } } else if (dtype == WITHDRAWAL) { @@ -326,7 +265,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(net, publ, node); + tipc_node_unsubscribe(net, &publ->nodesub_list, node); kfree_rcu(publ, rcu); return true; } @@ -397,6 +336,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) spin_lock_bh(&tn->nametbl_lock); for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + skb_linearize(skb); msg = buf_msg(skb); mtype = msg_type(msg); item = (struct distr_item *)msg_data(msg); diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index dd2d9fd80da2..1264ba0af937 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -69,7 +69,6 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); -void named_cluster_distribute(struct net *net, struct sk_buff *buf); void tipc_named_node_up(struct net *net, u32 dnode); void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); void tipc_named_reinit(struct net *net); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 0f47f08bf38f..91fce70291a8 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -42,6 +42,7 @@ #include "subscr.h" #include "bcast.h" #include "addr.h" +#include "node.h" #include <net/genetlink.h> #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -677,7 +678,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(net, buf); + tipc_node_broadcast(net, buf); return publ; } @@ -709,7 +710,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(net, skb); + tipc_node_broadcast(net, skb); return 1; } return 0; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 7f6475efc984..8975b0135b76 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -101,18 +101,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_LINK_GET, - .doit = tipc_nl_link_get, - .dumpit = tipc_nl_link_dump, + .doit = tipc_nl_node_get_link, + .dumpit = tipc_nl_node_dump_link, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, - .doit = tipc_nl_link_set, + .doit = tipc_nl_node_set_link, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, - .doit = tipc_nl_link_reset_stats, + .doit = tipc_nl_node_reset_link_stats, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1eadc95e1132..2c016fdefe97 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1023,25 +1023,25 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) msg->req_type = TIPC_TLV_LINK_NAME; msg->rep_size = ULTRA_STRING_MAX_LEN; msg->rep_type = TIPC_TLV_ULTRA_STRING; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_stat_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_GET_LINKS: msg->req_type = TIPC_TLV_NET_ADDR; msg->rep_size = ULTRA_STRING_MAX_LEN; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_SET_LINK_TOL: case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: msg->req_type = TIPC_TLV_LINK_CONFIG; - doit.doit = tipc_nl_link_set; + doit.doit = tipc_nl_node_set_link; doit.transcode = tipc_nl_compat_link_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_RESET_LINK_STATS: msg->req_type = TIPC_TLV_LINK_NAME; - doit.doit = tipc_nl_link_reset_stats; + doit.doit = tipc_nl_node_reset_link_stats; doit.transcode = tipc_nl_compat_link_reset_stats; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SHOW_NAME_TABLE: diff --git a/net/tipc/node.c b/net/tipc/node.c index 20cddec0a43c..3f7a4ed71990 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,84 @@ #include "bcast.h" #include "discover.h" +#define INVALID_NODE_SIG 0x10000 + +/* Flags used to take different actions according to flag type + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type + */ +enum { + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7) +}; + +struct tipc_link_entry { + struct tipc_link *link; + spinlock_t lock; /* per link */ + u32 mtu; + struct sk_buff_head inputq; + struct tipc_media_addr maddr; +}; + +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @ref: reference counter to node object + * @lock: rwlock governing access to structure + * @net: the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @active_links: bearer ids of active links, used as index into links[] array + * @links: array containing references to all links to node + * @action_flags: bit mask of different types of node actions + * @state: connectivity state vs peer node + * @sync_point: sequence number where synch/failover is finished + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @publ_list: list of publications + * @rcu: rcu struct for tipc_node + */ +struct tipc_node { + u32 addr; + struct kref kref; + rwlock_t lock; + struct net *net; + struct hlist_node hash; + int active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; + int action_flags; + struct list_head list; + int state; + u16 sync_point; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + struct list_head publ_list; + struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; + struct rcu_head rcu; +}; + /* Node FSM states and events: */ enum { @@ -75,6 +153,9 @@ static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); +static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static void tipc_node_put(struct tipc_node *node); +static bool tipc_node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -83,12 +164,54 @@ struct tipc_sock_conn { struct list_head list; }; +static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_LINK_NAME + }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } }; +static struct tipc_link *node_active_link(struct tipc_node *n, int sel) +{ + int bearer_id = n->active_links[sel & 1]; + + if (unlikely(bearer_id == INVALID_BEARER_ID)) + return NULL; + + return n->links[bearer_id].link; +} + +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) +{ + struct tipc_node *n; + int bearer_id; + unsigned int mtu = MAX_MSG_SIZE; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return mtu; + + bearer_id = n->active_links[sel & 1]; + if (likely(bearer_id != INVALID_BEARER_ID)) + mtu = n->links[bearer_id].mtu; + tipc_node_put(n); + return mtu; +} /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. The number of hash table @@ -107,7 +230,7 @@ static void tipc_node_kref_release(struct kref *kref) tipc_node_delete(node); } -void tipc_node_put(struct tipc_node *node) +static void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } @@ -120,7 +243,7 @@ static void tipc_node_get(struct tipc_node *node) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(struct net *net, u32 addr) +static struct tipc_node *tipc_node_find(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; @@ -141,66 +264,122 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } +static void tipc_node_read_lock(struct tipc_node *n) +{ + read_lock_bh(&n->lock); +} + +static void tipc_node_read_unlock(struct tipc_node *n) +{ + read_unlock_bh(&n->lock); +} + +static void tipc_node_write_lock(struct tipc_node *n) +{ + write_lock_bh(&n->lock); +} + +static void tipc_node_write_unlock(struct tipc_node *n) +{ + struct net *net = n->net; + u32 addr = 0; + u32 flags = n->action_flags; + u32 link_id = 0; + struct list_head *publ_list; + + if (likely(!flags)) { + write_unlock_bh(&n->lock); + return; + } + + addr = n->addr; + link_id = n->link_id; + publ_list = &n->publ_list; + + n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); + + write_unlock_bh(&n->lock); + + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); + + if (flags & TIPC_NOTIFY_NODE_UP) + tipc_named_node_up(net, addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, + link_id, addr); +} + struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *n_ptr, *temp_node; + struct tipc_node *n, *temp_node; + int i; spin_lock_bh(&tn->node_list_lock); - n_ptr = tipc_node_find(net, addr); - if (n_ptr) + n = tipc_node_find(net, addr); + if (n) goto exit; - n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); - if (!n_ptr) { + n = kzalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { pr_warn("Node creation failed, no memory\n"); goto exit; } - n_ptr->addr = addr; - n_ptr->net = net; - n_ptr->capabilities = capabilities; - kref_init(&n_ptr->kref); - spin_lock_init(&n_ptr->lock); - INIT_HLIST_NODE(&n_ptr->hash); - INIT_LIST_HEAD(&n_ptr->list); - INIT_LIST_HEAD(&n_ptr->publ_list); - INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bc_entry.namedq); - skb_queue_head_init(&n_ptr->bc_entry.inputq1); - __skb_queue_head_init(&n_ptr->bc_entry.arrvq); - skb_queue_head_init(&n_ptr->bc_entry.inputq2); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + n->addr = addr; + n->net = net; + n->capabilities = capabilities; + kref_init(&n->kref); + rwlock_init(&n->lock); + INIT_HLIST_NODE(&n->hash); + INIT_LIST_HEAD(&n->list); + INIT_LIST_HEAD(&n->publ_list); + INIT_LIST_HEAD(&n->conn_sks); + skb_queue_head_init(&n->bc_entry.namedq); + skb_queue_head_init(&n->bc_entry.inputq1); + __skb_queue_head_init(&n->bc_entry.arrvq); + skb_queue_head_init(&n->bc_entry.inputq2); + for (i = 0; i < MAX_BEARERS; i++) + spin_lock_init(&n->links[i].lock); + hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n_ptr->addr < temp_node->addr) + if (n->addr < temp_node->addr) break; } - list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->state = SELF_DOWN_PEER_LEAVING; - n_ptr->signature = INVALID_NODE_SIG; - n_ptr->active_links[0] = INVALID_BEARER_ID; - n_ptr->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, - U16_MAX, tipc_bc_sndlink(net)->window, - n_ptr->capabilities, - &n_ptr->bc_entry.inputq1, - &n_ptr->bc_entry.namedq, + list_add_tail_rcu(&n->list, &temp_node->list); + n->state = SELF_DOWN_PEER_LEAVING; + n->signature = INVALID_NODE_SIG; + n->active_links[0] = INVALID_BEARER_ID; + n->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr, + U16_MAX, + tipc_link_window(tipc_bc_sndlink(net)), + n->capabilities, + &n->bc_entry.inputq1, + &n->bc_entry.namedq, tipc_bc_sndlink(net), - &n_ptr->bc_entry.link)) { + &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n_ptr); - n_ptr = NULL; + kfree(n); + n = NULL; goto exit; } - tipc_node_get(n_ptr); - setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); - n_ptr->keepalive_intv = U32_MAX; + tipc_node_get(n); + setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + n->keepalive_intv = U32_MAX; exit: spin_unlock_bh(&tn->node_list_lock); - return n_ptr; + return n; } static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { - unsigned long tol = l->tolerance; + unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; unsigned long keepalive_intv = msecs_to_jiffies(intv); @@ -209,7 +388,7 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) n->keepalive_intv = keepalive_intv; /* Ensure link's abort limit corresponds to current interval */ - l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv); + tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv)); } static void tipc_node_delete(struct tipc_node *node) @@ -234,6 +413,42 @@ void tipc_node_stop(struct net *net) spin_unlock_bh(&tn->node_list_lock); } +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_write_lock(n); + list_add_tail(subscr, &n->publ_list); + tipc_node_write_unlock(n); + tipc_node_put(n); +} + +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_write_lock(n); + list_del_init(subscr); + tipc_node_write_unlock(n); + tipc_node_put(n); +} + int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; @@ -257,9 +472,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) conn->port = port; conn->peer_port = peer_port; - tipc_node_lock(node); + tipc_node_write_lock(node); list_add_tail(&conn->list, &node->conn_sks); - tipc_node_unlock(node); + tipc_node_write_unlock(node); exit: tipc_node_put(node); return err; @@ -277,14 +492,14 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) if (!node) return; - tipc_node_lock(node); + tipc_node_write_lock(node); list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { if (port != conn->port) continue; list_del(&conn->list); kfree(conn); } - tipc_node_unlock(node); + tipc_node_write_unlock(node); tipc_node_put(node); } @@ -301,14 +516,16 @@ static void tipc_node_timeout(unsigned long data) __skb_queue_head_init(&xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { - tipc_node_lock(n); + tipc_node_read_lock(n); le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); if (le->link) { /* Link tolerance may change asynchronously: */ tipc_node_calculate_timer(n, le->link); rc = tipc_link_timeout(le->link, &xmitq); } - tipc_node_unlock(n); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); @@ -340,16 +557,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; - n->link_id = nl->peer_bearer_id << 16 | bearer_id; + n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; + n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", - nl->name, nl->net_plane); + tipc_link_name(nl), tipc_link_plane(nl)); /* First link? => give it both slots */ if (!ol) { @@ -362,17 +579,17 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, } /* Second link => redistribute slots */ - if (nl->priority > ol->priority) { - pr_debug("Old link <%s> becomes standby\n", ol->name); + if (tipc_link_prio(nl) > tipc_link_prio(ol)) { + pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol)); *slot0 = bearer_id; *slot1 = bearer_id; tipc_link_set_active(nl, true); tipc_link_set_active(ol, false); - } else if (nl->priority == ol->priority) { + } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) { tipc_link_set_active(nl, true); *slot1 = bearer_id; } else { - pr_debug("New link <%s> is standby\n", nl->name); + pr_debug("New link <%s> is standby\n", tipc_link_name(nl)); } /* Prepare synchronization with first link */ @@ -387,9 +604,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { - tipc_node_lock(n); + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); - tipc_node_unlock(n); + tipc_node_write_unlock(n); } /** @@ -402,7 +619,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct tipc_link_entry *le = &n->links[*bearer_id]; int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; - int i, highest = 0; + int i, highest = 0, prio; struct tipc_link *l, *_l, *tnl; l = n->links[*bearer_id].link; @@ -411,12 +628,12 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n->link_id = l->peer_bearer_id << 16 | *bearer_id; + n->link_id = tipc_link_id(l); tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); pr_debug("Lost link <%s> on network plane %c\n", - l->name, l->net_plane); + tipc_link_name(l), tipc_link_plane(l)); /* Select new active link if any available */ *slot0 = INVALID_BEARER_ID; @@ -427,10 +644,11 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, continue; if (_l == l) continue; - if (_l->priority < highest) + prio = tipc_link_prio(_l); + if (prio < highest) continue; - if (_l->priority > highest) { - highest = _l->priority; + if (prio > highest) { + highest = prio; *slot0 = i; *slot1 = i; continue; @@ -453,17 +671,17 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ - tnl = node_active_link(n, 0); + *bearer_id = n->active_links[0]; + tnl = n->links[*bearer_id].link; tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); - n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); - *maddr = &n->links[tnl->bearer_id].maddr; - *bearer_id = tnl->bearer_id; + *maddr = &n->links[*bearer_id].maddr; } static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) @@ -478,7 +696,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) __skb_queue_head_init(&xmitq); - tipc_node_lock(n); + tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); if (delete) { @@ -490,12 +708,12 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) /* Defuse pending tipc_node_link_up() */ tipc_link_fsm_evt(l, LINK_RESET_EVT); } - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } -bool tipc_node_is_up(struct tipc_node *n) +static bool tipc_node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } @@ -523,7 +741,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (!n) return; - tipc_node_lock(n); + tipc_node_write_lock(n); le = &n->links[b->identity]; @@ -626,7 +844,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: - tipc_node_unlock(n); + tipc_node_write_unlock(n); if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); @@ -834,24 +1052,6 @@ illegal_evt: pr_err("Illegal node fsm evt %x in state %x\n", evt, state); } -bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) -{ - int state = n->state; - - if (likely(state == SELF_UP_PEER_UP)) - return true; - - if (state == SELF_LEAVING_PEER_DOWN) - return false; - - if (state == SELF_DOWN_PEER_LEAVING) { - if (msg_peer_node_is_up(hdr)) - return false; - } - - return true; -} - static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { @@ -913,56 +1113,18 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, if (bearer_id >= MAX_BEARERS) goto exit; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { - strncpy(linkname, link->name, len); + strncpy(linkname, tipc_link_name(link), len); err = 0; } exit: - tipc_node_unlock(node); + tipc_node_read_unlock(node); tipc_node_put(node); return err; } -void tipc_node_unlock(struct tipc_node *node) -{ - struct net *net = node->net; - u32 addr = 0; - u32 flags = node->action_flags; - u32 link_id = 0; - struct list_head *publ_list; - - if (likely(!flags)) { - spin_unlock_bh(&node->lock); - return; - } - - addr = node->addr; - link_id = node->link_id; - publ_list = &node->publ_list; - - node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); - - spin_unlock_bh(&node->lock); - - if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); - - if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); - - if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, addr); - - if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - link_id, addr); - -} - /* Caller should hold node lock for the passed node */ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { @@ -997,20 +1159,6 @@ msg_full: return -EMSGSIZE; } -static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, - int *bearer_id, - struct tipc_media_addr **maddr) -{ - int id = n->active_links[sel & 1]; - - if (unlikely(id < 0)) - return NULL; - - *bearer_id = id; - *maddr = &n->links[id].maddr; - return n->links[id].link; -} - /** * tipc_node_xmit() is the general link level function for message sending * @net: the applicable net namespace @@ -1023,34 +1171,38 @@ static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { - struct tipc_link *l = NULL; + struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; - struct tipc_media_addr *maddr; - int bearer_id; + int bearer_id = -1; int rc = -EHOSTUNREACH; __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); if (likely(n)) { - tipc_node_lock(n); - l = tipc_node_select_link(n, selector, &bearer_id, &maddr); - if (likely(l)) - rc = tipc_link_xmit(l, list, &xmitq); - tipc_node_unlock(n); + tipc_node_read_lock(n); + bearer_id = n->active_links[selector & 1]; + if (bearer_id >= 0) { + le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + } + tipc_node_read_unlock(n); + if (likely(!skb_queue_empty(&xmitq))) { + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + return 0; + } if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); + return rc; } - if (likely(!rc)) { - tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); - return 0; - } - if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } - return rc; + + if (unlikely(!in_own_node(net, dnode))) + return rc; + tipc_sk_rcv(net, list); + return 0; } /* tipc_node_xmit_skb(): send single buffer to destination @@ -1075,6 +1227,30 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +{ + struct sk_buff *txskb; + struct tipc_node *n; + u32 dst; + + rcu_read_lock(); + list_for_each_entry_rcu(n, tipc_nodes(net), list) { + dst = n->addr; + if (in_own_node(net, dst)) + continue; + if (!tipc_node_is_up(n)) + continue; + txskb = pskb_copy(skb, GFP_ATOMIC); + if (!txskb) + break; + msg_set_destnode(buf_msg(txskb), dst); + tipc_node_xmit_skb(net, txskb, dst, 0); + } + rcu_read_unlock(); + + kfree_skb(skb); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace @@ -1116,9 +1292,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { - tipc_node_lock(n); + tipc_node_read_lock(n); tipc_link_build_ack_msg(le->link, &xmitq); - tipc_node_unlock(n); + tipc_node_read_unlock(n); } if (!skb_queue_empty(&xmitq)) @@ -1151,30 +1327,30 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 oseqno = msg_seqno(hdr); u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); u16 exp_pkts = msg_msgcnt(hdr); - u16 rcv_nxt, syncpt, dlv_nxt; + u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; - int i, pb_id; + int pb_id; l = n->links[bearer_id].link; if (!l) return false; - rcv_nxt = l->rcv_nxt; + rcv_nxt = tipc_link_rcv_nxt(l); if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) return true; /* Find parallel link, if any */ - for (i = 0; i < MAX_BEARERS; i++) { - if ((i != bearer_id) && n->links[i].link) { - pl = n->links[i].link; + for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { + if ((pb_id != bearer_id) && n->links[pb_id].link) { + pl = n->links[pb_id].link; break; } } - /* Update node accesibility if applicable */ + /* Check and update node accesibility if applicable */ if (state == SELF_UP_PEER_COMING) { if (!tipc_link_is_up(l)) return true; @@ -1187,8 +1363,12 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if (msg_peer_node_is_up(hdr)) return false; tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; } + if (state == SELF_LEAVING_PEER_DOWN) + return false; + /* Ignore duplicate packets */ if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; @@ -1197,9 +1377,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; if (pl && tipc_link_is_up(pl)) { - pb_id = pl->bearer_id; __tipc_node_link_down(n, &pb_id, xmitq, &maddr); - tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq); + tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), + tipc_link_inputq(l)); } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) @@ -1232,19 +1412,18 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); } - if (less(syncpt, n->sync_point)) - n->sync_point = syncpt; } /* Open tunnel link when parallel link reaches synch point */ - if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { + if (n->state == NODE_SYNCHING) { if (tipc_link_is_synching(l)) { tnl = l; } else { tnl = pl; pl = l; } - dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); + inputq_len = skb_queue_len(tipc_link_inputq(pl)); + dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; if (more(dlv_nxt, n->sync_point)) { tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); @@ -1304,22 +1483,32 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); - else if (unlikely(n->bc_entry.link->acked != bc_ack)) + else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); - tipc_node_lock(n); - - /* Is reception permitted at the moment ? */ - if (!tipc_node_filter_pkt(n, hdr)) - goto unlock; - - /* Check and if necessary update node state */ - if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { - rc = tipc_link_rcv(le->link, skb, &xmitq); - skb = NULL; + /* Receive packet directly if conditions permit */ + tipc_node_read_lock(n); + if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { + spin_lock_bh(&le->lock); + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + spin_unlock_bh(&le->lock); + } + tipc_node_read_unlock(n); + + /* Check/update node state before receiving */ + if (unlikely(skb)) { + tipc_node_write_lock(n); + if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + } + tipc_node_write_unlock(n); } -unlock: - tipc_node_unlock(n); if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, bearer_id, &xmitq); @@ -1384,15 +1573,15 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node(&msg, node); if (err) { last_addr = node->addr; - tipc_node_unlock(node); + tipc_node_read_unlock(node); goto out; } - tipc_node_unlock(node); + tipc_node_read_unlock(node); } done = 1; out: @@ -1402,3 +1591,314 @@ out: return skb->len; } + +/* tipc_node_find_by_name - locate owner node of link by link's name + * @net: the applicable net namespace + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * + * Returns pointer to node owning the link, or 0 if no matching link is found. + */ +static struct tipc_node *tipc_node_find_by_name(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l; + struct tipc_node *n; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_read_lock(n); + for (i = 0; i < MAX_BEARERS; i++) { + l = n->links[i].link; + if (l && !strcmp(tipc_link_name(l), link_name)) { + *bearer_id = i; + found_node = n; + break; + } + } + tipc_node_read_unlock(n); + if (found_node) + break; + } + rcu_read_unlock(); + + return found_node; +} + +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + + link = node->links[bearer_id].link; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_link_set_tolerance(link, tol); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + tipc_link_set_prio(link, prio); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_read_unlock(node); + + return res; +} + +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct tipc_nl_msg msg; + char *name; + int err; + + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + if (!info->attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); + nlmsg_free(msg.skb); + return -EINVAL; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(net); + if (err) + return err; + return 0; + } + + node = tipc_node_find_by_name(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + le = &node->links[bearer_id]; + tipc_node_read_lock(node); + spin_lock_bh(&le->lock); + link = node->links[bearer_id].link; + if (!link) { + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return -EINVAL; + } + tipc_link_reset_stats(link); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return 0; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i].link) + continue; + + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 6734562d3c6e..f39d9d06e8bb 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,23 +42,6 @@ #include "bearer.h" #include "msg.h" -/* Out-of-range value for node signature */ -#define INVALID_NODE_SIG 0x10000 - -#define INVALID_BEARER_ID -1 - -/* Flags used to take different actions according to flag type - * TIPC_NOTIFY_NODE_DOWN: notify node is down - * TIPC_NOTIFY_NODE_UP: notify node is up - * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type - */ -enum { - TIPC_NOTIFY_NODE_DOWN = (1 << 3), - TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7) -}; - /* Optional capabilities supported by this code version */ enum { @@ -66,72 +49,8 @@ enum { }; #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH +#define INVALID_BEARER_ID -1 -struct tipc_link_entry { - struct tipc_link *link; - u32 mtu; - struct sk_buff_head inputq; - struct tipc_media_addr maddr; -}; - -struct tipc_bclink_entry { - struct tipc_link *link; - struct sk_buff_head inputq1; - struct sk_buff_head arrvq; - struct sk_buff_head inputq2; - struct sk_buff_head namedq; -}; - -/** - * struct tipc_node - TIPC node structure - * @addr: network address of node - * @ref: reference counter to node object - * @lock: spinlock governing access to structure - * @net: the applicable net namespace - * @hash: links to adjacent nodes in unsorted hash chain - * @inputq: pointer to input queue containing messages for msg event - * @namedq: pointer to name table input queue with name table messages - * @active_links: bearer ids of active links, used as index into links[] array - * @links: array containing references to all links to node - * @action_flags: bit mask of different types of node actions - * @state: connectivity state vs peer node - * @sync_point: sequence number where synch/failover is finished - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @working_links: number of working links to node (both active and standby) - * @link_cnt: number of links to node - * @capabilities: bitmap, indicating peer node's functional capabilities - * @signature: node instance identifier - * @link_id: local and remote bearer ids of changing link, if any - * @publ_list: list of publications - * @rcu: rcu struct for tipc_node - */ -struct tipc_node { - u32 addr; - struct kref kref; - spinlock_t lock; - struct net *net; - struct hlist_node hash; - int active_links[2]; - struct tipc_link_entry links[MAX_BEARERS]; - struct tipc_bclink_entry bc_entry; - int action_flags; - struct list_head list; - int state; - u16 sync_point; - int link_cnt; - u16 working_links; - u16 capabilities; - u32 signature; - u32 link_id; - struct list_head publ_list; - struct list_head conn_sks; - unsigned long keepalive_intv; - struct timer_list timer; - struct rcu_head rcu; -}; - -struct tipc_node *tipc_node_find(struct net *net, u32 addr); -void tipc_node_put(struct tipc_node *node); void tipc_node_stop(struct net *net); void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *bearer, @@ -139,50 +58,22 @@ void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -void tipc_node_unlock(struct tipc_node *node); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); - -static inline void tipc_node_lock(struct tipc_node *node) -{ - spin_lock_bh(&node->lock); -} - -static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) -{ - int bearer_id = n->active_links[sel & 1]; - - if (unlikely(bearer_id == INVALID_BEARER_ID)) - return NULL; - - return n->links[bearer_id].link; -} - -static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) -{ - struct tipc_node *n; - int bearer_id; - unsigned int mtu = MAX_MSG_SIZE; - - n = tipc_node_find(net, addr); - if (unlikely(!n)) - return mtu; - - bearer_id = n->active_links[sel & 1]; - if (likely(bearer_id != INVALID_BEARER_ID)) - mtu = n->links[bearer_id].mtu; - tipc_node_put(n); - return mtu; -} +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ad2719ad4c1b..816914ef228d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,7 +48,6 @@ #include <linux/tipc_netlink.h> #include "core.h" #include "bearer.h" -#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -221,10 +220,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; - int usr = msg_user(buf_msg(skb)); - - if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) - skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 2ad46f39649f..1820e74a5752 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -121,7 +121,7 @@ struct vmci_transport { u64 queue_pair_max_size; u32 detach_sub_id; union vmci_transport_notify notify; - struct vmci_transport_notify_ops *notify_ops; + const struct vmci_transport_notify_ops *notify_ops; struct list_head elem; struct sock *sk; spinlock_t lock; /* protects sk. */ diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 9b7f207f2bee..fd8cf0214d51 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) } /* Socket control packet based operations. */ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h index 7df793249b6c..3c464d394a8f 100644 --- a/net/vmw_vsock/vmci_transport_notify.h +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { void (*process_negotiate) (struct sock *sk); }; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; +extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern const +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index dc9c7929a2f9..21e591dafb03 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( } /* Socket always on control packet based operations. */ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, |