diff options
91 files changed, 3284 insertions, 1187 deletions
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index 2362dcd5afc9..21fd199e89b5 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -33,6 +33,7 @@ Required properties: Optional properties: - tx_delay: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as default. - rx_delay: Delay value for RXD timing. Range value is 0~0x7F, 0x10 as default. + - phy-supply: phandle to a regulator if the PHY needs one Example: diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 85b022179104..a5e4c813f17f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1287,6 +1287,13 @@ accept_ra_rtr_pref - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_mtu - BOOLEAN + Apply the MTU value specified in RA option 5 (RFC4861). If + disabled, the MTU specified in the RA will be ignored. + + Functional default: enabled if accept_ra is enabled. + disabled if accept_ra is disabled. + accept_redirects - BOOLEAN Accept Redirects. diff --git a/arch/arm/boot/dts/rk3288-evb-rk808.dts b/arch/arm/boot/dts/rk3288-evb-rk808.dts index 831a7aa85136..e1d3eeb8f094 100644 --- a/arch/arm/boot/dts/rk3288-evb-rk808.dts +++ b/arch/arm/boot/dts/rk3288-evb-rk808.dts @@ -161,7 +161,7 @@ }; &gmac { - phy_regulator = "vcc_phy"; + phy-supply = <&vcc_phy>; phy-mode = "rgmii"; clock_in_out = "input"; snps,reset-gpio = <&gpio4 7 0>; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 048cb170c884..98f51140a8c6 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -98,6 +98,8 @@ pinctrl-names = "default"; pinctrl-0 = <ð_phy_pwr>; regulator-name = "vcc_phy"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; regulator-boot-on; }; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0eb141c41416..a31e031afd87 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_vfs + 1) + if (slave_id >= dev->dev->persist->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 82a7dd87089b..c7619716c31d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->ib_dev = &dev->ib_dev; for (i = 0; - i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i < min(dev->dev->caps.sqp_demux, + (u16)(dev->dev->persist->num_vfs + 1)); i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev->dev, i); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 57ecc5b204f3..b4fa6f658800 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = dev->dev->pdev->device; + props->vendor_part_id = dev->dev->persist->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "MT%d\n", dev->dev->pdev->device); + return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, @@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) int i; if (mlx4_is_master(ibdev->dev)) { - for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (slave = 0; slave <= ibdev->dev->persist->num_vfs; + ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; @@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + i, j, dev->persist->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { - dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "Device struct alloc failed\n"); return NULL; } @@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; - ibdev->ib_dev.dma_device = &dev->pdev->dev; + ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "bit map alloc failed\n"); goto err_steer_qp_release; } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index c36ccbd9a644..e0d271782d0a 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device if (!mfrpl->ibfrpl.page_list) goto err_free; - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> + pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) @@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); - dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + dma_free_coherent(&dev->dev->persist->pdev->dev, size, + mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index cb4c66e723b5..d10c2b8a5dad 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) char base_name[9]; /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->pdev), max); + strlcpy(name, pci_name(dev->dev->persist->pdev), max); strncpy(base_name, name, 8); /*till xxxx:yy:*/ base_name[8] = '\0'; /* with no ARI only 3 last bits are used so when the fn is higher than 8 @@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return 0; - for (i = 0; i <= device->dev->num_vfs; ++i) + for (i = 0; i <= device->dev->persist->num_vfs; ++i) register_one_pkey_tree(device, i); return 0; @@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return; - for (slave = device->dev->num_vfs; slave >= 0; --slave) { + for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) { list_for_each_entry_safe(p, t, &device->pkeys.pkey_port_list[slave], entry) { diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0dceba1a2ba1..f47bc433407a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -998,7 +998,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, NETIF_F_HIGHDMA | NETIF_F_LRO) #define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ - NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL) + NETIF_F_TSO) static void bond_compute_features(struct bonding *bond) { @@ -1034,7 +1034,7 @@ static void bond_compute_features(struct bonding *bond) done: bond_dev->vlan_features = vlan_features; - bond_dev->hw_enc_features = enc_features; + bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; bond_dev->hard_header_len = max_hard_header_len; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); @@ -4010,7 +4010,7 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_FILTER; bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); - bond_dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; } diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index feb29c4526f7..09f6b3cc1f66 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -400,6 +400,16 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv) return 0; } +static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv) +{ + intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); + intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); + intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); + intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); + intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); + intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); +} + static int bcm_sf2_sw_setup(struct dsa_switch *ds) { const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; @@ -440,12 +450,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) } /* Disable all interrupts and request them */ - intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); - intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); - intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); - intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); + bcm_sf2_intr_disable(priv); ret = request_irq(priv->irq0, bcm_sf2_switch_0_isr, 0, "switch_0", priv); @@ -747,12 +752,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) struct bcm_sf2_priv *priv = ds_to_priv(ds); unsigned int port; - intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); - intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); - intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); - intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); + bcm_sf2_intr_disable(priv); /* Disable all ports physically present including the IMP * port, the other ones have already been disabled during diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index c74a898fcd4f..184a8d545ac4 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -727,9 +727,9 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) p->xauicfg[1] = simple_strtoul(vpd.xaui1cfg_data, NULL, 16); } - for (i = 0; i < 6; i++) - p->eth_base[i] = hex_to_bin(vpd.na_data[2 * i]) * 16 + - hex_to_bin(vpd.na_data[2 * i + 1]); + ret = hex2bin(p->eth_base, vpd.na_data, 6); + if (ret < 0) + return -EINVAL; return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index e468f920892f..24fc162ee57a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1008,7 +1008,10 @@ static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, int t4_seeprom_wp(struct adapter *adapter, bool enable); int get_vpd_params(struct adapter *adapter, struct vpd_params *p); +int t4_read_flash(struct adapter *adapter, unsigned int addr, + unsigned int nwords, u32 *data, int byte_oriented); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); +int t4_fwcache(struct adapter *adap, enum fw_params_param_dev_fwcache op); int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, const u8 *fw_data, unsigned int size, int force); unsigned int t4_flash_cfg_addr(struct adapter *adapter); @@ -1035,6 +1038,16 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, unsigned int flags); +int t4_read_rss(struct adapter *adapter, u16 *entries); +void t4_read_rss_key(struct adapter *adapter, u32 *key); +void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx); +void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, + u32 *valp); +void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, + u32 *vfl, u32 *vfh); +u32 t4_read_rss_pf_map(struct adapter *adapter); +u32 t4_read_rss_pf_mask(struct adapter *adapter); + int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 6dabfe5ba44e..714cc70b97cc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -36,6 +36,7 @@ #include <linux/debugfs.h> #include <linux/string_helpers.h> #include <linux/sort.h> +#include <linux/ctype.h> #include "cxgb4.h" #include "t4_regs.h" @@ -434,6 +435,51 @@ static const struct file_operations devlog_fops = { .release = seq_release_private }; +static ssize_t flash_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + loff_t pos = *ppos; + loff_t avail = FILE_DATA(file)->i_size; + struct adapter *adap = file->private_data; + + if (pos < 0) + return -EINVAL; + if (pos >= avail) + return 0; + if (count > avail - pos) + count = avail - pos; + + while (count) { + size_t len; + int ret, ofst; + u8 data[256]; + + ofst = pos & 3; + len = min(count + ofst, sizeof(data)); + ret = t4_read_flash(adap, pos - ofst, (len + 3) / 4, + (u32 *)data, 1); + if (ret) + return ret; + + len -= ofst; + if (copy_to_user(buf, data + ofst, len)) + return -EFAULT; + + buf += len; + pos += len; + count -= len; + } + count = pos - *ppos; + *ppos = pos; + return count; +} + +static const struct file_operations flash_debugfs_fops = { + .owner = THIS_MODULE, + .open = mem_open, + .read = flash_read, +}; + static inline void tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask) { *mask = x | y; @@ -579,6 +625,422 @@ static const struct file_operations clip_tbl_debugfs_fops = { }; #endif +/*RSS Table. + */ + +static int rss_show(struct seq_file *seq, void *v, int idx) +{ + u16 *entry = v; + + seq_printf(seq, "%4d: %4u %4u %4u %4u %4u %4u %4u %4u\n", + idx * 8, entry[0], entry[1], entry[2], entry[3], entry[4], + entry[5], entry[6], entry[7]); + return 0; +} + +static int rss_open(struct inode *inode, struct file *file) +{ + int ret; + struct seq_tab *p; + struct adapter *adap = inode->i_private; + + p = seq_open_tab(file, RSS_NENTRIES / 8, 8 * sizeof(u16), 0, rss_show); + if (!p) + return -ENOMEM; + + ret = t4_read_rss(adap, (u16 *)p->data); + if (ret) + seq_release_private(inode, file); + + return ret; +} + +static const struct file_operations rss_debugfs_fops = { + .owner = THIS_MODULE, + .open = rss_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +/* RSS Configuration. + */ + +/* Small utility function to return the strings "yes" or "no" if the supplied + * argument is non-zero. + */ +static const char *yesno(int x) +{ + static const char *yes = "yes"; + static const char *no = "no"; + + return x ? yes : no; +} + +static int rss_config_show(struct seq_file *seq, void *v) +{ + struct adapter *adapter = seq->private; + static const char * const keymode[] = { + "global", + "global and per-VF scramble", + "per-PF and per-VF scramble", + "per-VF and per-VF scramble", + }; + u32 rssconf; + + rssconf = t4_read_reg(adapter, TP_RSS_CONFIG_A); + seq_printf(seq, "TP_RSS_CONFIG: %#x\n", rssconf); + seq_printf(seq, " Tnl4TupEnIpv6: %3s\n", yesno(rssconf & + TNL4TUPENIPV6_F)); + seq_printf(seq, " Tnl2TupEnIpv6: %3s\n", yesno(rssconf & + TNL2TUPENIPV6_F)); + seq_printf(seq, " Tnl4TupEnIpv4: %3s\n", yesno(rssconf & + TNL4TUPENIPV4_F)); + seq_printf(seq, " Tnl2TupEnIpv4: %3s\n", yesno(rssconf & + TNL2TUPENIPV4_F)); + seq_printf(seq, " TnlTcpSel: %3s\n", yesno(rssconf & TNLTCPSEL_F)); + seq_printf(seq, " TnlIp6Sel: %3s\n", yesno(rssconf & TNLIP6SEL_F)); + seq_printf(seq, " TnlVrtSel: %3s\n", yesno(rssconf & TNLVRTSEL_F)); + seq_printf(seq, " TnlMapEn: %3s\n", yesno(rssconf & TNLMAPEN_F)); + seq_printf(seq, " OfdHashSave: %3s\n", yesno(rssconf & + OFDHASHSAVE_F)); + seq_printf(seq, " OfdVrtSel: %3s\n", yesno(rssconf & OFDVRTSEL_F)); + seq_printf(seq, " OfdMapEn: %3s\n", yesno(rssconf & OFDMAPEN_F)); + seq_printf(seq, " OfdLkpEn: %3s\n", yesno(rssconf & OFDLKPEN_F)); + seq_printf(seq, " Syn4TupEnIpv6: %3s\n", yesno(rssconf & + SYN4TUPENIPV6_F)); + seq_printf(seq, " Syn2TupEnIpv6: %3s\n", yesno(rssconf & + SYN2TUPENIPV6_F)); + seq_printf(seq, " Syn4TupEnIpv4: %3s\n", yesno(rssconf & + SYN4TUPENIPV4_F)); + seq_printf(seq, " Syn2TupEnIpv4: %3s\n", yesno(rssconf & + SYN2TUPENIPV4_F)); + seq_printf(seq, " Syn4TupEnIpv6: %3s\n", yesno(rssconf & + SYN4TUPENIPV6_F)); + seq_printf(seq, " SynIp6Sel: %3s\n", yesno(rssconf & SYNIP6SEL_F)); + seq_printf(seq, " SynVrt6Sel: %3s\n", yesno(rssconf & SYNVRTSEL_F)); + seq_printf(seq, " SynMapEn: %3s\n", yesno(rssconf & SYNMAPEN_F)); + seq_printf(seq, " SynLkpEn: %3s\n", yesno(rssconf & SYNLKPEN_F)); + seq_printf(seq, " ChnEn: %3s\n", yesno(rssconf & + CHANNELENABLE_F)); + seq_printf(seq, " PrtEn: %3s\n", yesno(rssconf & + PORTENABLE_F)); + seq_printf(seq, " TnlAllLkp: %3s\n", yesno(rssconf & + TNLALLLOOKUP_F)); + seq_printf(seq, " VrtEn: %3s\n", yesno(rssconf & + VIRTENABLE_F)); + seq_printf(seq, " CngEn: %3s\n", yesno(rssconf & + CONGESTIONENABLE_F)); + seq_printf(seq, " HashToeplitz: %3s\n", yesno(rssconf & + HASHTOEPLITZ_F)); + seq_printf(seq, " Udp4En: %3s\n", yesno(rssconf & UDPENABLE_F)); + seq_printf(seq, " Disable: %3s\n", yesno(rssconf & DISABLE_F)); + + seq_puts(seq, "\n"); + + rssconf = t4_read_reg(adapter, TP_RSS_CONFIG_TNL_A); + seq_printf(seq, "TP_RSS_CONFIG_TNL: %#x\n", rssconf); + seq_printf(seq, " MaskSize: %3d\n", MASKSIZE_G(rssconf)); + seq_printf(seq, " MaskFilter: %3d\n", MASKFILTER_G(rssconf)); + if (CHELSIO_CHIP_VERSION(adapter->params.chip) > CHELSIO_T5) { + seq_printf(seq, " HashAll: %3s\n", + yesno(rssconf & HASHALL_F)); + seq_printf(seq, " HashEth: %3s\n", + yesno(rssconf & HASHETH_F)); + } + seq_printf(seq, " UseWireCh: %3s\n", yesno(rssconf & USEWIRECH_F)); + + seq_puts(seq, "\n"); + + rssconf = t4_read_reg(adapter, TP_RSS_CONFIG_OFD_A); + seq_printf(seq, "TP_RSS_CONFIG_OFD: %#x\n", rssconf); + seq_printf(seq, " MaskSize: %3d\n", MASKSIZE_G(rssconf)); + seq_printf(seq, " RRCplMapEn: %3s\n", yesno(rssconf & + RRCPLMAPEN_F)); + seq_printf(seq, " RRCplQueWidth: %3d\n", RRCPLQUEWIDTH_G(rssconf)); + + seq_puts(seq, "\n"); + + rssconf = t4_read_reg(adapter, TP_RSS_CONFIG_SYN_A); + seq_printf(seq, "TP_RSS_CONFIG_SYN: %#x\n", rssconf); + seq_printf(seq, " MaskSize: %3d\n", MASKSIZE_G(rssconf)); + seq_printf(seq, " UseWireCh: %3s\n", yesno(rssconf & USEWIRECH_F)); + + seq_puts(seq, "\n"); + + rssconf = t4_read_reg(adapter, TP_RSS_CONFIG_VRT_A); + seq_printf(seq, "TP_RSS_CONFIG_VRT: %#x\n", rssconf); + if (CHELSIO_CHIP_VERSION(adapter->params.chip) > CHELSIO_T5) { + seq_printf(seq, " KeyWrAddrX: %3d\n", + KEYWRADDRX_G(rssconf)); + seq_printf(seq, " KeyExtend: %3s\n", + yesno(rssconf & KEYEXTEND_F)); + } + seq_printf(seq, " VfRdRg: %3s\n", yesno(rssconf & VFRDRG_F)); + seq_printf(seq, " VfRdEn: %3s\n", yesno(rssconf & VFRDEN_F)); + seq_printf(seq, " VfPerrEn: %3s\n", yesno(rssconf & VFPERREN_F)); + seq_printf(seq, " KeyPerrEn: %3s\n", yesno(rssconf & KEYPERREN_F)); + seq_printf(seq, " DisVfVlan: %3s\n", yesno(rssconf & + DISABLEVLAN_F)); + seq_printf(seq, " EnUpSwt: %3s\n", yesno(rssconf & ENABLEUP0_F)); + seq_printf(seq, " HashDelay: %3d\n", HASHDELAY_G(rssconf)); + if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) + seq_printf(seq, " VfWrAddr: %3d\n", VFWRADDR_G(rssconf)); + seq_printf(seq, " KeyMode: %s\n", keymode[KEYMODE_G(rssconf)]); + seq_printf(seq, " VfWrEn: %3s\n", yesno(rssconf & VFWREN_F)); + seq_printf(seq, " KeyWrEn: %3s\n", yesno(rssconf & KEYWREN_F)); + seq_printf(seq, " KeyWrAddr: %3d\n", KEYWRADDR_G(rssconf)); + + seq_puts(seq, "\n"); + + rssconf = t4_read_reg(adapter, TP_RSS_CONFIG_CNG_A); + seq_printf(seq, "TP_RSS_CONFIG_CNG: %#x\n", rssconf); + seq_printf(seq, " ChnCount3: %3s\n", yesno(rssconf & CHNCOUNT3_F)); + seq_printf(seq, " ChnCount2: %3s\n", yesno(rssconf & CHNCOUNT2_F)); + seq_printf(seq, " ChnCount1: %3s\n", yesno(rssconf & CHNCOUNT1_F)); + seq_printf(seq, " ChnCount0: %3s\n", yesno(rssconf & CHNCOUNT0_F)); + seq_printf(seq, " ChnUndFlow3: %3s\n", yesno(rssconf & + CHNUNDFLOW3_F)); + seq_printf(seq, " ChnUndFlow2: %3s\n", yesno(rssconf & + CHNUNDFLOW2_F)); + seq_printf(seq, " ChnUndFlow1: %3s\n", yesno(rssconf & + CHNUNDFLOW1_F)); + seq_printf(seq, " ChnUndFlow0: %3s\n", yesno(rssconf & + CHNUNDFLOW0_F)); + seq_printf(seq, " RstChn3: %3s\n", yesno(rssconf & RSTCHN3_F)); + seq_printf(seq, " RstChn2: %3s\n", yesno(rssconf & RSTCHN2_F)); + seq_printf(seq, " RstChn1: %3s\n", yesno(rssconf & RSTCHN1_F)); + seq_printf(seq, " RstChn0: %3s\n", yesno(rssconf & RSTCHN0_F)); + seq_printf(seq, " UpdVld: %3s\n", yesno(rssconf & UPDVLD_F)); + seq_printf(seq, " Xoff: %3s\n", yesno(rssconf & XOFF_F)); + seq_printf(seq, " UpdChn3: %3s\n", yesno(rssconf & UPDCHN3_F)); + seq_printf(seq, " UpdChn2: %3s\n", yesno(rssconf & UPDCHN2_F)); + seq_printf(seq, " UpdChn1: %3s\n", yesno(rssconf & UPDCHN1_F)); + seq_printf(seq, " UpdChn0: %3s\n", yesno(rssconf & UPDCHN0_F)); + seq_printf(seq, " Queue: %3d\n", QUEUE_G(rssconf)); + + return 0; +} + +DEFINE_SIMPLE_DEBUGFS_FILE(rss_config); + +/* RSS Secret Key. + */ + +static int rss_key_show(struct seq_file *seq, void *v) +{ + u32 key[10]; + + t4_read_rss_key(seq->private, key); + seq_printf(seq, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x\n", + key[9], key[8], key[7], key[6], key[5], key[4], key[3], + key[2], key[1], key[0]); + return 0; +} + +static int rss_key_open(struct inode *inode, struct file *file) +{ + return single_open(file, rss_key_show, inode->i_private); +} + +static ssize_t rss_key_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + int i, j; + u32 key[10]; + char s[100], *p; + struct adapter *adap = FILE_DATA(file)->i_private; + + if (count > sizeof(s) - 1) + return -EINVAL; + if (copy_from_user(s, buf, count)) + return -EFAULT; + for (i = count; i > 0 && isspace(s[i - 1]); i--) + ; + s[i] = '\0'; + + for (p = s, i = 9; i >= 0; i--) { + key[i] = 0; + for (j = 0; j < 8; j++, p++) { + if (!isxdigit(*p)) + return -EINVAL; + key[i] = (key[i] << 4) | hex2val(*p); + } + } + + t4_write_rss_key(adap, key, -1); + return count; +} + +static const struct file_operations rss_key_debugfs_fops = { + .owner = THIS_MODULE, + .open = rss_key_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = rss_key_write +}; + +/* PF RSS Configuration. + */ + +struct rss_pf_conf { + u32 rss_pf_map; + u32 rss_pf_mask; + u32 rss_pf_config; +}; + +static int rss_pf_config_show(struct seq_file *seq, void *v, int idx) +{ + struct rss_pf_conf *pfconf; + + if (v == SEQ_START_TOKEN) { + /* use the 0th entry to dump the PF Map Index Size */ + pfconf = seq->private + offsetof(struct seq_tab, data); + seq_printf(seq, "PF Map Index Size = %d\n\n", + LKPIDXSIZE_G(pfconf->rss_pf_map)); + + seq_puts(seq, " RSS PF VF Hash Tuple Enable Default\n"); + seq_puts(seq, " Enable IPF Mask Mask IPv6 IPv4 UDP Queue\n"); + seq_puts(seq, " PF Map Chn Prt Map Size Size Four Two Four Two Four Ch1 Ch0\n"); + } else { + #define G_PFnLKPIDX(map, n) \ + (((map) >> PF1LKPIDX_S*(n)) & PF0LKPIDX_M) + #define G_PFnMSKSIZE(mask, n) \ + (((mask) >> PF1MSKSIZE_S*(n)) & PF1MSKSIZE_M) + + pfconf = v; + seq_printf(seq, "%3d %3s %3s %3s %3d %3d %3d %3s %3s %3s %3s %3s %3d %3d\n", + idx, + yesno(pfconf->rss_pf_config & MAPENABLE_F), + yesno(pfconf->rss_pf_config & CHNENABLE_F), + yesno(pfconf->rss_pf_config & PRTENABLE_F), + G_PFnLKPIDX(pfconf->rss_pf_map, idx), + G_PFnMSKSIZE(pfconf->rss_pf_mask, idx), + IVFWIDTH_G(pfconf->rss_pf_config), + yesno(pfconf->rss_pf_config & IP6FOURTUPEN_F), + yesno(pfconf->rss_pf_config & IP6TWOTUPEN_F), + yesno(pfconf->rss_pf_config & IP4FOURTUPEN_F), + yesno(pfconf->rss_pf_config & IP4TWOTUPEN_F), + yesno(pfconf->rss_pf_config & UDPFOURTUPEN_F), + CH1DEFAULTQUEUE_G(pfconf->rss_pf_config), + CH0DEFAULTQUEUE_G(pfconf->rss_pf_config)); + + #undef G_PFnLKPIDX + #undef G_PFnMSKSIZE + } + return 0; +} + +static int rss_pf_config_open(struct inode *inode, struct file *file) +{ + struct adapter *adapter = inode->i_private; + struct seq_tab *p; + u32 rss_pf_map, rss_pf_mask; + struct rss_pf_conf *pfconf; + int pf; + + p = seq_open_tab(file, 8, sizeof(*pfconf), 1, rss_pf_config_show); + if (!p) + return -ENOMEM; + + pfconf = (struct rss_pf_conf *)p->data; + rss_pf_map = t4_read_rss_pf_map(adapter); + rss_pf_mask = t4_read_rss_pf_mask(adapter); + for (pf = 0; pf < 8; pf++) { + pfconf[pf].rss_pf_map = rss_pf_map; + pfconf[pf].rss_pf_mask = rss_pf_mask; + t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config); + } + return 0; +} + +static const struct file_operations rss_pf_config_debugfs_fops = { + .owner = THIS_MODULE, + .open = rss_pf_config_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +/* VF RSS Configuration. + */ + +struct rss_vf_conf { + u32 rss_vf_vfl; + u32 rss_vf_vfh; +}; + +static int rss_vf_config_show(struct seq_file *seq, void *v, int idx) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, " RSS Hash Tuple Enable\n"); + seq_puts(seq, " Enable IVF Dis Enb IPv6 IPv4 UDP Def Secret Key\n"); + seq_puts(seq, " VF Chn Prt Map VLAN uP Four Two Four Two Four Que Idx Hash\n"); + } else { + struct rss_vf_conf *vfconf = v; + + seq_printf(seq, "%3d %3s %3s %3d %3s %3s %3s %3s %3s %3s %3s %4d %3d %#10x\n", + idx, + yesno(vfconf->rss_vf_vfh & VFCHNEN_F), + yesno(vfconf->rss_vf_vfh & VFPRTEN_F), + VFLKPIDX_G(vfconf->rss_vf_vfh), + yesno(vfconf->rss_vf_vfh & VFVLNEX_F), + yesno(vfconf->rss_vf_vfh & VFUPEN_F), + yesno(vfconf->rss_vf_vfh & VFIP4FOURTUPEN_F), + yesno(vfconf->rss_vf_vfh & VFIP6TWOTUPEN_F), + yesno(vfconf->rss_vf_vfh & VFIP4FOURTUPEN_F), + yesno(vfconf->rss_vf_vfh & VFIP4TWOTUPEN_F), + yesno(vfconf->rss_vf_vfh & ENABLEUDPHASH_F), + DEFAULTQUEUE_G(vfconf->rss_vf_vfh), + KEYINDEX_G(vfconf->rss_vf_vfh), + vfconf->rss_vf_vfl); + } + return 0; +} + +static int rss_vf_config_open(struct inode *inode, struct file *file) +{ + struct adapter *adapter = inode->i_private; + struct seq_tab *p; + struct rss_vf_conf *vfconf; + int vf; + + p = seq_open_tab(file, 128, sizeof(*vfconf), 1, rss_vf_config_show); + if (!p) + return -ENOMEM; + + vfconf = (struct rss_vf_conf *)p->data; + for (vf = 0; vf < 128; vf++) { + t4_read_rss_vf_config(adapter, vf, &vfconf[vf].rss_vf_vfl, + &vfconf[vf].rss_vf_vfh); + } + return 0; +} + +static const struct file_operations rss_vf_config_debugfs_fops = { + .owner = THIS_MODULE, + .open = rss_vf_config_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private +}; + +int mem_open(struct inode *inode, struct file *file) +{ + unsigned int mem; + struct adapter *adap; + + file->private_data = inode->i_private; + + mem = (uintptr_t)file->private_data & 0x3; + adap = file->private_data - mem; + + (void)t4_fwcache(adap, FW_PARAM_DEV_FWCACHE_FLUSH); + + return 0; +} + static ssize_t mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -616,7 +1078,6 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, *ppos = pos + count; return count; } - static const struct file_operations mem_debugfs_fops = { .owner = THIS_MODULE, .open = simple_open, @@ -624,6 +1085,12 @@ static const struct file_operations mem_debugfs_fops = { .llseek = default_llseek, }; +static void set_debugfs_file_size(struct dentry *de, loff_t size) +{ + if (!IS_ERR(de) && de->d_inode) + de->d_inode->i_size = size; +} + static void add_debugfs_mem(struct adapter *adap, const char *name, unsigned int idx, unsigned int size_mb) { @@ -655,6 +1122,7 @@ int t4_setup_debugfs(struct adapter *adap) { int i; u32 size; + struct dentry *de; static struct t4_debugfs_entry t4_debugfs_files[] = { { "cim_la", &cim_la_fops, S_IRUSR, 0 }, @@ -662,6 +1130,11 @@ int t4_setup_debugfs(struct adapter *adap) { "devlog", &devlog_fops, S_IRUSR, 0 }, { "l2t", &t4_l2t_fops, S_IRUSR, 0}, { "mps_tcam", &mps_tcam_debugfs_fops, S_IRUSR, 0 }, + { "rss", &rss_debugfs_fops, S_IRUSR, 0 }, + { "rss_config", &rss_config_debugfs_fops, S_IRUSR, 0 }, + { "rss_key", &rss_key_debugfs_fops, S_IRUSR, 0 }, + { "rss_pf_config", &rss_pf_config_debugfs_fops, S_IRUSR, 0 }, + { "rss_vf_config", &rss_vf_config_debugfs_fops, S_IRUSR, 0 }, #if IS_ENABLED(CONFIG_IPV6) { "clip_tbl", &clip_tbl_debugfs_fops, S_IRUSR, 0 }, #endif @@ -697,5 +1170,10 @@ int t4_setup_debugfs(struct adapter *adap) EXT_MEM1_SIZE_G(size)); } } + + de = debugfs_create_file("flash", S_IRUSR, adap->debugfs_root, adap, + &flash_debugfs_fops); + set_debugfs_file_size(de, adap->params.sf_size); + return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h index 70fcbc930826..b63cfee2d963 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h @@ -37,6 +37,21 @@ #include <linux/export.h> +#define FILE_DATA(_file) ((_file)->f_path.dentry->d_inode) + +#define DEFINE_SIMPLE_DEBUGFS_FILE(name) \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, name##_show, inode->i_private); \ +} \ +static const struct file_operations name##_debugfs_fops = { \ + .owner = THIS_MODULE, \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release \ +} + struct t4_debugfs_entry { const char *name; const struct file_operations *ops; @@ -52,6 +67,11 @@ struct seq_tab { char data[0]; /* the table data */ }; +static inline unsigned int hex2val(char c) +{ + return isdigit(c) ? c - '0' : tolower(c) - 'a' + 10; +} + struct seq_tab *seq_open_tab(struct file *f, unsigned int rows, unsigned int width, unsigned int have_header, int (*show)(struct seq_file *seq, void *v, int i)); @@ -60,5 +80,6 @@ int t4_setup_debugfs(struct adapter *adap); void add_debugfs_files(struct adapter *adap, struct t4_debugfs_entry *files, unsigned int nfiles); +int mem_open(struct inode *inode, struct file *file); #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 1147e1e88314..12c1a3fbf296 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6290,7 +6290,7 @@ static int __init cxgb4_init_module(void) static void __exit cxgb4_cleanup_module(void) { #if IS_ENABLED(CONFIG_IPV6) - if (inet6addr_registered && list_empty(&adapter_list)) { + if (inet6addr_registered) { unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier); inet6addr_registered = false; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 734d33e3f53b..8f99878a4913 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -835,8 +835,8 @@ static int flash_wait_op(struct adapter *adapter, int attempts, int delay) * (i.e., big-endian), otherwise as 32-bit words in the platform's * natural endianess. */ -static int t4_read_flash(struct adapter *adapter, unsigned int addr, - unsigned int nwords, u32 *data, int byte_oriented) +int t4_read_flash(struct adapter *adapter, unsigned int addr, + unsigned int nwords, u32 *data, int byte_oriented) { int ret; @@ -1239,6 +1239,30 @@ out: return ret; } +/** + * t4_fwcache - firmware cache operation + * @adap: the adapter + * @op : the operation (flush or flush and invalidate) + */ +int t4_fwcache(struct adapter *adap, enum fw_params_param_dev_fwcache op) +{ + struct fw_params_cmd c; + + memset(&c, 0, sizeof(c)); + c.op_to_vfn = + cpu_to_be32(FW_CMD_OP_V(FW_PARAMS_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_PARAMS_CMD_PFN_V(adap->fn) | + FW_PARAMS_CMD_VFN_V(0)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); + c.param[0].mnem = + cpu_to_be32(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FWCACHE)); + c.param[0].val = (__force __be32)op; + + return t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), NULL); +} + #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ FW_PORT_CAP_ANEG) @@ -2176,6 +2200,147 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, return t4_wr_mbox(adapter, mbox, &c, sizeof(c), NULL); } +/* Read an RSS table row */ +static int rd_rss_row(struct adapter *adap, int row, u32 *val) +{ + t4_write_reg(adap, TP_RSS_LKP_TABLE_A, 0xfff00000 | row); + return t4_wait_op_done_val(adap, TP_RSS_LKP_TABLE_A, LKPTBLROWVLD_F, 1, + 5, 0, val); +} + +/** + * t4_read_rss - read the contents of the RSS mapping table + * @adapter: the adapter + * @map: holds the contents of the RSS mapping table + * + * Reads the contents of the RSS hash->queue mapping table. + */ +int t4_read_rss(struct adapter *adapter, u16 *map) +{ + u32 val; + int i, ret; + + for (i = 0; i < RSS_NENTRIES / 2; ++i) { + ret = rd_rss_row(adapter, i, &val); + if (ret) + return ret; + *map++ = LKPTBLQUEUE0_G(val); + *map++ = LKPTBLQUEUE1_G(val); + } + return 0; +} + +/** + * t4_read_rss_key - read the global RSS key + * @adap: the adapter + * @key: 10-entry array holding the 320-bit RSS key + * + * Reads the global 320-bit RSS key. + */ +void t4_read_rss_key(struct adapter *adap, u32 *key) +{ + t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10, + TP_RSS_SECRET_KEY0_A); +} + +/** + * t4_write_rss_key - program one of the RSS keys + * @adap: the adapter + * @key: 10-entry array holding the 320-bit RSS key + * @idx: which RSS key to write + * + * Writes one of the RSS keys with the given 320-bit value. If @idx is + * 0..15 the corresponding entry in the RSS key table is written, + * otherwise the global RSS key is written. + */ +void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) +{ + t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10, + TP_RSS_SECRET_KEY0_A); + if (idx >= 0 && idx < 16) + t4_write_reg(adap, TP_RSS_CONFIG_VRT_A, + KEYWRADDR_V(idx) | KEYWREN_F); +} + +/** + * t4_read_rss_pf_config - read PF RSS Configuration Table + * @adapter: the adapter + * @index: the entry in the PF RSS table to read + * @valp: where to store the returned value + * + * Reads the PF RSS Configuration Table at the specified index and returns + * the value found there. + */ +void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, + u32 *valp) +{ + t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, + valp, 1, TP_RSS_PF0_CONFIG_A + index); +} + +/** + * t4_read_rss_vf_config - read VF RSS Configuration Table + * @adapter: the adapter + * @index: the entry in the VF RSS table to read + * @vfl: where to store the returned VFL + * @vfh: where to store the returned VFH + * + * Reads the VF RSS Configuration Table at the specified index and returns + * the (VFL, VFH) values found there. + */ +void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, + u32 *vfl, u32 *vfh) +{ + u32 vrt, mask, data; + + mask = VFWRADDR_V(VFWRADDR_M); + data = VFWRADDR_V(index); + + /* Request that the index'th VF Table values be read into VFL/VFH. + */ + vrt = t4_read_reg(adapter, TP_RSS_CONFIG_VRT_A); + vrt &= ~(VFRDRG_F | VFWREN_F | KEYWREN_F | mask); + vrt |= data | VFRDEN_F; + t4_write_reg(adapter, TP_RSS_CONFIG_VRT_A, vrt); + + /* Grab the VFL/VFH values ... + */ + t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, + vfl, 1, TP_RSS_VFL_CONFIG_A); + t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, + vfh, 1, TP_RSS_VFH_CONFIG_A); +} + +/** + * t4_read_rss_pf_map - read PF RSS Map + * @adapter: the adapter + * + * Reads the PF RSS Map register and returns its value. + */ +u32 t4_read_rss_pf_map(struct adapter *adapter) +{ + u32 pfmap; + + t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, + &pfmap, 1, TP_RSS_PF_MAP_A); + return pfmap; +} + +/** + * t4_read_rss_pf_mask - read PF RSS Mask + * @adapter: the adapter + * + * Reads the PF RSS Mask register and returns its value. + */ +u32 t4_read_rss_pf_mask(struct adapter *adapter) +{ + u32 pfmask; + + t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, + &pfmask, 1, TP_RSS_PF_MSK_A); + return pfmask; +} + /** * t4_tp_get_tcp_stats - read TP's TCP MIB counters * @adap: the adapter diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 7ce55f9be9d4..e036b563bf3c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1678,6 +1678,408 @@ #define QUEUENUMBER_S 0 #define QUEUENUMBER_V(x) ((x) << QUEUENUMBER_S) +#define TP_RSS_CONFIG_A 0x7df0 + +#define TNL4TUPENIPV6_S 31 +#define TNL4TUPENIPV6_V(x) ((x) << TNL4TUPENIPV6_S) +#define TNL4TUPENIPV6_F TNL4TUPENIPV6_V(1U) + +#define TNL2TUPENIPV6_S 30 +#define TNL2TUPENIPV6_V(x) ((x) << TNL2TUPENIPV6_S) +#define TNL2TUPENIPV6_F TNL2TUPENIPV6_V(1U) + +#define TNL4TUPENIPV4_S 29 +#define TNL4TUPENIPV4_V(x) ((x) << TNL4TUPENIPV4_S) +#define TNL4TUPENIPV4_F TNL4TUPENIPV4_V(1U) + +#define TNL2TUPENIPV4_S 28 +#define TNL2TUPENIPV4_V(x) ((x) << TNL2TUPENIPV4_S) +#define TNL2TUPENIPV4_F TNL2TUPENIPV4_V(1U) + +#define TNLTCPSEL_S 27 +#define TNLTCPSEL_V(x) ((x) << TNLTCPSEL_S) +#define TNLTCPSEL_F TNLTCPSEL_V(1U) + +#define TNLIP6SEL_S 26 +#define TNLIP6SEL_V(x) ((x) << TNLIP6SEL_S) +#define TNLIP6SEL_F TNLIP6SEL_V(1U) + +#define TNLVRTSEL_S 25 +#define TNLVRTSEL_V(x) ((x) << TNLVRTSEL_S) +#define TNLVRTSEL_F TNLVRTSEL_V(1U) + +#define TNLMAPEN_S 24 +#define TNLMAPEN_V(x) ((x) << TNLMAPEN_S) +#define TNLMAPEN_F TNLMAPEN_V(1U) + +#define OFDHASHSAVE_S 19 +#define OFDHASHSAVE_V(x) ((x) << OFDHASHSAVE_S) +#define OFDHASHSAVE_F OFDHASHSAVE_V(1U) + +#define OFDVRTSEL_S 18 +#define OFDVRTSEL_V(x) ((x) << OFDVRTSEL_S) +#define OFDVRTSEL_F OFDVRTSEL_V(1U) + +#define OFDMAPEN_S 17 +#define OFDMAPEN_V(x) ((x) << OFDMAPEN_S) +#define OFDMAPEN_F OFDMAPEN_V(1U) + +#define OFDLKPEN_S 16 +#define OFDLKPEN_V(x) ((x) << OFDLKPEN_S) +#define OFDLKPEN_F OFDLKPEN_V(1U) + +#define SYN4TUPENIPV6_S 15 +#define SYN4TUPENIPV6_V(x) ((x) << SYN4TUPENIPV6_S) +#define SYN4TUPENIPV6_F SYN4TUPENIPV6_V(1U) + +#define SYN2TUPENIPV6_S 14 +#define SYN2TUPENIPV6_V(x) ((x) << SYN2TUPENIPV6_S) +#define SYN2TUPENIPV6_F SYN2TUPENIPV6_V(1U) + +#define SYN4TUPENIPV4_S 13 +#define SYN4TUPENIPV4_V(x) ((x) << SYN4TUPENIPV4_S) +#define SYN4TUPENIPV4_F SYN4TUPENIPV4_V(1U) + +#define SYN2TUPENIPV4_S 12 +#define SYN2TUPENIPV4_V(x) ((x) << SYN2TUPENIPV4_S) +#define SYN2TUPENIPV4_F SYN2TUPENIPV4_V(1U) + +#define SYNIP6SEL_S 11 +#define SYNIP6SEL_V(x) ((x) << SYNIP6SEL_S) +#define SYNIP6SEL_F SYNIP6SEL_V(1U) + +#define SYNVRTSEL_S 10 +#define SYNVRTSEL_V(x) ((x) << SYNVRTSEL_S) +#define SYNVRTSEL_F SYNVRTSEL_V(1U) + +#define SYNMAPEN_S 9 +#define SYNMAPEN_V(x) ((x) << SYNMAPEN_S) +#define SYNMAPEN_F SYNMAPEN_V(1U) + +#define SYNLKPEN_S 8 +#define SYNLKPEN_V(x) ((x) << SYNLKPEN_S) +#define SYNLKPEN_F SYNLKPEN_V(1U) + +#define CHANNELENABLE_S 7 +#define CHANNELENABLE_V(x) ((x) << CHANNELENABLE_S) +#define CHANNELENABLE_F CHANNELENABLE_V(1U) + +#define PORTENABLE_S 6 +#define PORTENABLE_V(x) ((x) << PORTENABLE_S) +#define PORTENABLE_F PORTENABLE_V(1U) + +#define TNLALLLOOKUP_S 5 +#define TNLALLLOOKUP_V(x) ((x) << TNLALLLOOKUP_S) +#define TNLALLLOOKUP_F TNLALLLOOKUP_V(1U) + +#define VIRTENABLE_S 4 +#define VIRTENABLE_V(x) ((x) << VIRTENABLE_S) +#define VIRTENABLE_F VIRTENABLE_V(1U) + +#define CONGESTIONENABLE_S 3 +#define CONGESTIONENABLE_V(x) ((x) << CONGESTIONENABLE_S) +#define CONGESTIONENABLE_F CONGESTIONENABLE_V(1U) + +#define HASHTOEPLITZ_S 2 +#define HASHTOEPLITZ_V(x) ((x) << HASHTOEPLITZ_S) +#define HASHTOEPLITZ_F HASHTOEPLITZ_V(1U) + +#define UDPENABLE_S 1 +#define UDPENABLE_V(x) ((x) << UDPENABLE_S) +#define UDPENABLE_F UDPENABLE_V(1U) + +#define DISABLE_S 0 +#define DISABLE_V(x) ((x) << DISABLE_S) +#define DISABLE_F DISABLE_V(1U) + +#define TP_RSS_CONFIG_TNL_A 0x7df4 + +#define MASKSIZE_S 28 +#define MASKSIZE_M 0xfU +#define MASKSIZE_V(x) ((x) << MASKSIZE_S) +#define MASKSIZE_G(x) (((x) >> MASKSIZE_S) & MASKSIZE_M) + +#define MASKFILTER_S 16 +#define MASKFILTER_M 0x7ffU +#define MASKFILTER_V(x) ((x) << MASKFILTER_S) +#define MASKFILTER_G(x) (((x) >> MASKFILTER_S) & MASKFILTER_M) + +#define USEWIRECH_S 0 +#define USEWIRECH_V(x) ((x) << USEWIRECH_S) +#define USEWIRECH_F USEWIRECH_V(1U) + +#define HASHALL_S 2 +#define HASHALL_V(x) ((x) << HASHALL_S) +#define HASHALL_F HASHALL_V(1U) + +#define HASHETH_S 1 +#define HASHETH_V(x) ((x) << HASHETH_S) +#define HASHETH_F HASHETH_V(1U) + +#define TP_RSS_CONFIG_OFD_A 0x7df8 + +#define RRCPLMAPEN_S 20 +#define RRCPLMAPEN_V(x) ((x) << RRCPLMAPEN_S) +#define RRCPLMAPEN_F RRCPLMAPEN_V(1U) + +#define RRCPLQUEWIDTH_S 16 +#define RRCPLQUEWIDTH_M 0xfU +#define RRCPLQUEWIDTH_V(x) ((x) << RRCPLQUEWIDTH_S) +#define RRCPLQUEWIDTH_G(x) (((x) >> RRCPLQUEWIDTH_S) & RRCPLQUEWIDTH_M) + +#define TP_RSS_CONFIG_SYN_A 0x7dfc +#define TP_RSS_CONFIG_VRT_A 0x7e00 + +#define VFRDRG_S 25 +#define VFRDRG_V(x) ((x) << VFRDRG_S) +#define VFRDRG_F VFRDRG_V(1U) + +#define VFRDEN_S 24 +#define VFRDEN_V(x) ((x) << VFRDEN_S) +#define VFRDEN_F VFRDEN_V(1U) + +#define VFPERREN_S 23 +#define VFPERREN_V(x) ((x) << VFPERREN_S) +#define VFPERREN_F VFPERREN_V(1U) + +#define KEYPERREN_S 22 +#define KEYPERREN_V(x) ((x) << KEYPERREN_S) +#define KEYPERREN_F KEYPERREN_V(1U) + +#define DISABLEVLAN_S 21 +#define DISABLEVLAN_V(x) ((x) << DISABLEVLAN_S) +#define DISABLEVLAN_F DISABLEVLAN_V(1U) + +#define ENABLEUP0_S 20 +#define ENABLEUP0_V(x) ((x) << ENABLEUP0_S) +#define ENABLEUP0_F ENABLEUP0_V(1U) + +#define HASHDELAY_S 16 +#define HASHDELAY_M 0xfU +#define HASHDELAY_V(x) ((x) << HASHDELAY_S) +#define HASHDELAY_G(x) (((x) >> HASHDELAY_S) & HASHDELAY_M) + +#define VFWRADDR_S 8 +#define VFWRADDR_M 0x7fU +#define VFWRADDR_V(x) ((x) << VFWRADDR_S) +#define VFWRADDR_G(x) (((x) >> VFWRADDR_S) & VFWRADDR_M) + +#define KEYMODE_S 6 +#define KEYMODE_M 0x3U +#define KEYMODE_V(x) ((x) << KEYMODE_S) +#define KEYMODE_G(x) (((x) >> KEYMODE_S) & KEYMODE_M) + +#define VFWREN_S 5 +#define VFWREN_V(x) ((x) << VFWREN_S) +#define VFWREN_F VFWREN_V(1U) + +#define KEYWREN_S 4 +#define KEYWREN_V(x) ((x) << KEYWREN_S) +#define KEYWREN_F KEYWREN_V(1U) + +#define KEYWRADDR_S 0 +#define KEYWRADDR_M 0xfU +#define KEYWRADDR_V(x) ((x) << KEYWRADDR_S) +#define KEYWRADDR_G(x) (((x) >> KEYWRADDR_S) & KEYWRADDR_M) + +#define KEYWRADDRX_S 30 +#define KEYWRADDRX_M 0x3U +#define KEYWRADDRX_V(x) ((x) << KEYWRADDRX_S) +#define KEYWRADDRX_G(x) (((x) >> KEYWRADDRX_S) & KEYWRADDRX_M) + +#define KEYEXTEND_S 26 +#define KEYEXTEND_V(x) ((x) << KEYEXTEND_S) +#define KEYEXTEND_F KEYEXTEND_V(1U) + +#define LKPIDXSIZE_S 24 +#define LKPIDXSIZE_M 0x3U +#define LKPIDXSIZE_V(x) ((x) << LKPIDXSIZE_S) +#define LKPIDXSIZE_G(x) (((x) >> LKPIDXSIZE_S) & LKPIDXSIZE_M) + +#define TP_RSS_VFL_CONFIG_A 0x3a +#define TP_RSS_VFH_CONFIG_A 0x3b + +#define ENABLEUDPHASH_S 31 +#define ENABLEUDPHASH_V(x) ((x) << ENABLEUDPHASH_S) +#define ENABLEUDPHASH_F ENABLEUDPHASH_V(1U) + +#define VFUPEN_S 30 +#define VFUPEN_V(x) ((x) << VFUPEN_S) +#define VFUPEN_F VFUPEN_V(1U) + +#define VFVLNEX_S 28 +#define VFVLNEX_V(x) ((x) << VFVLNEX_S) +#define VFVLNEX_F VFVLNEX_V(1U) + +#define VFPRTEN_S 27 +#define VFPRTEN_V(x) ((x) << VFPRTEN_S) +#define VFPRTEN_F VFPRTEN_V(1U) + +#define VFCHNEN_S 26 +#define VFCHNEN_V(x) ((x) << VFCHNEN_S) +#define VFCHNEN_F VFCHNEN_V(1U) + +#define DEFAULTQUEUE_S 16 +#define DEFAULTQUEUE_M 0x3ffU +#define DEFAULTQUEUE_G(x) (((x) >> DEFAULTQUEUE_S) & DEFAULTQUEUE_M) + +#define VFIP6TWOTUPEN_S 6 +#define VFIP6TWOTUPEN_V(x) ((x) << VFIP6TWOTUPEN_S) +#define VFIP6TWOTUPEN_F VFIP6TWOTUPEN_V(1U) + +#define VFIP4FOURTUPEN_S 5 +#define VFIP4FOURTUPEN_V(x) ((x) << VFIP4FOURTUPEN_S) +#define VFIP4FOURTUPEN_F VFIP4FOURTUPEN_V(1U) + +#define VFIP4TWOTUPEN_S 4 +#define VFIP4TWOTUPEN_V(x) ((x) << VFIP4TWOTUPEN_S) +#define VFIP4TWOTUPEN_F VFIP4TWOTUPEN_V(1U) + +#define KEYINDEX_S 0 +#define KEYINDEX_M 0xfU +#define KEYINDEX_G(x) (((x) >> KEYINDEX_S) & KEYINDEX_M) + +#define MAPENABLE_S 31 +#define MAPENABLE_V(x) ((x) << MAPENABLE_S) +#define MAPENABLE_F MAPENABLE_V(1U) + +#define CHNENABLE_S 30 +#define CHNENABLE_V(x) ((x) << CHNENABLE_S) +#define CHNENABLE_F CHNENABLE_V(1U) + +#define PRTENABLE_S 29 +#define PRTENABLE_V(x) ((x) << PRTENABLE_S) +#define PRTENABLE_F PRTENABLE_V(1U) + +#define UDPFOURTUPEN_S 28 +#define UDPFOURTUPEN_V(x) ((x) << UDPFOURTUPEN_S) +#define UDPFOURTUPEN_F UDPFOURTUPEN_V(1U) + +#define IP6FOURTUPEN_S 27 +#define IP6FOURTUPEN_V(x) ((x) << IP6FOURTUPEN_S) +#define IP6FOURTUPEN_F IP6FOURTUPEN_V(1U) + +#define IP6TWOTUPEN_S 26 +#define IP6TWOTUPEN_V(x) ((x) << IP6TWOTUPEN_S) +#define IP6TWOTUPEN_F IP6TWOTUPEN_V(1U) + +#define IP4FOURTUPEN_S 25 +#define IP4FOURTUPEN_V(x) ((x) << IP4FOURTUPEN_S) +#define IP4FOURTUPEN_F IP4FOURTUPEN_V(1U) + +#define IP4TWOTUPEN_S 24 +#define IP4TWOTUPEN_V(x) ((x) << IP4TWOTUPEN_S) +#define IP4TWOTUPEN_F IP4TWOTUPEN_V(1U) + +#define IVFWIDTH_S 20 +#define IVFWIDTH_M 0xfU +#define IVFWIDTH_V(x) ((x) << IVFWIDTH_S) +#define IVFWIDTH_G(x) (((x) >> IVFWIDTH_S) & IVFWIDTH_M) + +#define CH1DEFAULTQUEUE_S 10 +#define CH1DEFAULTQUEUE_M 0x3ffU +#define CH1DEFAULTQUEUE_V(x) ((x) << CH1DEFAULTQUEUE_S) +#define CH1DEFAULTQUEUE_G(x) (((x) >> CH1DEFAULTQUEUE_S) & CH1DEFAULTQUEUE_M) + +#define CH0DEFAULTQUEUE_S 0 +#define CH0DEFAULTQUEUE_M 0x3ffU +#define CH0DEFAULTQUEUE_V(x) ((x) << CH0DEFAULTQUEUE_S) +#define CH0DEFAULTQUEUE_G(x) (((x) >> CH0DEFAULTQUEUE_S) & CH0DEFAULTQUEUE_M) + +#define VFLKPIDX_S 8 +#define VFLKPIDX_M 0xffU +#define VFLKPIDX_G(x) (((x) >> VFLKPIDX_S) & VFLKPIDX_M) + +#define TP_RSS_CONFIG_CNG_A 0x7e04 +#define TP_RSS_SECRET_KEY0_A 0x40 +#define TP_RSS_PF0_CONFIG_A 0x30 +#define TP_RSS_PF_MAP_A 0x38 +#define TP_RSS_PF_MSK_A 0x39 + +#define PF1LKPIDX_S 3 + +#define PF0LKPIDX_M 0x7U + +#define PF1MSKSIZE_S 4 +#define PF1MSKSIZE_M 0xfU + +#define CHNCOUNT3_S 31 +#define CHNCOUNT3_V(x) ((x) << CHNCOUNT3_S) +#define CHNCOUNT3_F CHNCOUNT3_V(1U) + +#define CHNCOUNT2_S 30 +#define CHNCOUNT2_V(x) ((x) << CHNCOUNT2_S) +#define CHNCOUNT2_F CHNCOUNT2_V(1U) + +#define CHNCOUNT1_S 29 +#define CHNCOUNT1_V(x) ((x) << CHNCOUNT1_S) +#define CHNCOUNT1_F CHNCOUNT1_V(1U) + +#define CHNCOUNT0_S 28 +#define CHNCOUNT0_V(x) ((x) << CHNCOUNT0_S) +#define CHNCOUNT0_F CHNCOUNT0_V(1U) + +#define CHNUNDFLOW3_S 27 +#define CHNUNDFLOW3_V(x) ((x) << CHNUNDFLOW3_S) +#define CHNUNDFLOW3_F CHNUNDFLOW3_V(1U) + +#define CHNUNDFLOW2_S 26 +#define CHNUNDFLOW2_V(x) ((x) << CHNUNDFLOW2_S) +#define CHNUNDFLOW2_F CHNUNDFLOW2_V(1U) + +#define CHNUNDFLOW1_S 25 +#define CHNUNDFLOW1_V(x) ((x) << CHNUNDFLOW1_S) +#define CHNUNDFLOW1_F CHNUNDFLOW1_V(1U) + +#define CHNUNDFLOW0_S 24 +#define CHNUNDFLOW0_V(x) ((x) << CHNUNDFLOW0_S) +#define CHNUNDFLOW0_F CHNUNDFLOW0_V(1U) + +#define RSTCHN3_S 19 +#define RSTCHN3_V(x) ((x) << RSTCHN3_S) +#define RSTCHN3_F RSTCHN3_V(1U) + +#define RSTCHN2_S 18 +#define RSTCHN2_V(x) ((x) << RSTCHN2_S) +#define RSTCHN2_F RSTCHN2_V(1U) + +#define RSTCHN1_S 17 +#define RSTCHN1_V(x) ((x) << RSTCHN1_S) +#define RSTCHN1_F RSTCHN1_V(1U) + +#define RSTCHN0_S 16 +#define RSTCHN0_V(x) ((x) << RSTCHN0_S) +#define RSTCHN0_F RSTCHN0_V(1U) + +#define UPDVLD_S 15 +#define UPDVLD_V(x) ((x) << UPDVLD_S) +#define UPDVLD_F UPDVLD_V(1U) + +#define XOFF_S 14 +#define XOFF_V(x) ((x) << XOFF_S) +#define XOFF_F XOFF_V(1U) + +#define UPDCHN3_S 13 +#define UPDCHN3_V(x) ((x) << UPDCHN3_S) +#define UPDCHN3_F UPDCHN3_V(1U) + +#define UPDCHN2_S 12 +#define UPDCHN2_V(x) ((x) << UPDCHN2_S) +#define UPDCHN2_F UPDCHN2_V(1U) + +#define UPDCHN1_S 11 +#define UPDCHN1_V(x) ((x) << UPDCHN1_S) +#define UPDCHN1_F UPDCHN1_V(1U) + +#define UPDCHN0_S 10 +#define UPDCHN0_V(x) ((x) << UPDCHN0_S) +#define UPDCHN0_F UPDCHN0_V(1U) + +#define QUEUE_S 0 +#define QUEUE_M 0x3ffU +#define QUEUE_V(x) ((x) << QUEUE_S) +#define QUEUE_G(x) (((x) >> QUEUE_S) & QUEUE_M) + #define MPS_TRC_INT_CAUSE_A 0x985c #define MISCPERR_S 8 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index de8283324f1f..1e72cda5eb1a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1062,6 +1062,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_MAXORDIRD_QP = 0x13, /* max supported QP IRD/ORD */ FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */ FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17, + FW_PARAMS_PARAM_DEV_FWCACHE = 0x18, }; /* @@ -1121,6 +1122,11 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, }; +enum fw_params_param_dev_fwcache { + FW_PARAM_DEV_FWCACHE_FLUSH = 0x00, + FW_PARAM_DEV_FWCACHE_FLUSHINV = 0x01, +}; + #define FW_PARAMS_MNEM_S 24 #define FW_PARAMS_MNEM_V(x) ((x) << FW_PARAMS_MNEM_S) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index fead5c65a4f0..4bd425ea3421 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -573,7 +573,7 @@ static int lancer_wait_ready(struct be_adapter *adapter) { #define SLIPORT_READY_TIMEOUT 30 u32 sliport_status; - int status = 0, i; + int i; for (i = 0; i < SLIPORT_READY_TIMEOUT; i++) { sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); @@ -584,9 +584,9 @@ static int lancer_wait_ready(struct be_adapter *adapter) } if (i == SLIPORT_READY_TIMEOUT) - status = -1; + return sliport_status ? : -1; - return status; + return 0; } static bool lancer_provisioning_error(struct be_adapter *adapter) @@ -624,7 +624,7 @@ int lancer_test_and_set_rdy_state(struct be_adapter *adapter) iowrite32(SLI_PORT_CONTROL_IP_MASK, adapter->db + SLIPORT_CONTROL_OFFSET); - /* check adapter has corrected the error */ + /* check if adapter has corrected the error */ status = lancer_wait_ready(adapter); sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); @@ -655,7 +655,11 @@ int be_fw_wait_ready(struct be_adapter *adapter) if (lancer_chip(adapter)) { status = lancer_wait_ready(adapter); - return status; + if (status) { + stage = status; + goto err; + } + return 0; } do { @@ -671,7 +675,8 @@ int be_fw_wait_ready(struct be_adapter *adapter) timeout += 2; } while (timeout < 60); - dev_err(dev, "POST timeout; stage=0x%x\n", stage); +err: + dev_err(dev, "POST timeout; stage=%#x\n", stage); return -1; } @@ -3751,6 +3756,7 @@ int be_cmd_config_qos(struct be_adapter *adapter, u32 max_rate, u16 link_speed, be_reset_nic_desc(&nic_desc); nic_desc.pf_num = adapter->pf_number; nic_desc.vf_num = domain; + nic_desc.bw_min = 0; if (lancer_chip(adapter)) { nic_desc.hdr.desc_type = NIC_RESOURCE_DESC_TYPE_V0; nic_desc.hdr.desc_len = RESOURCE_DESC_SIZE_V0; @@ -4092,7 +4098,7 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, int status; if (BEx_chip(adapter) || lancer_chip(adapter)) - return 0; + return -EOPNOTSUPP; spin_lock_bh(&adapter->mcc_lock); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index eb5085d6794f..c2701ccd0a1d 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1161,7 +1161,167 @@ struct be_cmd_resp_get_beacon_state { u8 rsvd0[3]; } __packed; +/* Flashrom related descriptors */ +#define MAX_FLASH_COMP 32 + +#define OPTYPE_ISCSI_ACTIVE 0 +#define OPTYPE_REDBOOT 1 +#define OPTYPE_BIOS 2 +#define OPTYPE_PXE_BIOS 3 +#define OPTYPE_FCOE_BIOS 8 +#define OPTYPE_ISCSI_BACKUP 9 +#define OPTYPE_FCOE_FW_ACTIVE 10 +#define OPTYPE_FCOE_FW_BACKUP 11 +#define OPTYPE_NCSI_FW 13 +#define OPTYPE_REDBOOT_DIR 18 +#define OPTYPE_REDBOOT_CONFIG 19 +#define OPTYPE_SH_PHY_FW 21 +#define OPTYPE_FLASHISM_JUMPVECTOR 22 +#define OPTYPE_UFI_DIR 23 +#define OPTYPE_PHY_FW 99 + +#define FLASH_BIOS_IMAGE_MAX_SIZE_g2 262144 /* Max OPTION ROM image sz */ +#define FLASH_REDBOOT_IMAGE_MAX_SIZE_g2 262144 /* Max Redboot image sz */ +#define FLASH_IMAGE_MAX_SIZE_g2 1310720 /* Max firmware image size */ + +#define FLASH_NCSI_IMAGE_MAX_SIZE_g3 262144 +#define FLASH_PHY_FW_IMAGE_MAX_SIZE_g3 262144 +#define FLASH_BIOS_IMAGE_MAX_SIZE_g3 524288 /* Max OPTION ROM image sz */ +#define FLASH_REDBOOT_IMAGE_MAX_SIZE_g3 1048576 /* Max Redboot image sz */ +#define FLASH_IMAGE_MAX_SIZE_g3 2097152 /* Max firmware image size */ + +/* Offsets for components on Flash. */ +#define FLASH_REDBOOT_START_g2 0 +#define FLASH_FCoE_BIOS_START_g2 524288 +#define FLASH_iSCSI_PRIMARY_IMAGE_START_g2 1048576 +#define FLASH_iSCSI_BACKUP_IMAGE_START_g2 2359296 +#define FLASH_FCoE_PRIMARY_IMAGE_START_g2 3670016 +#define FLASH_FCoE_BACKUP_IMAGE_START_g2 4980736 +#define FLASH_iSCSI_BIOS_START_g2 7340032 +#define FLASH_PXE_BIOS_START_g2 7864320 + +#define FLASH_REDBOOT_START_g3 262144 +#define FLASH_PHY_FW_START_g3 1310720 +#define FLASH_iSCSI_PRIMARY_IMAGE_START_g3 2097152 +#define FLASH_iSCSI_BACKUP_IMAGE_START_g3 4194304 +#define FLASH_FCoE_PRIMARY_IMAGE_START_g3 6291456 +#define FLASH_FCoE_BACKUP_IMAGE_START_g3 8388608 +#define FLASH_iSCSI_BIOS_START_g3 12582912 +#define FLASH_PXE_BIOS_START_g3 13107200 +#define FLASH_FCoE_BIOS_START_g3 13631488 +#define FLASH_NCSI_START_g3 15990784 + +#define IMAGE_NCSI 16 +#define IMAGE_OPTION_ROM_PXE 32 +#define IMAGE_OPTION_ROM_FCoE 33 +#define IMAGE_OPTION_ROM_ISCSI 34 +#define IMAGE_FLASHISM_JUMPVECTOR 48 +#define IMAGE_FIRMWARE_iSCSI 160 +#define IMAGE_FIRMWARE_FCoE 162 +#define IMAGE_FIRMWARE_BACKUP_iSCSI 176 +#define IMAGE_FIRMWARE_BACKUP_FCoE 178 +#define IMAGE_FIRMWARE_PHY 192 +#define IMAGE_REDBOOT_DIR 208 +#define IMAGE_REDBOOT_CONFIG 209 +#define IMAGE_UFI_DIR 210 +#define IMAGE_BOOT_CODE 224 + +struct controller_id { + u32 vendor; + u32 device; + u32 subvendor; + u32 subdevice; +}; + +struct flash_comp { + unsigned long offset; + int optype; + int size; + int img_type; +}; + +struct image_hdr { + u32 imageid; + u32 imageoffset; + u32 imagelength; + u32 image_checksum; + u8 image_version[32]; +}; + +struct flash_file_hdr_g2 { + u8 sign[32]; + u32 cksum; + u32 antidote; + struct controller_id cont_id; + u32 file_len; + u32 chunk_num; + u32 total_chunks; + u32 num_imgs; + u8 build[24]; +}; + +struct flash_file_hdr_g3 { + u8 sign[52]; + u8 ufi_version[4]; + u32 file_len; + u32 cksum; + u32 antidote; + u32 num_imgs; + u8 build[24]; + u8 asic_type_rev; + u8 rsvd[31]; +}; + +struct flash_section_hdr { + u32 format_rev; + u32 cksum; + u32 antidote; + u32 num_images; + u8 id_string[128]; + u32 rsvd[4]; +} __packed; + +struct flash_section_hdr_g2 { + u32 format_rev; + u32 cksum; + u32 antidote; + u32 build_num; + u8 id_string[128]; + u32 rsvd[8]; +} __packed; + +struct flash_section_entry { + u32 type; + u32 offset; + u32 pad_size; + u32 image_size; + u32 cksum; + u32 entry_point; + u16 optype; + u16 rsvd0; + u32 rsvd1; + u8 ver_data[32]; +} __packed; + +struct flash_section_info { + u8 cookie[32]; + struct flash_section_hdr fsec_hdr; + struct flash_section_entry fsec_entry[32]; +} __packed; + +struct flash_section_info_g2 { + u8 cookie[32]; + struct flash_section_hdr_g2 fsec_hdr; + struct flash_section_entry fsec_entry[32]; +} __packed; + /****************** Firmware Flash ******************/ +#define FLASHROM_OPER_FLASH 1 +#define FLASHROM_OPER_SAVE 2 +#define FLASHROM_OPER_REPORT 4 +#define FLASHROM_OPER_PHY_FLASH 9 +#define FLASHROM_OPER_PHY_SAVE 10 + struct flashrom_params { u32 op_code; u32 op_type; @@ -1366,6 +1526,7 @@ enum { PHY_TYPE_QSFP, PHY_TYPE_KR4_40GB, PHY_TYPE_KR2_20GB, + PHY_TYPE_TN_8022, PHY_TYPE_DISABLED = 255 }; @@ -1429,6 +1590,20 @@ struct be_cmd_req_set_qos { }; /*********************** Controller Attributes ***********************/ +struct mgmt_hba_attribs { + u32 rsvd0[24]; + u8 controller_model_number[32]; + u32 rsvd1[79]; + u8 rsvd2[3]; + u8 phy_port; + u32 rsvd3[13]; +} __packed; + +struct mgmt_controller_attrib { + struct mgmt_hba_attribs hba_attribs; + u32 rsvd0[10]; +} __packed; + struct be_cmd_req_cntl_attribs { struct be_cmd_req_hdr hdr; }; diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 32c53bc0e07a..4d2de4700769 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -705,15 +705,17 @@ be_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *ecmd) if (ecmd->autoneg != adapter->phy.fc_autoneg) return -EINVAL; - adapter->tx_fc = ecmd->tx_pause; - adapter->rx_fc = ecmd->rx_pause; - status = be_cmd_set_flow_control(adapter, - adapter->tx_fc, adapter->rx_fc); - if (status) + status = be_cmd_set_flow_control(adapter, ecmd->tx_pause, + ecmd->rx_pause); + if (status) { dev_warn(&adapter->pdev->dev, "Pause param set failed\n"); + return be_cmd_status(status); + } - return be_cmd_status(status); + adapter->tx_fc = ecmd->tx_pause; + adapter->rx_fc = ecmd->rx_pause; + return 0; } static int be_set_phys_id(struct net_device *netdev, diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 6d7b3a4d3cff..8e91ae851a7c 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -171,94 +171,6 @@ #define RETRIEVE_FAT 0 #define QUERY_FAT 1 -/* Flashrom related descriptors */ -#define MAX_FLASH_COMP 32 -#define IMAGE_TYPE_FIRMWARE 160 -#define IMAGE_TYPE_BOOTCODE 224 -#define IMAGE_TYPE_OPTIONROM 32 - -#define NUM_FLASHDIR_ENTRIES 32 - -#define OPTYPE_ISCSI_ACTIVE 0 -#define OPTYPE_REDBOOT 1 -#define OPTYPE_BIOS 2 -#define OPTYPE_PXE_BIOS 3 -#define OPTYPE_FCOE_BIOS 8 -#define OPTYPE_ISCSI_BACKUP 9 -#define OPTYPE_FCOE_FW_ACTIVE 10 -#define OPTYPE_FCOE_FW_BACKUP 11 -#define OPTYPE_NCSI_FW 13 -#define OPTYPE_REDBOOT_DIR 18 -#define OPTYPE_REDBOOT_CONFIG 19 -#define OPTYPE_SH_PHY_FW 21 -#define OPTYPE_FLASHISM_JUMPVECTOR 22 -#define OPTYPE_UFI_DIR 23 -#define OPTYPE_PHY_FW 99 -#define TN_8022 13 - -#define FLASHROM_OPER_PHY_FLASH 9 -#define FLASHROM_OPER_PHY_SAVE 10 -#define FLASHROM_OPER_FLASH 1 -#define FLASHROM_OPER_SAVE 2 -#define FLASHROM_OPER_REPORT 4 - -#define FLASH_IMAGE_MAX_SIZE_g2 (1310720) /* Max firmware image size */ -#define FLASH_BIOS_IMAGE_MAX_SIZE_g2 (262144) /* Max OPTION ROM image sz */ -#define FLASH_REDBOOT_IMAGE_MAX_SIZE_g2 (262144) /* Max Redboot image sz */ -#define FLASH_IMAGE_MAX_SIZE_g3 (2097152) /* Max firmware image size */ -#define FLASH_BIOS_IMAGE_MAX_SIZE_g3 (524288) /* Max OPTION ROM image sz */ -#define FLASH_REDBOOT_IMAGE_MAX_SIZE_g3 (1048576) /* Max Redboot image sz */ -#define FLASH_NCSI_IMAGE_MAX_SIZE_g3 (262144) -#define FLASH_PHY_FW_IMAGE_MAX_SIZE_g3 262144 - -#define FLASH_NCSI_MAGIC (0x16032009) -#define FLASH_NCSI_DISABLED (0) -#define FLASH_NCSI_ENABLED (1) - -#define FLASH_NCSI_BITFILE_HDR_OFFSET (0x600000) - -/* Offsets for components on Flash. */ -#define FLASH_iSCSI_PRIMARY_IMAGE_START_g2 (1048576) -#define FLASH_iSCSI_BACKUP_IMAGE_START_g2 (2359296) -#define FLASH_FCoE_PRIMARY_IMAGE_START_g2 (3670016) -#define FLASH_FCoE_BACKUP_IMAGE_START_g2 (4980736) -#define FLASH_iSCSI_BIOS_START_g2 (7340032) -#define FLASH_PXE_BIOS_START_g2 (7864320) -#define FLASH_FCoE_BIOS_START_g2 (524288) -#define FLASH_REDBOOT_START_g2 (0) - -#define FLASH_NCSI_START_g3 (15990784) -#define FLASH_iSCSI_PRIMARY_IMAGE_START_g3 (2097152) -#define FLASH_iSCSI_BACKUP_IMAGE_START_g3 (4194304) -#define FLASH_FCoE_PRIMARY_IMAGE_START_g3 (6291456) -#define FLASH_FCoE_BACKUP_IMAGE_START_g3 (8388608) -#define FLASH_iSCSI_BIOS_START_g3 (12582912) -#define FLASH_PXE_BIOS_START_g3 (13107200) -#define FLASH_FCoE_BIOS_START_g3 (13631488) -#define FLASH_REDBOOT_START_g3 (262144) -#define FLASH_PHY_FW_START_g3 1310720 - -#define IMAGE_NCSI 16 -#define IMAGE_OPTION_ROM_PXE 32 -#define IMAGE_OPTION_ROM_FCoE 33 -#define IMAGE_OPTION_ROM_ISCSI 34 -#define IMAGE_FLASHISM_JUMPVECTOR 48 -#define IMAGE_FLASH_ISM 49 -#define IMAGE_JUMP_VECTOR 50 -#define IMAGE_FIRMWARE_iSCSI 160 -#define IMAGE_FIRMWARE_COMP_iSCSI 161 -#define IMAGE_FIRMWARE_FCoE 162 -#define IMAGE_FIRMWARE_COMP_FCoE 163 -#define IMAGE_FIRMWARE_BACKUP_iSCSI 176 -#define IMAGE_FIRMWARE_BACKUP_COMP_iSCSI 177 -#define IMAGE_FIRMWARE_BACKUP_FCoE 178 -#define IMAGE_FIRMWARE_BACKUP_COMP_FCoE 179 -#define IMAGE_FIRMWARE_PHY 192 -#define IMAGE_REDBOOT_DIR 208 -#define IMAGE_REDBOOT_CONFIG 209 -#define IMAGE_UFI_DIR 210 -#define IMAGE_BOOT_CODE 224 - /************* Rx Packet Type Encoding **************/ #define BE_UNICAST_PACKET 0 #define BE_MULTICAST_PACKET 1 @@ -440,138 +352,3 @@ struct amap_eth_rx_compl_v1 { struct be_eth_rx_compl { u32 dw[4]; }; - -struct mgmt_hba_attribs { - u8 flashrom_version_string[32]; - u8 manufacturer_name[32]; - u32 supported_modes; - u32 rsvd0[3]; - u8 ncsi_ver_string[12]; - u32 default_extended_timeout; - u8 controller_model_number[32]; - u8 controller_description[64]; - u8 controller_serial_number[32]; - u8 ip_version_string[32]; - u8 firmware_version_string[32]; - u8 bios_version_string[32]; - u8 redboot_version_string[32]; - u8 driver_version_string[32]; - u8 fw_on_flash_version_string[32]; - u32 functionalities_supported; - u16 max_cdblength; - u8 asic_revision; - u8 generational_guid[16]; - u8 hba_port_count; - u16 default_link_down_timeout; - u8 iscsi_ver_min_max; - u8 multifunction_device; - u8 cache_valid; - u8 hba_status; - u8 max_domains_supported; - u8 phy_port; - u32 firmware_post_status; - u32 hba_mtu[8]; - u32 rsvd1[4]; -}; - -struct mgmt_controller_attrib { - struct mgmt_hba_attribs hba_attribs; - u16 pci_vendor_id; - u16 pci_device_id; - u16 pci_sub_vendor_id; - u16 pci_sub_system_id; - u8 pci_bus_number; - u8 pci_device_number; - u8 pci_function_number; - u8 interface_type; - u64 unique_identifier; - u32 rsvd0[5]; -}; - -struct controller_id { - u32 vendor; - u32 device; - u32 subvendor; - u32 subdevice; -}; - -struct flash_comp { - unsigned long offset; - int optype; - int size; - int img_type; -}; - -struct image_hdr { - u32 imageid; - u32 imageoffset; - u32 imagelength; - u32 image_checksum; - u8 image_version[32]; -}; -struct flash_file_hdr_g2 { - u8 sign[32]; - u32 cksum; - u32 antidote; - struct controller_id cont_id; - u32 file_len; - u32 chunk_num; - u32 total_chunks; - u32 num_imgs; - u8 build[24]; -}; - -struct flash_file_hdr_g3 { - u8 sign[52]; - u8 ufi_version[4]; - u32 file_len; - u32 cksum; - u32 antidote; - u32 num_imgs; - u8 build[24]; - u8 asic_type_rev; - u8 rsvd[31]; -}; - -struct flash_section_hdr { - u32 format_rev; - u32 cksum; - u32 antidote; - u32 num_images; - u8 id_string[128]; - u32 rsvd[4]; -} __packed; - -struct flash_section_hdr_g2 { - u32 format_rev; - u32 cksum; - u32 antidote; - u32 build_num; - u8 id_string[128]; - u32 rsvd[8]; -} __packed; - -struct flash_section_entry { - u32 type; - u32 offset; - u32 pad_size; - u32 image_size; - u32 cksum; - u32 entry_point; - u16 optype; - u16 rsvd0; - u32 rsvd1; - u8 ver_data[32]; -} __packed; - -struct flash_section_info { - u8 cookie[32]; - struct flash_section_hdr fsec_hdr; - struct flash_section_entry fsec_entry[32]; -} __packed; - -struct flash_section_info_g2 { - u8 cookie[32]; - struct flash_section_hdr_g2 fsec_hdr; - struct flash_section_entry fsec_entry[32]; -} __packed; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index ed46610e5453..6c10fece1245 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3183,13 +3183,32 @@ static int be_clear(struct be_adapter *adapter) return 0; } +static int be_if_create(struct be_adapter *adapter, u32 *if_handle, + u32 cap_flags, u32 vf) +{ + u32 en_flags; + int status; + + en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | + BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | + BE_IF_FLAGS_RSS; + + en_flags &= cap_flags; + + status = be_cmd_if_create(adapter, cap_flags, en_flags, + if_handle, vf); + + return status; +} + static int be_vfs_if_create(struct be_adapter *adapter) { struct be_resources res = {0}; struct be_vf_cfg *vf_cfg; - u32 cap_flags, en_flags, vf; - int status = 0; + u32 cap_flags, vf; + int status; + /* If a FW profile exists, then cap_flags are updated */ cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST; @@ -3201,18 +3220,13 @@ static int be_vfs_if_create(struct be_adapter *adapter) cap_flags = res.if_cap_flags; } - /* If a FW profile exists, then cap_flags are updated */ - en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED | - BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST); - status = - be_cmd_if_create(adapter, cap_flags, en_flags, - &vf_cfg->if_handle, vf + 1); + status = be_if_create(adapter, &vf_cfg->if_handle, + cap_flags, vf + 1); if (status) - goto err; + return status; } -err: - return status; + + return 0; } static int be_vf_setup_init(struct be_adapter *adapter) @@ -3653,7 +3667,6 @@ int be_update_queues(struct be_adapter *adapter) static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; - u32 tx_fc, rx_fc, en_flags; int status; be_setup_init(adapter); @@ -3669,13 +3682,8 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; - en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS; - if (adapter->function_caps & BE_FUNCTION_CAPS_RSS) - en_flags |= BE_IF_FLAGS_RSS; - en_flags = en_flags & be_if_cap_flags(adapter); - status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags, - &adapter->if_handle, 0); + status = be_if_create(adapter, &adapter->if_handle, + be_if_cap_flags(adapter), 0); if (status) goto err; @@ -3708,11 +3716,14 @@ static int be_setup(struct be_adapter *adapter) be_cmd_get_acpi_wol_cap(adapter); - be_cmd_get_flow_control(adapter, &tx_fc, &rx_fc); + status = be_cmd_set_flow_control(adapter, adapter->tx_fc, + adapter->rx_fc); + if (status) + be_cmd_get_flow_control(adapter, &adapter->tx_fc, + &adapter->rx_fc); - if (rx_fc != adapter->rx_fc || tx_fc != adapter->tx_fc) - be_cmd_set_flow_control(adapter, adapter->tx_fc, - adapter->rx_fc); + dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n", + adapter->tx_fc, adapter->rx_fc); if (be_physfn(adapter)) be_cmd_set_logical_link_config(adapter, @@ -3751,7 +3762,7 @@ static char flash_cookie[2][16] = {"*** SE FLAS", "H DIRECTORY *** "}; static bool phy_flashing_required(struct be_adapter *adapter) { - return (adapter->phy.phy_type == TN_8022 && + return (adapter->phy.phy_type == PHY_TYPE_TN_8022 && adapter->phy.interface_type == PHY_TYPE_BASET_10GB); } @@ -5060,6 +5071,10 @@ static int be_resume(struct pci_dev *pdev) if (status) return status; + status = be_cmd_reset_function(adapter); + if (status) + return status; + be_intr_set(adapter, true); /* tell fw we're ready to fire cmds */ status = be_cmd_fw_init(adapter); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1c7a7e43dd9c..58cabee00abf 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1189,12 +1189,13 @@ static void fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) { struct fec_enet_private *fep; - struct bufdesc *bdp; + struct bufdesc *bdp, *bdp_t; unsigned short status; struct sk_buff *skb; struct fec_enet_priv_tx_q *txq; struct netdev_queue *nq; int index = 0; + int i, bdnum; int entries_free; fep = netdev_priv(ndev); @@ -1215,18 +1216,29 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) if (bdp == txq->cur_tx) break; - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep); - + bdp_t = bdp; + bdnum = 1; + index = fec_enet_get_bd_index(txq->tx_bd_base, bdp_t, fep); skb = txq->tx_skbuff[index]; - txq->tx_skbuff[index] = NULL; - if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr)) - dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, - bdp->cbd_datlen, DMA_TO_DEVICE); - bdp->cbd_bufaddr = 0; - if (!skb) { - bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); - continue; + while (!skb) { + bdp_t = fec_enet_get_nextdesc(bdp_t, fep, queue_id); + index = fec_enet_get_bd_index(txq->tx_bd_base, bdp_t, fep); + skb = txq->tx_skbuff[index]; + bdnum++; } + if (skb_shinfo(skb)->nr_frags && + (status = bdp_t->cbd_sc) & BD_ENET_TX_READY) + break; + + for (i = 0; i < bdnum; i++) { + if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr)) + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, + bdp->cbd_datlen, DMA_TO_DEVICE); + bdp->cbd_bufaddr = 0; + if (i < bdnum - 1) + bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); + } + txq->tx_skbuff[index] = NULL; /* Check for errors. */ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 3a76e235fff6..3a83bc2c613c 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -52,12 +52,16 @@ struct tgec_mdio_controller { static int xgmac_wait_until_free(struct device *dev, struct tgec_mdio_controller __iomem *regs) { - uint32_t status; + unsigned int timeout; /* Wait till the bus is free */ - status = spin_event_timeout( - !((in_be32(®s->mdio_stat)) & MDIO_STAT_BSY), TIMEOUT, 0); - if (!status) { + timeout = TIMEOUT; + while ((ioread32be(®s->mdio_stat) & MDIO_STAT_BSY) && timeout) { + cpu_relax(); + timeout--; + } + + if (!timeout) { dev_err(dev, "timeout waiting for bus to be free\n"); return -ETIMEDOUT; } @@ -71,12 +75,16 @@ static int xgmac_wait_until_free(struct device *dev, static int xgmac_wait_until_done(struct device *dev, struct tgec_mdio_controller __iomem *regs) { - uint32_t status; + unsigned int timeout; /* Wait till the MDIO write is complete */ - status = spin_event_timeout( - !((in_be32(®s->mdio_data)) & MDIO_DATA_BSY), TIMEOUT, 0); - if (!status) { + timeout = TIMEOUT; + while ((ioread32be(®s->mdio_data) & MDIO_DATA_BSY) && timeout) { + cpu_relax(); + timeout--; + } + + if (!timeout) { dev_err(dev, "timeout waiting for operation to complete\n"); return -ETIMEDOUT; } @@ -96,7 +104,7 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val u32 mdio_ctl, mdio_stat; int ret; - mdio_stat = in_be32(®s->mdio_stat); + mdio_stat = ioread32be(®s->mdio_stat); if (regnum & MII_ADDR_C45) { /* Clause 45 (ie 10G) */ dev_addr = (regnum >> 16) & 0x1f; @@ -107,7 +115,7 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val mdio_stat &= ~MDIO_STAT_ENC; } - out_be32(®s->mdio_stat, mdio_stat); + iowrite32be(mdio_stat, ®s->mdio_stat); ret = xgmac_wait_until_free(&bus->dev, regs); if (ret) @@ -115,11 +123,11 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val /* Set the port and dev addr */ mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); - out_be32(®s->mdio_ctl, mdio_ctl); + iowrite32be(mdio_ctl, ®s->mdio_ctl); /* Set the register address */ if (regnum & MII_ADDR_C45) { - out_be32(®s->mdio_addr, regnum & 0xffff); + iowrite32be(regnum & 0xffff, ®s->mdio_addr); ret = xgmac_wait_until_free(&bus->dev, regs); if (ret) @@ -127,7 +135,7 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val } /* Write the value to the register */ - out_be32(®s->mdio_data, MDIO_DATA(value)); + iowrite32be(MDIO_DATA(value), ®s->mdio_data); ret = xgmac_wait_until_done(&bus->dev, regs); if (ret) @@ -150,7 +158,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) uint16_t value; int ret; - mdio_stat = in_be32(®s->mdio_stat); + mdio_stat = ioread32be(®s->mdio_stat); if (regnum & MII_ADDR_C45) { dev_addr = (regnum >> 16) & 0x1f; mdio_stat |= MDIO_STAT_ENC; @@ -159,7 +167,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) mdio_stat &= ~MDIO_STAT_ENC; } - out_be32(®s->mdio_stat, mdio_stat); + iowrite32be(mdio_stat, ®s->mdio_stat); ret = xgmac_wait_until_free(&bus->dev, regs); if (ret) @@ -167,11 +175,11 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) /* Set the Port and Device Addrs */ mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); - out_be32(®s->mdio_ctl, mdio_ctl); + iowrite32be(mdio_ctl, ®s->mdio_ctl); /* Set the register address */ if (regnum & MII_ADDR_C45) { - out_be32(®s->mdio_addr, regnum & 0xffff); + iowrite32be(regnum & 0xffff, ®s->mdio_addr); ret = xgmac_wait_until_free(&bus->dev, regs); if (ret) @@ -179,21 +187,21 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) } /* Initiate the read */ - out_be32(®s->mdio_ctl, mdio_ctl | MDIO_CTL_READ); + iowrite32be(mdio_ctl | MDIO_CTL_READ, ®s->mdio_ctl); ret = xgmac_wait_until_done(&bus->dev, regs); if (ret) return ret; /* Return all Fs if nothing was there */ - if (in_be32(®s->mdio_stat) & MDIO_STAT_RD_ER) { + if (ioread32be(®s->mdio_stat) & MDIO_STAT_RD_ER) { dev_err(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); return 0xffff; } - value = in_be32(®s->mdio_data) & 0xffff; + value = ioread32be(®s->mdio_data) & 0xffff; dev_dbg(&bus->dev, "read %04x\n", value); return value; diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index b691eb4f6376..4270ad2d4ddf 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -24,6 +24,7 @@ /* ethtool support for e1000 */ #include "e1000.h" +#include <linux/jiffies.h> #include <linux/uaccess.h> enum {NETDEV_STATS, E1000_STATS}; @@ -1460,7 +1461,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) ret_val = 13; /* ret_val is the same as mis-compare */ break; } - if (jiffies >= (time + 2)) { + if (time_after_eq(jiffies, time + 2)) { ret_val = 14; /* error code for time out error */ break; } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 9242982db3e0..7f997d36948f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2977,7 +2977,6 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, int tx_flags, int count) { - struct e1000_hw *hw = &adapter->hw; struct e1000_tx_desc *tx_desc = NULL; struct e1000_tx_buffer *buffer_info; u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; @@ -3031,11 +3030,6 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, wmb(); tx_ring->next_to_use = i; - writel(i, hw->hw_addr + tx_ring->tdt); - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } /* 82547 workaround to avoid controller hang in half-duplex environment. @@ -3264,6 +3258,15 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, /* Make sure there is space in the ring for the next send. */ e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); + if (!skb->xmit_more || + netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { + writel(tx_ring->next_to_use, hw->hw_addr + tx_ring->tdt); + /* we need this if more than one processor can write to + * our tail at a time, it synchronizes IO on IA64/Altix + * systems + */ + mmiowb(); + } } else { dev_kfree_skb_any(skb); tx_ring->buffer_info[first].time_stamp = 0; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 38cb586b1bf4..1e8c40fd5c3d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5444,16 +5444,6 @@ static void e1000_tx_queue(struct e1000_ring *tx_ring, int tx_flags, int count) wmb(); tx_ring->next_to_use = i; - - if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) - e1000e_update_tdt_wa(tx_ring, i); - else - writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } #define MINIMUM_DHCP_PACKET_SIZE 282 @@ -5636,8 +5626,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, count = e1000_tx_map(tx_ring, skb, first, adapter->tx_fifo_limit, nr_frags); if (count) { - if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - !adapter->tx_hwtstamp_skb)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + (adapter->flags & FLAG_HAS_HW_TIMESTAMP) && + !adapter->tx_hwtstamp_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= E1000_TX_FLAGS_HWTSTAMP; adapter->tx_hwtstamp_skb = skb_get(skb); @@ -5654,6 +5645,21 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, (MAX_SKB_FRAGS * DIV_ROUND_UP(PAGE_SIZE, adapter->tx_fifo_limit) + 2)); + + if (!skb->xmit_more || + netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) + e1000e_update_tdt_wa(tx_ring, + tx_ring->next_to_use); + else + writel(tx_ring->next_to_use, tx_ring->tail); + + /* we need this if more than one processor can write + * to our tail at a time, it synchronizes IO on + *IA64/Altix systems + */ + mmiowb(); + } } else { dev_kfree_skb_any(skb); tx_ring->buffer_info[first].time_stamp = 0; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index caa43f7c2931..84ab9eea2768 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -97,7 +97,6 @@ static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring, */ if (dma_mapping_error(rx_ring->dev, dma)) { __free_page(page); - bi->page = NULL; rx_ring->rx_stats.alloc_failed++; return false; @@ -147,8 +146,8 @@ void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->q.hdr_addr = 0; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->d.staterr = 0; cleaned_count--; } while (cleaned_count); @@ -194,7 +193,7 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; /* transfer page from old buffer to new buffer */ - memcpy(new_buff, old_buff, sizeof(struct fm10k_rx_buffer)); + *new_buff = *old_buff; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, @@ -203,12 +202,17 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, DMA_FROM_DEVICE); } +static inline bool fm10k_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; +} + static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, struct page *page, unsigned int truesize) { /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_mem_id())) + if (unlikely(fm10k_page_is_reserved(page))) return false; #if (PAGE_SIZE < 8192) @@ -218,22 +222,19 @@ static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, /* flip page offset to other buffer */ rx_buffer->page_offset ^= FM10K_RX_BUFSZ; - - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - atomic_inc(&page->_count); #else /* move offset up to the next cache line */ rx_buffer->page_offset += truesize; if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ)) return false; - - /* bump ref count on page before it is given to the stack */ - get_page(page); #endif + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. + */ + atomic_inc(&page->_count); + return true; } @@ -270,12 +271,12 @@ static bool fm10k_add_rx_frag(struct fm10k_ring *rx_ring, memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* we can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(page) == numa_mem_id())) + /* page is not reserved, we can reuse buffer as-is */ + if (likely(!fm10k_page_is_reserved(page))) return true; /* this page cannot be reused so discard it */ - put_page(page); + __free_page(page); return false; } @@ -293,7 +294,6 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring, struct page *page; rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean]; - page = rx_buffer->page; prefetchw(page); @@ -727,6 +727,12 @@ static __be16 fm10k_tx_encap_offload(struct sk_buff *skb) struct ethhdr *eth_hdr; u8 l4_hdr = 0; +/* fm10k supports 184 octets of outer+inner headers. Minus 20 for inner L4. */ +#define FM10K_MAX_ENCAP_TRANSPORT_OFFSET 164 + if (skb_inner_transport_header(skb) - skb_mac_header(skb) > + FM10K_MAX_ENCAP_TRANSPORT_OFFSET) + return 0; + switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): l4_hdr = ip_hdr(skb)->protocol; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 945b35d31c71..cfde8bac1aeb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1414,13 +1414,12 @@ struct net_device *fm10k_alloc_netdev(void) dev->vlan_features |= dev->features; /* configure tunnel offloads */ - dev->hw_enc_features = NETIF_F_IP_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; + dev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_IPV6_CSUM; /* we want to leave these both on as we cannot disable VLAN tag * insertion or stripping on the hardware since it is contained diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 280296f29154..7c6d9d5a8ae5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -354,7 +354,7 @@ struct fm10k_hw; /* Define timeouts for resets and disables */ #define FM10K_QUEUE_DISABLE_TIMEOUT 100 -#define FM10K_RESET_TIMEOUT 100 +#define FM10K_RESET_TIMEOUT 150 /* VF registers */ #define FM10K_VFCTRL 0x00000 diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index ee22da391474..c2bd4f98a837 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -343,6 +343,9 @@ struct hwmon_buff { }; #endif +#define IGB_N_EXTTS 2 +#define IGB_N_PEROUT 2 +#define IGB_N_SDP 4 #define IGB_RETA_SIZE 128 /* board specific private data structure */ @@ -439,6 +442,12 @@ struct igb_adapter { u32 tx_hwtstamp_timeouts; u32 rx_hwtstamp_cleared; + struct ptp_pin_desc sdp_config[IGB_N_SDP]; + struct { + struct timespec start; + struct timespec period; + } perout[IGB_N_PEROUT]; + char fw_version[32]; #ifdef CONFIG_IGB_HWMON struct hwmon_buff *igb_hwmon_buff; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6c25ec314183..f366b3b96d03 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5384,6 +5384,80 @@ void igb_update_stats(struct igb_adapter *adapter, } } +static void igb_tsync_interrupt(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct ptp_clock_event event; + struct timespec ts; + u32 ack = 0, tsauxc, sec, nsec, tsicr = rd32(E1000_TSICR); + + if (tsicr & TSINTR_SYS_WRAP) { + event.type = PTP_CLOCK_PPS; + if (adapter->ptp_caps.pps) + ptp_clock_event(adapter->ptp_clock, &event); + else + dev_err(&adapter->pdev->dev, "unexpected SYS WRAP"); + ack |= TSINTR_SYS_WRAP; + } + + if (tsicr & E1000_TSICR_TXTS) { + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + ack |= E1000_TSICR_TXTS; + } + + if (tsicr & TSINTR_TT0) { + spin_lock(&adapter->tmreg_lock); + ts = timespec_add(adapter->perout[0].start, + adapter->perout[0].period); + wr32(E1000_TRGTTIML0, ts.tv_nsec); + wr32(E1000_TRGTTIMH0, ts.tv_sec); + tsauxc = rd32(E1000_TSAUXC); + tsauxc |= TSAUXC_EN_TT0; + wr32(E1000_TSAUXC, tsauxc); + adapter->perout[0].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= TSINTR_TT0; + } + + if (tsicr & TSINTR_TT1) { + spin_lock(&adapter->tmreg_lock); + ts = timespec_add(adapter->perout[1].start, + adapter->perout[1].period); + wr32(E1000_TRGTTIML1, ts.tv_nsec); + wr32(E1000_TRGTTIMH1, ts.tv_sec); + tsauxc = rd32(E1000_TSAUXC); + tsauxc |= TSAUXC_EN_TT1; + wr32(E1000_TSAUXC, tsauxc); + adapter->perout[1].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= TSINTR_TT1; + } + + if (tsicr & TSINTR_AUTT0) { + nsec = rd32(E1000_AUXSTMPL0); + sec = rd32(E1000_AUXSTMPH0); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = sec * 1000000000ULL + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= TSINTR_AUTT0; + } + + if (tsicr & TSINTR_AUTT1) { + nsec = rd32(E1000_AUXSTMPL1); + sec = rd32(E1000_AUXSTMPH1); + event.type = PTP_CLOCK_EXTTS; + event.index = 1; + event.timestamp = sec * 1000000000ULL + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= TSINTR_AUTT1; + } + + /* acknowledge the interrupts */ + wr32(E1000_TSICR, ack); +} + static irqreturn_t igb_msix_other(int irq, void *data) { struct igb_adapter *adapter = data; @@ -5415,16 +5489,8 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); wr32(E1000_EIMS, adapter->eims_other); @@ -6011,8 +6077,12 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; /* reply to reset with ack and vf mac address */ - msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; - memcpy(addr, vf_mac, ETH_ALEN); + if (!is_zero_ether_addr(vf_mac)) { + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, ETH_ALEN); + } else { + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK; + } igb_write_mbx(hw, msgbuf, 3, vf); } @@ -6203,16 +6273,8 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); napi_schedule(&q_vector->napi); @@ -6257,16 +6319,8 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); napi_schedule(&q_vector->napi); @@ -6527,15 +6581,17 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, DMA_FROM_DEVICE); } +static inline bool igb_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; +} + static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, struct page *page, unsigned int truesize) { /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_node_id())) - return false; - - if (unlikely(page->pfmemalloc)) + if (unlikely(igb_page_is_reserved(page))) return false; #if (PAGE_SIZE < 8192) @@ -6545,22 +6601,19 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, /* flip page offset to other buffer */ rx_buffer->page_offset ^= IGB_RX_BUFSZ; - - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - atomic_inc(&page->_count); #else /* move offset up to the next cache line */ rx_buffer->page_offset += truesize; if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) return false; - - /* bump ref count on page before it is given to the stack */ - get_page(page); #endif + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. + */ + atomic_inc(&page->_count); + return true; } @@ -6603,13 +6656,12 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* we can reuse buffer as-is, just make sure it is local */ - if (likely((page_to_nid(page) == numa_node_id()) && - !page->pfmemalloc)) + /* page is not reserved, we can reuse buffer as-is */ + if (likely(!igb_page_is_reserved(page))) return true; /* this page cannot be reused so discard it */ - put_page(page); + __free_page(page); return false; } @@ -6627,7 +6679,6 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, struct page *page; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; prefetchw(page); @@ -7042,8 +7093,8 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->read.hdr_addr = 0; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.upper.status_error = 0; cleaned_count--; } while (cleaned_count); diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 5e7a4e30a7b6..d20fc8ed11f1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -355,12 +355,239 @@ static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, return 0; } +static void igb_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext) +{ + u32 *ptr = pin < 2 ? ctrl : ctrl_ext; + u32 mask[IGB_N_SDP] = { + E1000_CTRL_SDP0_DIR, + E1000_CTRL_SDP1_DIR, + E1000_CTRL_EXT_SDP2_DIR, + E1000_CTRL_EXT_SDP3_DIR, + }; + + if (input) + *ptr &= ~mask[pin]; + else + *ptr |= mask[pin]; +} + +static void igb_pin_extts(struct igb_adapter *igb, int chan, int pin) +{ + struct e1000_hw *hw = &igb->hw; + u32 aux0_sel_sdp[IGB_N_SDP] = { + AUX0_SEL_SDP0, AUX0_SEL_SDP1, AUX0_SEL_SDP2, AUX0_SEL_SDP3, + }; + u32 aux1_sel_sdp[IGB_N_SDP] = { + AUX1_SEL_SDP0, AUX1_SEL_SDP1, AUX1_SEL_SDP2, AUX1_SEL_SDP3, + }; + u32 ts_sdp_en[IGB_N_SDP] = { + TS_SDP0_EN, TS_SDP1_EN, TS_SDP2_EN, TS_SDP3_EN, + }; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(E1000_CTRL); + ctrl_ext = rd32(E1000_CTRL_EXT); + tssdp = rd32(E1000_TSSDP); + + igb_pin_direction(pin, 1, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an output. */ + tssdp &= ~ts_sdp_en[pin]; + + if (chan == 1) { + tssdp &= ~AUX1_SEL_SDP3; + tssdp |= aux1_sel_sdp[pin] | AUX1_TS_SDP_EN; + } else { + tssdp &= ~AUX0_SEL_SDP3; + tssdp |= aux0_sel_sdp[pin] | AUX0_TS_SDP_EN; + } + + wr32(E1000_TSSDP, tssdp); + wr32(E1000_CTRL, ctrl); + wr32(E1000_CTRL_EXT, ctrl_ext); +} + +static void igb_pin_perout(struct igb_adapter *igb, int chan, int pin) +{ + struct e1000_hw *hw = &igb->hw; + u32 aux0_sel_sdp[IGB_N_SDP] = { + AUX0_SEL_SDP0, AUX0_SEL_SDP1, AUX0_SEL_SDP2, AUX0_SEL_SDP3, + }; + u32 aux1_sel_sdp[IGB_N_SDP] = { + AUX1_SEL_SDP0, AUX1_SEL_SDP1, AUX1_SEL_SDP2, AUX1_SEL_SDP3, + }; + u32 ts_sdp_en[IGB_N_SDP] = { + TS_SDP0_EN, TS_SDP1_EN, TS_SDP2_EN, TS_SDP3_EN, + }; + u32 ts_sdp_sel_tt0[IGB_N_SDP] = { + TS_SDP0_SEL_TT0, TS_SDP1_SEL_TT0, + TS_SDP2_SEL_TT0, TS_SDP3_SEL_TT0, + }; + u32 ts_sdp_sel_tt1[IGB_N_SDP] = { + TS_SDP0_SEL_TT1, TS_SDP1_SEL_TT1, + TS_SDP2_SEL_TT1, TS_SDP3_SEL_TT1, + }; + u32 ts_sdp_sel_clr[IGB_N_SDP] = { + TS_SDP0_SEL_FC1, TS_SDP1_SEL_FC1, + TS_SDP2_SEL_FC1, TS_SDP3_SEL_FC1, + }; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(E1000_CTRL); + ctrl_ext = rd32(E1000_CTRL_EXT); + tssdp = rd32(E1000_TSSDP); + + igb_pin_direction(pin, 0, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an input. */ + if ((tssdp & AUX0_SEL_SDP3) == aux0_sel_sdp[pin]) + tssdp &= ~AUX0_TS_SDP_EN; + + if ((tssdp & AUX1_SEL_SDP3) == aux1_sel_sdp[pin]) + tssdp &= ~AUX1_TS_SDP_EN; + + tssdp &= ~ts_sdp_sel_clr[pin]; + if (chan == 1) + tssdp |= ts_sdp_sel_tt1[pin]; + else + tssdp |= ts_sdp_sel_tt0[pin]; + + tssdp |= ts_sdp_en[pin]; + + wr32(E1000_TSSDP, tssdp); + wr32(E1000_CTRL, ctrl); + wr32(E1000_CTRL_EXT, ctrl_ext); +} + +static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct igb_adapter *igb = + container_of(ptp, struct igb_adapter, ptp_caps); + struct e1000_hw *hw = &igb->hw; + u32 tsauxc, tsim, tsauxc_mask, tsim_mask, trgttiml, trgttimh; + unsigned long flags; + struct timespec ts; + int pin; + s64 ns; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + if (on) { + pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS, + rq->extts.index); + if (pin < 0) + return -EBUSY; + } + if (rq->extts.index == 1) { + tsauxc_mask = TSAUXC_EN_TS1; + tsim_mask = TSINTR_AUTT1; + } else { + tsauxc_mask = TSAUXC_EN_TS0; + tsim_mask = TSINTR_AUTT0; + } + spin_lock_irqsave(&igb->tmreg_lock, flags); + tsauxc = rd32(E1000_TSAUXC); + tsim = rd32(E1000_TSIM); + if (on) { + igb_pin_extts(igb, rq->extts.index, pin); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } else { + tsauxc &= ~tsauxc_mask; + tsim &= ~tsim_mask; + } + wr32(E1000_TSAUXC, tsauxc); + wr32(E1000_TSIM, tsim); + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PEROUT: + if (on) { + pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + } + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec_to_ns(&ts); + ns = ns >> 1; + if (on && ns < 500000LL) { + /* 2k interrupts per second is an awful lot. */ + return -EINVAL; + } + ts = ns_to_timespec(ns); + if (rq->perout.index == 1) { + tsauxc_mask = TSAUXC_EN_TT1; + tsim_mask = TSINTR_TT1; + trgttiml = E1000_TRGTTIML1; + trgttimh = E1000_TRGTTIMH1; + } else { + tsauxc_mask = TSAUXC_EN_TT0; + tsim_mask = TSINTR_TT0; + trgttiml = E1000_TRGTTIML0; + trgttimh = E1000_TRGTTIMH0; + } + spin_lock_irqsave(&igb->tmreg_lock, flags); + tsauxc = rd32(E1000_TSAUXC); + tsim = rd32(E1000_TSIM); + if (on) { + int i = rq->perout.index; + + igb_pin_perout(igb, i, pin); + igb->perout[i].start.tv_sec = rq->perout.start.sec; + igb->perout[i].start.tv_nsec = rq->perout.start.nsec; + igb->perout[i].period.tv_sec = ts.tv_sec; + igb->perout[i].period.tv_nsec = ts.tv_nsec; + wr32(trgttiml, rq->perout.start.sec); + wr32(trgttimh, rq->perout.start.nsec); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } else { + tsauxc &= ~tsauxc_mask; + tsim &= ~tsim_mask; + } + wr32(E1000_TSAUXC, tsauxc); + wr32(E1000_TSIM, tsim); + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PPS: + spin_lock_irqsave(&igb->tmreg_lock, flags); + tsim = rd32(E1000_TSIM); + if (on) + tsim |= TSINTR_SYS_WRAP; + else + tsim &= ~TSINTR_SYS_WRAP; + wr32(E1000_TSIM, tsim); + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + return 0; + } + + return -EOPNOTSUPP; +} + static int igb_ptp_feature_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { return -EOPNOTSUPP; } +static int igb_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + case PTP_PF_PHYSYNC: + return -1; + } + return 0; +} + /** * igb_ptp_tx_work * @work: pointer to work struct @@ -751,6 +978,7 @@ void igb_ptp_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; + int i; switch (hw->mac.type) { case e1000_82576: @@ -793,16 +1021,27 @@ void igb_ptp_init(struct igb_adapter *adapter) break; case e1000_i210: case e1000_i211: + for (i = 0; i < IGB_N_SDP; i++) { + struct ptp_pin_desc *ppd = &adapter->sdp_config[i]; + + snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i); + ppd->index = i; + ppd->func = PTP_PF_NONE; + } snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; + adapter->ptp_caps.n_ext_ts = IGB_N_EXTTS; + adapter->ptp_caps.n_per_out = IGB_N_PEROUT; + adapter->ptp_caps.n_pins = IGB_N_SDP; + adapter->ptp_caps.pps = 1; + adapter->ptp_caps.pin_config = adapter->sdp_config; adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; adapter->ptp_caps.gettime = igb_ptp_gettime_i210; adapter->ptp_caps.settime = igb_ptp_settime_i210; - adapter->ptp_caps.enable = igb_ptp_feature_enable; + adapter->ptp_caps.enable = igb_ptp_feature_enable_i210; + adapter->ptp_caps.verify = igb_ptp_verify_pin; /* Enable the timer functions by clearing bit 31. */ wr32(E1000_TSAUXC, 0x0); break; @@ -900,6 +1139,7 @@ void igb_ptp_stop(struct igb_adapter *adapter) void igb_ptp_reset(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + unsigned long flags; if (!(adapter->flags & IGB_FLAG_PTP)) return; @@ -907,6 +1147,8 @@ void igb_ptp_reset(struct igb_adapter *adapter) /* reset the tstamp_config */ igb_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + spin_lock_irqsave(&adapter->tmreg_lock, flags); + switch (adapter->hw.mac.type) { case e1000_82576: /* Dial the nominal frequency. */ @@ -917,23 +1159,25 @@ void igb_ptp_reset(struct igb_adapter *adapter) case e1000_i350: case e1000_i210: case e1000_i211: - /* Enable the timer functions and interrupts. */ wr32(E1000_TSAUXC, 0x0); + wr32(E1000_TSSDP, 0x0); wr32(E1000_TSIM, TSYNC_INTERRUPTS); wr32(E1000_IMS, E1000_IMS_TS); break; default: /* No work to do. */ - return; + goto out; } /* Re-initialize the timer. */ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { struct timespec ts = ktime_to_timespec(ktime_get_real()); - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + igb_ptp_write_i210(adapter, &ts); } else { timecounter_init(&adapter->tc, &adapter->cc, ktime_to_ns(ktime_get_real())); } +out: + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 963dd7e6d547..a716c26e0d99 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -657,7 +658,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + dma_free_coherent(&dev->persist->pdev->dev, size, + buf->direct.buf, buf->direct.map); else { if (BITS_PER_LONG == 64 && buf->direct.buf) @@ -665,7 +667,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) set_bit(i, db->u.pgdir->bits[o]); if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); kfree(db->u.pgdir); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 9c656fe4983d..715de8affcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -40,16 +40,177 @@ enum { MLX4_CATAS_POLL_INTERVAL = 5 * HZ, }; -static DEFINE_SPINLOCK(catas_lock); -static LIST_HEAD(catas_list); -static struct work_struct catas_work; -static int internal_err_reset = 1; -module_param(internal_err_reset, int, 0644); +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, - "Reset device on internal errors if non-zero" - " (default 1, in SRIOV mode default is 0)"); + "Reset device on internal errors if non-zero (default 1)"); + +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. + */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +static int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!mlx4_internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + if (mlx4_is_slave(dev)) + err = mlx4_reset_slave(dev); + else + err = mlx4_reset_master(dev); + BUG_ON(err != 0); + + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mlx4_err(dev, "device was reset successfully\n"); + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + int err = 0; + + mlx4_enter_error_state(persist); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); +} static void dump_err_buf(struct mlx4_dev *dev) { @@ -67,58 +228,40 @@ static void poll_catas(unsigned long dev_ptr) { struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); + u32 slave_read; - if (readl(priv->catas_err.map)) { - /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->pdev)) - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); - else { - dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); - } + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; } - } else - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + } else if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (mlx4_internal_err_reset) + queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } static void catas_reset(struct work_struct *work) { - struct mlx4_priv *priv, *tmppriv; - struct mlx4_dev *dev; + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); - LIST_HEAD(tlist); - int ret; - - spin_lock_irq(&catas_lock); - list_splice_init(&catas_list, &tlist); - spin_unlock_irq(&catas_lock); - - list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.pdev; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - continue; - - ret = mlx4_restart_one(priv->dev.pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else { - dev = pci_get_drvdata(pdev); - mlx4_dbg(dev, "Reset succeeded\n"); - } - } + mlx4_handle_error_state(persist); } void mlx4_start_catas_poll(struct mlx4_dev *dev) @@ -126,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); phys_addr_t addr; - /*If we are in SRIOV the default of the module param must be 0*/ - if (mlx4_is_mfunc(dev)) - internal_err_reset = 0; - INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + - priv->fw.catas_offset; + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; - priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", - (unsigned long long) addr); - return; + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } } priv->catas_err.timer.data = (unsigned long) dev; @@ -157,15 +299,29 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } - spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); - spin_unlock_irq(&catas_lock); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); } -void __init mlx4_catas_init(void) +int mlx4_catas_init(struct mlx4_dev *dev) { - INIT_WORK(&catas_work, catas_reset); + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_end(struct mlx4_dev *dev) +{ + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5c93d1451c44..2b48932855e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -42,6 +42,7 @@ #include <linux/mlx4/device.h> #include <linux/semaphore.h> #include <rdma/ib_smi.h> +#include <linux/delay.h> #include <asm/io.h> @@ -182,6 +183,72 @@ static u8 mlx4_errno_to_status(int errno) } } +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -190,16 +257,30 @@ static int comm_pending(struct mlx4_dev *dev) return (swab32(status) >> 31) != priv->cmd.comm_toggle; } -static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) +static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) { struct mlx4_priv *priv = mlx4_priv(dev); u32 val; + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the function was rest, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + mutex_lock(&dev->persist->device_state_mutex); + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mutex_unlock(&dev->persist->device_state_mutex); + return -EIO; + } + priv->cmd.comm_toggle ^= 1; val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); mmiowb(); + mutex_unlock(&dev->persist->device_state_mutex); + return 0; } static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, @@ -219,7 +300,13 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, /* Write command */ down(&priv->cmd.poll_sem); - mlx4_comm_cmd_post(dev, cmd, param); + if (mlx4_comm_cmd_post(dev, cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) @@ -231,18 +318,23 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, * is MLX4_DELAY_RESET_SLAVE*/ if ((MLX4_COMM_CMD_RESET == cmd)) { err = MLX4_DELAY_RESET_SLAVE; + goto out; } else { - mlx4_warn(dev, "Communication channel timed out\n"); - err = -ETIMEDOUT; + mlx4_warn(dev, "Communication channel command 0x%x timed out\n", + cmd); + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); } } + if (err) + mlx4_enter_error_state(dev->persist); +out: up(&priv->cmd.poll_sem); return err; } -static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, - u16 param, unsigned long timeout) +static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd, + u16 param, u16 op, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; @@ -258,34 +350,49 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, cmd->free_head = context->next; spin_unlock(&cmd->context_lock); - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_comm_cmd_post(dev, op, param); + if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { - mlx4_warn(dev, "communication channel command 0x%x timed out\n", - op); - err = -EBUSY; - goto out; + mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n", + vhcr_cmd, op); + goto out_reset; } err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", - op, context->fw_status); - goto out; + vhcr_cmd, context->fw_status); + if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; } -out: /* wait for comm channel ready * this is necessary for prevention the race * when switching between event to polling mode + * Skipping this section in case the device is in FATAL_ERROR state, + * In this state, no commands are sent via the comm channel until + * the device has returned from reset. */ - end = msecs_to_jiffies(timeout) + jiffies; - while (comm_pending(dev) && time_before(jiffies, end)) - cond_resched(); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + } + goto out; +out_reset: + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + mlx4_enter_error_state(dev->persist); +out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; @@ -296,10 +403,13 @@ out: } int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout) + u16 op, unsigned long timeout) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + if (mlx4_priv(dev)->cmd.use_events) - return mlx4_comm_cmd_wait(dev, cmd, param, timeout); + return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout); return mlx4_comm_cmd_poll(dev, cmd, param, timeout); } @@ -307,7 +417,7 @@ static int cmd_pending(struct mlx4_dev *dev) { u32 status; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); @@ -323,17 +433,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; - int ret = -EAGAIN; + int ret = -EIO; unsigned long end; - mutex_lock(&cmd->hcr_mutex); - - if (pci_channel_offline(dev->pdev)) { + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -342,12 +456,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -391,7 +504,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: - mutex_unlock(&cmd->hcr_mutex); + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + return ret; } @@ -428,8 +545,11 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } ret = mlx4_status_to_errno(vhcr->status); } + if (ret && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, op_modifier); } else { - ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, + ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op, MLX4_COMM_TIME + timeout); if (!ret) { if (out_is_imm) { @@ -443,9 +563,14 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } } ret = mlx4_status_to_errno(vhcr->status); - } else - mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", - op); + } else { + if (dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, + op_modifier); + else + mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op); + } } mutex_unlock(&priv->cmd.slave_cmd_mutex); @@ -464,12 +589,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->pdev)) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { /* * Device is going through error recovery * and cannot accept commands. */ - err = -EIO; + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -483,16 +608,21 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) - goto out; + goto out_reset; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -502,8 +632,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -ETIMEDOUT; - goto out; + err = -EIO; + goto out_reset; } if (out_is_imm) @@ -515,10 +645,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); - if (err) + if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: up(&priv->cmd.poll_sem); return err; @@ -565,17 +702,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, - in_modifier, op_modifier, op, context->token, 1); + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EBUSY; - goto out; + err = -EIO; + goto out_reset; } err = context->result; @@ -592,12 +731,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, else mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + goto out; } if (out_is_imm) *out_param = context->out_param; +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; @@ -612,10 +759,13 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { - if (pci_channel_offline(dev->pdev)) - return -EIO; + if (pci_channel_offline(dev->persist->pdev)) + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, @@ -631,7 +781,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, EXPORT_SYMBOL_GPL(__mlx4_cmd); -static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -1460,8 +1610,10 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, ALIGN(sizeof(struct mlx4_vhcr_cmd), MLX4_ACCESS_MEM_ALIGN), 1); if (ret) { - mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", - __func__, ret); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", + __func__, ret); kfree(vhcr); return ret; } @@ -1500,11 +1652,14 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, goto out_status; } - if (mlx4_ACCESS_MEM(dev, inbox->dma, slave, - vhcr->in_param, - MLX4_MAILBOX_SIZE, 1)) { - mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", - __func__, cmd->opcode); + ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave, + vhcr->in_param, + MLX4_MAILBOX_SIZE, 1); + if (ret) { + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", + __func__, cmd->opcode); vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; goto out_status; } @@ -1552,8 +1707,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } if (err) { - mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", - vhcr->op, slave, vhcr->errno, err); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", + vhcr->op, slave, vhcr->errno, err); vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } @@ -1568,7 +1724,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, /* If we failed to write back the outbox after the *command was successfully executed, we must fail this * slave, as it is now in undefined state */ - mlx4_err(dev, "%s:Failed writing outbox\n", __func__); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s:Failed writing outbox\n", __func__); goto out; } } @@ -1847,8 +2005,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && - (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); goto reset_slave; + } mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { @@ -1882,7 +2043,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ - mlx4_delete_all_resources_for_slave(dev, slave); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; @@ -1958,17 +2130,28 @@ void mlx4_master_comm_channel(struct work_struct *work) static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int wr_toggle; - int rd_toggle; + u32 wr_toggle; + u32 rd_toggle; unsigned long end; - wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; - end = jiffies + msecs_to_jiffies(5000); + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { - rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; - if (rd_toggle == wr_toggle) { - priv->cmd.comm_toggle = rd_toggle; + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; return 0; } @@ -1997,11 +2180,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_is_master(dev)) priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector\n"); @@ -2073,13 +2257,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - err = mlx4_ARM_COMM_CHANNEL(dev); - if (err) { - mlx4_err(dev, " Failed to arm comm channel eq: %x\n", - err); - goto err_resource; - } - } else { err = sync_toggles(dev); if (err) { @@ -2089,8 +2266,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } return 0; -err_resource: - mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); @@ -2107,9 +2282,9 @@ err_comm_admin: err_comm: iounmap(priv->mfunc.comm); err_vhcr: - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, - priv->mfunc.vhcr_dma); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, + priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } @@ -2120,7 +2295,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { - mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2130,8 +2304,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { - priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + - MLX4_HCR_BASE, MLX4_HCR_SIZE); + priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev, + 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); goto err; @@ -2140,7 +2314,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) @@ -2150,7 +2325,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!priv->cmd.pool) { - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + priv->cmd.pool = pci_pool_create("mlx4_cmd", + dev->persist->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) @@ -2166,6 +2342,27 @@ err: return -ENOMEM; } +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* Report an internal error event to all + * communication channels. + */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + } +} + void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2181,6 +2378,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; } iounmap(priv->mfunc.comm); @@ -2202,7 +2400,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) } if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } @@ -2229,6 +2427,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; + /* To support fatal error flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. + */ + init_completion(&priv->cmd.context[i].done); } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; @@ -2306,8 +2509,9 @@ u32 mlx4_comm_get_version(void) static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) { - if ((vf < 0) || (vf >= dev->num_vfs)) { - mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + if ((vf < 0) || (vf >= dev->persist->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", + vf, dev->persist->num_vfs); return -EINVAL; } @@ -2316,7 +2520,7 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) { - if (slave < 1 || slave > dev->num_vfs) { + if (slave < 1 || slave > dev->persist->num_vfs) { mlx4_err(dev, "Bad slave number:%d (number of activated slaves: %lu)\n", slave, dev->num_slaves); @@ -2325,6 +2529,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) return slave - 1; } +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) { struct mlx4_active_ports actv_ports; @@ -2388,7 +2611,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, if (port <= 0 || port > dev->caps.num_ports) return slaves_pport; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (test_bit(port - 1, actv_ports.ports)) @@ -2408,7 +2631,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (bitmap_equal(crit_ports->ports, actv_ports.ports, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 82322b1c8411..22da4d0d0f05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 90e0f045a6bc..569eda9e83d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); - strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), + strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 9f16f754137b..c643d2bbb7b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -241,8 +241,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) spin_lock_init(&mdev->uar_lock); mdev->dev = dev; - mdev->dma_device = &(dev->pdev->dev); - mdev->pdev = dev->pdev; + mdev->dma_device = &dev->persist->pdev->dev; + mdev->pdev = dev->persist->pdev; mdev->device_up = false; mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d0d6dc1b8e46..43a3f9822f74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2457,7 +2457,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, prof->tx_ring_num); netif_set_real_num_rx_queues(dev, prof->rx_ring_num); - SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); + SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); dev->dev_port = port - 1; /* diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a0474eb94aa3..2ba5d368edce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -387,10 +387,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->rx_info, tmp); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 359bb1286eb5..55f9f5c5344e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 3d275fbaf0eb..2f2e6067426d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -237,7 +237,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -255,7 +255,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -310,7 +310,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - for (i = 0; i < dev->num_vfs + 1; i++) + for (i = 0; i < dev->persist->num_vfs + 1; i++) if (test_bit(i, slaves_pport.slaves)) set_and_calc_slave_port_state(dev, i, port, event, &gen_event); @@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", i); - - mlx4_delete_all_resources_for_slave(dev, i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; @@ -560,7 +566,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { @@ -596,7 +603,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; + i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (i == mlx4_master_func_num(dev)) @@ -865,7 +874,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + ((eq->eqn / 4) << PAGE_SHIFT), PAGE_SIZE); if (!priv->eq_table.uar_map[index]) { @@ -928,8 +937,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, - PAGE_SIZE, &t, GFP_KERNEL); + eq->page_list[i].buf = dma_alloc_coherent(&dev->persist-> + pdev->dev, + PAGE_SIZE, &t, + GFP_KERNEL); if (!eq->page_list[i].buf) goto err_out_free_pages; @@ -995,7 +1006,7 @@ err_out_free_eq: err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); @@ -1044,9 +1055,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].map); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); kfree(eq->page_list); mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); @@ -1060,7 +1071,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) int i, vec; if (eq_table->have_irq) - free_irq(dev->pdev->irq, dev); + free_irq(dev->persist->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { @@ -1089,7 +1100,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + + priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clr_int_bar) + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); if (!priv->clr_base) { mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n"); @@ -1212,13 +1224,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-comp-%d@pci:%s", i, - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } else { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-async@pci:%s", - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } eq_name = priv->eq_table.irq_names + @@ -1235,8 +1247,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, DRV_NAME "@pci:%s", - pci_name(dev->pdev)); - err = request_irq(dev->pdev->irq, mlx4_interrupt, + pci_name(dev->persist->pdev)); + err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) goto err_out_async; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 97c9b1db1d27..2a9dd460a95f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) @@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * int i; for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + dma_free_coherent(&dev->persist->pdev->dev, + chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } @@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, --cur_order; if (coherent) - ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, + ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, &chunk->mem[chunk->npages], cur_order, gfp_mask); else @@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); @@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 116895ac8b35..68d2bad325d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -138,13 +138,13 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_lock(&intf_mutex); + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); - if (!mlx4_is_slave(dev)) - mlx4_start_catas_poll(dev); + mlx4_start_catas_poll(dev); return 0; } @@ -154,14 +154,14 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - if (!mlx4_is_slave(dev)) - mlx4_stop_catas_poll(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) mlx4_remove_device(intf, priv); list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; mutex_unlock(&intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 03e9eb0dc761..9c7ef0bffb52 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -318,10 +320,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return -ENODEV; } - if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { + if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev_cap->uar_size, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); return -ENODEV; } @@ -541,8 +544,10 @@ static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1); - err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2); + err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, + &lnkcap1); + err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, + &lnkcap2); if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) *speed = PCIE_SPEED_8_0GT; @@ -587,7 +592,7 @@ static void mlx4_check_pcie_caps(struct mlx4_dev *dev) return; } - err = pcie_get_minimum_link(dev->pdev, &speed, &width); + err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); if (err || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { mlx4_warn(dev, @@ -837,10 +842,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > - pci_resource_len(dev->pdev, 2)) { + pci_resource_len(dev->persist->pdev, + 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev->caps.uar_page_size * dev->caps.num_uars, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); goto err_mem; } @@ -1477,7 +1484,8 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, + MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } @@ -1492,9 +1500,9 @@ static int map_bf_area(struct mlx4_dev *dev) if (!dev->caps.bf_reg_size) return -ENXIO; - bf_start = pci_resource_start(dev->pdev, 2) + + bf_start = pci_resource_start(dev->persist->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); - bf_len = pci_resource_len(dev->pdev, 2) - + bf_len = pci_resource_len(dev->persist->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) @@ -1536,7 +1544,8 @@ static int map_internal_clock(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = - ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) @@ -1573,6 +1582,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev) } } +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. + */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1588,9 +1641,15 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, - MLX4_COMM_TIME); + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { @@ -1615,22 +1674,24 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: - mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); + mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0); +err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -1705,7 +1766,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size <= 0 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= + (dev->persist->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = @@ -2288,7 +2350,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) for (i = 0; i < nreq; ++i) entries[i].entry = i; - nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq); + nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, + nreq); if (nreq < 0) { kfree(entries); @@ -2316,7 +2379,7 @@ no_msi: dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) - priv->eq_table.eq[i].irq = dev->pdev->irq; + priv->eq_table.eq[i].irq = dev->persist->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2344,7 +2407,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_attr); + err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; @@ -2361,10 +2424,12 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); + err = device_create_file(&dev->persist->pdev->dev, + &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); - device_remove_file(&info->dev->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_attr); info->port = -1; } @@ -2376,8 +2441,9 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) if (info->port < 0) return; - device_remove_file(&info->dev->pdev->dev, &info->port_attr); - device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); + device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -2444,10 +2510,11 @@ static int mlx4_get_ownership(struct mlx4_dev *dev) void __iomem *owner; u32 ret; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2463,10 +2530,11 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2481,11 +2549,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) !!((flags) & MLX4_FLAG_MASTER)) static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, - u8 total_vfs, int existing_vfs) + u8 total_vfs, int existing_vfs, int reset_flow) { u64 dev_flags = dev->flags; int err = 0; + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + atomic_inc(&pf_loading); if (dev->flags & MLX4_FLAG_SRIOV) { if (existing_vfs != total_vfs) { @@ -2514,13 +2590,14 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, dev_flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev_flags &= ~MLX4_FLAG_SLAVE; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } return dev_flags; disable_sriov: atomic_dec(&pf_loading); - dev->num_vfs = 0; +free_mem: + dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; } @@ -2544,7 +2621,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap } static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, - int total_vfs, int *nvfs, struct mlx4_priv *priv) + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) { struct mlx4_dev *dev; unsigned sum = 0; @@ -2607,10 +2685,15 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, existing_vfs = pci_num_vf(pdev); if (existing_vfs) dev->flags |= MLX4_FLAG_SRIOV; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } } + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + slave_start: err = mlx4_cmd_init(dev); if (err) { @@ -2661,8 +2744,10 @@ slave_start: goto err_fw; if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; @@ -2704,7 +2789,7 @@ slave_start: if (dev->flags & MLX4_FLAG_SRIOV) { if (!existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev) && !reset_flow) atomic_dec(&pf_loading); dev->flags &= ~MLX4_FLAG_SRIOV; } @@ -2718,7 +2803,8 @@ slave_start: } if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); @@ -2771,12 +2857,14 @@ slave_start: dev->caps.num_ports); goto err_close; } - memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs)); + memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); - for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) { + for (i = 0; + i < sizeof(dev->persist->nvfs)/ + sizeof(dev->persist->nvfs[0]); i++) { unsigned j; - for (j = 0; j < dev->nvfs[i]; ++sum, ++j) { + for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; dev->dev_vfs[sum].n_ports = i < 2 ? 1 : dev->caps.num_ports; @@ -2828,6 +2916,17 @@ slave_start: goto err_steer; mlx4_init_quotas(dev); + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -2846,7 +2945,7 @@ slave_start: priv->removed = 0; - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2905,10 +3004,12 @@ err_cmd: mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: - if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3049,11 +3150,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, } } - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_catas_init(&priv->dev); if (err) goto err_release_regions; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); + if (err) + goto err_catas; + return 0; +err_catas: + mlx4_catas_end(&priv->dev); + err_release_regions: pci_release_regions(pdev); @@ -3076,38 +3185,60 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; dev = &priv->dev; - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); + dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); + if (!dev->persist) { + kfree(priv); + return -ENOMEM; + } + dev->persist->pdev = pdev; + dev->persist->dev = dev; + pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) + if (ret) { + kfree(dev->persist); kfree(priv); + } else { + pci_save_state(pdev); + } return ret; } +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; + priv->dev.flags = flags; +} + static void mlx4_unload_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; - int p; - int active_vfs = 0; + int p, i; if (priv->removed) return; + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. + possible_type[i + 1]; + } + pci_dev_data = priv->pci_dev_data; - /* Disabling SR-IOV is not allowed while there are active vf's */ - if (mlx4_is_master(dev)) { - active_vfs = mlx4_how_many_lives_vf(dev); - if (active_vfs) { - pr_warn("Removing PF when there are active VF's !!\n"); - pr_warn("Will not disable SR-IOV.\n"); - } - } mlx4_stop_sense(dev); mlx4_unregister_device(dev); @@ -3151,12 +3282,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - dev->flags &= ~MLX4_FLAG_SRIOV; - dev->num_vfs = 0; - } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -3168,42 +3293,96 @@ static void mlx4_unload_one(struct pci_dev *pdev) kfree(dev->caps.qp1_proxy); kfree(dev->dev_vfs); - memset(priv, 0, sizeof(*priv)); + mlx4_clean_dev(dev); priv->pci_dev_data = pci_dev_data; priv->removed = 1; } static void mlx4_remove_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + int active_vfs = 0; + + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); + mlx4_catas_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } - mlx4_unload_one(pdev); pci_release_regions(pdev); pci_disable_device(pdev); + kfree(dev->persist); kfree(priv); pci_set_drvdata(pdev, NULL); } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; int pci_dev_data, err, total_vfs; pci_dev_data = priv->pci_dev_data; - total_vfs = dev->num_vfs; - memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs)); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); if (err) { mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", __func__, pci_name(pdev), err); return err; } + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + return err; } @@ -3258,23 +3437,79 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - mlx4_unload_one(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); + mlx4_enter_error_state(persist); - return state == pci_channel_io_perm_failure ? - PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + + mutex_unlock(&persist->interface_state_mutex); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + pci_disable_device(pdev); + return PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int ret; + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int total_vfs; + + mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); + ret = pci_enable_device(pdev); + if (ret) { + mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + mutex_lock(&persist->interface_state_mutex); + if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { + ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, + priv, 1); + if (ret) { + mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", + __func__, ret); + goto end; + } - ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv); + ret = restore_current_port_types(dev, dev->persist-> + curr_port_type, dev->persist-> + curr_port_poss_type); + if (ret) + mlx4_err(dev, "could not restore original port types (%d)\n", ret); + } +end: + mutex_unlock(&persist->interface_state_mutex); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } +static void mlx4_shutdown(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_info(persist->dev, "mlx4_shutdown was called\n"); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + mutex_unlock(&persist->interface_state_mutex); +} + static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, @@ -3284,7 +3519,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, - .shutdown = mlx4_unload_one, + .shutdown = mlx4_shutdown, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; @@ -3336,7 +3571,6 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index a3867e7ef885..bd9ea0d01aae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1318,6 +1318,9 @@ out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; return err; } @@ -1347,6 +1350,9 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && !attach && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index bdd4eea2247c..096a81c16a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -85,7 +85,9 @@ enum { MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, - MLX4_CLOCK_SIZE = 0x00008 + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc }; enum { @@ -120,6 +122,10 @@ enum mlx4_mpt_state { }; #define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 +#define MLX4_COMM_CMD_NA_OP 0x0 + + enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, @@ -221,19 +227,21 @@ extern int mlx4_debug_level; #define mlx4_dbg(mdev, format, ...) \ do { \ if (mlx4_debug_level) \ - dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \ + dev_printk(KERN_DEBUG, \ + &(mdev)->persist->pdev->dev, format, \ ##__VA_ARGS__); \ } while (0) #define mlx4_err(mdev, format, ...) \ - dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_info(mdev, format, ...) \ - dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_warn(mdev, format, ...) \ - dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; #define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) #define ALL_SLAVES 0xff @@ -606,7 +614,6 @@ struct mlx4_mgm { struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; - struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; @@ -994,7 +1001,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -void mlx4_catas_init(void); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); @@ -1160,13 +1168,14 @@ enum { int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout); + u16 op, unsigned long timeout); void mlx4_cq_tasklet_cb(unsigned long data); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); @@ -1176,7 +1185,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); -void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 7094a9c70fd5..8dbdf1d29357 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -708,13 +708,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, if (!mtts) return -ENOMEM; - dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); return 0; @@ -1020,13 +1020,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list /* Make sure MPT status is visible before writing MTT entries */ wmb(); - dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); fmr->mpt->key = cpu_to_be32(key); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 74216071201f..a42b4c0a9ed9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) return -ENOMEM; if (mlx4_is_slave(dev)) - offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / + offset = uar->index % ((int)pci_resource_len(dev->persist->pdev, + 2) / dev->caps.uar_page_size); else offset = uar->index; - uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; + uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT) + + offset; uar->map = NULL; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 30eb1ead0fe6..9f268f05290a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; @@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= gids % vfs) return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); @@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) int num_eth_ports, err; int i; - if (slave < 0 || slave > dev->num_vfs) + if (slave < 0 || slave > dev->persist->num_vfs) return; actv_ports = mlx4_get_active_ports(dev, slave); @@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, return -EINVAL; slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + num_vfs = bitmap_weight(slaves_pport.slaves, + dev->persist->num_vfs + 1) - 1; for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid, @@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); num_vfs_before += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } /* candidate_slave_gid isn't necessarily the correct slave, but @@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); slave_gid += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } } *slave_id = slave_gid; diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c index ea1c6d092145..0076d88587ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/reset.c +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - pcie_cap = pci_pcie_cap(dev->pdev); + pcie_cap = pci_pcie_cap(dev->persist->pdev); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; - if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { + if (pci_read_config_dword(dev->persist->pdev, i * 4, + hca_header + i)) { err = -ENODEV; mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n"); goto out; } } - reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, + reset = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_RESET_BASE, MLX4_RESET_SIZE); if (!reset) { err = -ENOMEM; @@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev) end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { - if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && - vendor != 0xffff) + if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID, + &vendor) && vendor != 0xffff) break; msleep(1); @@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev) /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n"); @@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev) if (i * 4 == PCI_COMMAND) continue; - if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { + if (pci_write_config_dword(dev->persist->pdev, i * 4, + hca_header[i])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n", i); @@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev) } } - if (pci_write_config_dword(dev->pdev, PCI_COMMAND, + if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4efbd1eca611..3e93879bccce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, int allocated, free, reserved, guaranteed, from_free; int from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return -EINVAL; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; @@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, if (!err) { /* grant the request */ if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; res_alloc->res_port_rsvd[port - 1] -= from_rsvd; } else { @@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, &priv->mfunc.master.res_tracker.res_alloc[res_type]; int allocated, guaranteed, from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; guaranteed = res_alloc->guaranteed[slave]; @@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, } if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; res_alloc->res_port_rsvd[port - 1] += from_rsvd; } else { @@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev, enum mlx4_resource res_type, int vf, int num_instances) { - res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->guaranteed[vf] = num_instances / + (2 * (dev->persist->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; @@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; - res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); - res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * - (dev->num_vfs + 1) * sizeof(int), - GFP_KERNEL); + (dev->persist->num_vfs + + 1) * + sizeof(int), GFP_KERNEL); else - res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->allocated = kzalloc((dev->persist-> + num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); - for (t = 0; t < dev->num_vfs + 1; t++) { + for (t = 0; t < dev->persist->num_vfs + 1; t++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, t); switch (i) { diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 167737fa59de..fe0277a35507 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2947,6 +2947,36 @@ static int sh_eth_drv_remove(struct platform_device *pdev) } #ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP +static int sh_eth_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + int ret = 0; + + if (netif_running(ndev)) { + netif_device_detach(ndev); + ret = sh_eth_close(ndev); + } + + return ret; +} + +static int sh_eth_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + int ret = 0; + + if (netif_running(ndev)) { + ret = sh_eth_open(ndev); + if (ret < 0) + return ret; + netif_device_attach(ndev); + } + + return ret; +} +#endif + static int sh_eth_runtime_nop(struct device *dev) { /* Runtime PM callback shared between ->runtime_suspend() @@ -2960,8 +2990,8 @@ static int sh_eth_runtime_nop(struct device *dev) } static const struct dev_pm_ops sh_eth_dev_pm_ops = { - .runtime_suspend = sh_eth_runtime_nop, - .runtime_resume = sh_eth_runtime_nop, + SET_SYSTEM_SLEEP_PM_OPS(sh_eth_suspend, sh_eth_resume) + SET_RUNTIME_PM_OPS(sh_eth_runtime_nop, sh_eth_runtime_nop, NULL) }; #define SH_ETH_PM_OPS (&sh_eth_dev_pm_ops) #else diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 35f9b86bc9e5..6249a4ec08f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -32,7 +32,7 @@ struct rk_priv_data { struct platform_device *pdev; int phy_iface; - char regulator[32]; + struct regulator *regulator; bool clk_enabled; bool clock_input; @@ -287,47 +287,25 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable) static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable) { - struct regulator *ldo; - char *ldostr = bsp_priv->regulator; + struct regulator *ldo = bsp_priv->regulator; int ret; struct device *dev = &bsp_priv->pdev->dev; - if (!ldostr) { - dev_err(dev, "%s: no ldo found\n", __func__); + if (!ldo) { + dev_err(dev, "%s: no regulator found\n", __func__); return -1; } - ldo = regulator_get(NULL, ldostr); - if (!ldo) { - dev_err(dev, "\n%s get ldo %s failed\n", __func__, ldostr); + if (enable) { + ret = regulator_enable(ldo); + if (ret) + dev_err(dev, "%s: fail to enable phy-supply\n", + __func__); } else { - if (enable) { - if (!regulator_is_enabled(ldo)) { - regulator_set_voltage(ldo, 3300000, 3300000); - ret = regulator_enable(ldo); - if (ret != 0) - dev_err(dev, "%s: fail to enable %s\n", - __func__, ldostr); - else - dev_info(dev, "turn on ldo done.\n"); - } else { - dev_warn(dev, "%s is enabled before enable", - ldostr); - } - } else { - if (regulator_is_enabled(ldo)) { - ret = regulator_disable(ldo); - if (ret != 0) - dev_err(dev, "%s: fail to disable %s\n", - __func__, ldostr); - else - dev_info(dev, "turn off ldo done.\n"); - } else { - dev_warn(dev, "%s is disabled before disable", - ldostr); - } - } - regulator_put(ldo); + ret = regulator_disable(ldo); + if (ret) + dev_err(dev, "%s: fail to disable phy-supply\n", + __func__); } return 0; @@ -347,14 +325,14 @@ static void *rk_gmac_setup(struct platform_device *pdev) bsp_priv->phy_iface = of_get_phy_mode(dev->of_node); - ret = of_property_read_string(dev->of_node, "phy_regulator", &strings); - if (ret) { - dev_warn(dev, "%s: Can not read property: phy_regulator.\n", - __func__); - } else { - dev_info(dev, "%s: PHY power controlled by regulator(%s).\n", - __func__, strings); - strcpy(bsp_priv->regulator, strings); + bsp_priv->regulator = devm_regulator_get_optional(dev, "phy"); + if (IS_ERR(bsp_priv->regulator)) { + if (PTR_ERR(bsp_priv->regulator) == -EPROBE_DEFER) { + dev_err(dev, "phy regulator is not available yet, deferred probing\n"); + return ERR_PTR(-EPROBE_DEFER); + } + dev_err(dev, "no regulator found\n"); + bsp_priv->regulator = NULL; } ret = of_property_read_string(dev->of_node, "clock_in_out", &strings); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8c6b7c1651e5..d7fc2b5a1408 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1097,6 +1097,7 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags) priv->dirty_tx = 0; priv->cur_tx = 0; + netdev_reset_queue(priv->dev); stmmac_clear_descriptors(priv); @@ -1300,6 +1301,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) static void stmmac_tx_clean(struct stmmac_priv *priv) { unsigned int txsize = priv->dma_tx_size; + unsigned int bytes_compl = 0, pkts_compl = 0; spin_lock(&priv->tx_lock); @@ -1356,6 +1358,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) priv->hw->mode->clean_desc3(priv, p); if (likely(skb != NULL)) { + pkts_compl++; + bytes_compl += skb->len; dev_consume_skb_any(skb); priv->tx_skbuff[entry] = NULL; } @@ -1364,6 +1368,9 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) priv->dirty_tx++; } + + netdev_completed_queue(priv->dev, pkts_compl, bytes_compl); + if (unlikely(netif_queue_stopped(priv->dev) && stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) { netif_tx_lock(priv->dev); @@ -1418,6 +1425,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv) (i == txsize - 1)); priv->dirty_tx = 0; priv->cur_tx = 0; + netdev_reset_queue(priv->dev); priv->hw->dma->start_tx(priv->ioaddr); priv->dev->stats.tx_errors++; @@ -2050,6 +2058,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (!priv->hwts_tx_en) skb_tx_timestamp(skb); + netdev_sent_queue(dev, skb->len); priv->hw->dma->enable_dma_transmission(priv->ioaddr); spin_unlock(&priv->tx_lock); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 612e0731142d..1df38bdae2ee 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1471,11 +1471,17 @@ int macvlan_link_register(struct rtnl_link_ops *ops) }; EXPORT_SYMBOL_GPL(macvlan_link_register); +static struct net *macvlan_get_link_net(const struct net_device *dev) +{ + return dev_net(macvlan_dev_real_dev(dev)); +} + static struct rtnl_link_ops macvlan_link_ops = { .kind = "macvlan", .setup = macvlan_setup, .newlink = macvlan_newlink, .dellink = macvlan_dellink, + .get_link_net = macvlan_get_link_net, }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 3ad0e6e16c39..a08a3c78ba97 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -168,7 +168,7 @@ int fixed_phy_set_link_update(struct phy_device *phydev, struct fixed_mdio_bus *fmb = &platform_fmb; struct fixed_phy *fp; - if (!link_update || !phydev || !phydev->bus) + if (!phydev || !phydev->bus) return -EINVAL; list_for_each_entry(fp, &fmb->phys, node) { diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3a6770a65d78..449835f4331e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -160,20 +160,19 @@ EXPORT_SYMBOL_GPL(usbnet_get_endpoints); int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress) { - int tmp, i; + int tmp = -1, ret; unsigned char buf [13]; - tmp = usb_string(dev->udev, iMACAddress, buf, sizeof buf); - if (tmp != 12) { + ret = usb_string(dev->udev, iMACAddress, buf, sizeof buf); + if (ret == 12) + tmp = hex2bin(dev->net->dev_addr, buf, 6); + if (tmp < 0) { dev_dbg(&dev->udev->dev, "bad MAC string %d fetch, %d\n", iMACAddress, tmp); - if (tmp >= 0) - tmp = -EINVAL; - return tmp; + if (ret >= 0) + ret = -EINVAL; + return ret; } - for (i = tmp = 0; i < 6; i++, tmp += 2) - dev->net->dev_addr [i] = - (hex_to_bin(buf[tmp]) << 4) + hex_to_bin(buf[tmp + 1]); return 0; } EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 8ad596573d17..4cca36ebc4fb 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -469,6 +469,14 @@ static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, }; +static struct net *veth_get_link_net(const struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + struct net_device *peer = rtnl_dereference(priv->peer); + + return peer ? dev_net(peer) : dev_net(dev); +} + static struct rtnl_link_ops veth_link_ops = { .kind = DRV_NAME, .priv_size = sizeof(struct veth_priv), @@ -478,6 +486,7 @@ static struct rtnl_link_ops veth_link_ops = { .dellink = veth_dellink, .policy = veth_policy, .maxtype = VETH_INFO_MAX, + .get_link_net = veth_get_link_net, }; /* diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 11e2e8131359..9bd71d53c5e0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -925,6 +925,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(sq); + /* timestamp packet in software */ + skb_tx_timestamp(skb); + /* Try to transmit */ err = xmit_skb(sq, skb); @@ -1376,6 +1379,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .get_ringparam = virtnet_get_ringparam, .set_channels = virtnet_set_channels, .get_channels = virtnet_get_channels, + .get_ts_info = ethtool_op_get_ts_info, }; #define MIN_MTU 68 diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0346eaa6d236..87736e65cd15 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -270,12 +270,13 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, __be16 port, u32 flags) { struct vxlan_sock *vs; - u32 match_flags = flags & VXLAN_F_UNSHAREABLE; + + flags &= VXLAN_F_RCV_FLAGS; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { if (inet_sk(vs->sock->sk)->inet_sport == port && inet_sk(vs->sock->sk)->sk.sk_family == family && - (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) + vs->flags == flags) return vs; } return NULL; @@ -339,6 +340,11 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_flags = fdb->flags; ndm->ndm_type = RTN_UNICAST; + if (!net_eq(dev_net(vxlan->dev), vxlan->net) && + nla_put_s32(skb, NDA_NDM_IFINDEX_NETNSID, + peernet2id(vxlan->net, dev_net(vxlan->dev)))) + goto nla_put_failure; + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; @@ -1669,7 +1675,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp; @@ -1687,21 +1693,20 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, } #if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct vxlan_sock *vs, - struct dst_entry *dst, struct sk_buff *skb, +static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port, - struct vxlan_metadata *md, bool xnet) + struct vxlan_metadata *md, bool xnet, u32 vxflags) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); + bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); - if ((vs->flags & VXLAN_F_REMCSUM_TX) && + if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { int csum_start = skb_checksum_start_offset(skb); @@ -1759,13 +1764,14 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } } - if (vs->flags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vs, md); + if (vxflags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, - ttl, src_port, dst_port); + udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, + ttl, src_port, dst_port, + !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); return 0; err: dst_release(dst); @@ -1773,20 +1779,19 @@ err: } #endif -int vxlan_xmit_skb(struct vxlan_sock *vs, - struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - struct vxlan_metadata *md, bool xnet) + struct vxlan_metadata *md, bool xnet, u32 vxflags) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !vs->sock->sk->sk_no_check_tx; + bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); - if ((vs->flags & VXLAN_F_REMCSUM_TX) && + if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { int csum_start = skb_checksum_start_offset(skb); @@ -1838,13 +1843,14 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, } } - if (vs->flags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vs, md); + if (vxflags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, - ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, + ttl, df, src_port, dst_port, xnet, + !(vxflags & VXLAN_F_UDP_CSUM)); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1976,10 +1982,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, - fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, &md, - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = vxlan_xmit_skb(rt, skb, fl4.saddr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, &md, + !net_eq(vxlan->net, dev_net(vxlan->dev)), + vxlan->flags); if (err < 0) { /* skb is already freed. */ skb = NULL; @@ -2035,10 +2042,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, - dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, &md, - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr, + 0, ttl, src_port, dst_port, &md, + !net_eq(vxlan->net, dev_net(vxlan->dev)), + vxlan->flags); #endif } @@ -2510,15 +2517,11 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (ipv6) { udp_conf.family = AF_INET6; - udp_conf.use_udp6_tx_checksums = - !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); udp_conf.use_udp6_rx_checksums = !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = INADDR_ANY; - udp_conf.use_udp_checksums = - !!(flags & VXLAN_F_UDP_CSUM); } udp_conf.local_udp_port = port; @@ -2562,7 +2565,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - vs->flags = flags; + vs->flags = (flags & VXLAN_F_RCV_FLAGS); /* Initialize the vxlan udp offloads structure */ vs->udp_offloads.port = port; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 5f1fda44882b..589fa256256b 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -251,7 +251,6 @@ struct xenvif { struct xenvif_rx_cb { unsigned long expires; int meta_slots_used; - bool full_coalesce; }; #define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 908e65e9b821..49322b6c32df 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -233,51 +233,6 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) } } -/* - * Returns true if we should start a new receive buffer instead of - * adding 'size' bytes to a buffer which currently contains 'offset' - * bytes. - */ -static bool start_new_rx_buffer(int offset, unsigned long size, int head, - bool full_coalesce) -{ - /* simple case: we have completely filled the current buffer. */ - if (offset == MAX_BUFFER_OFFSET) - return true; - - /* - * complex case: start a fresh buffer if the current frag - * would overflow the current buffer but only if: - * (i) this frag would fit completely in the next buffer - * and (ii) there is already some data in the current buffer - * and (iii) this is not the head buffer. - * and (iv) there is no need to fully utilize the buffers - * - * Where: - * - (i) stops us splitting a frag into two copies - * unless the frag is too large for a single buffer. - * - (ii) stops us from leaving a buffer pointlessly empty. - * - (iii) stops us leaving the first buffer - * empty. Strictly speaking this is already covered - * by (ii) but is explicitly checked because - * netfront relies on the first buffer being - * non-empty and can crash otherwise. - * - (iv) is needed for skbs which can use up more than MAX_SKB_FRAGS - * slot - * - * This means we will effectively linearise small - * frags but do not needlessly split large buffers - * into multiple copies tend to give large frags their - * own buffers as before. - */ - BUG_ON(size > MAX_BUFFER_OFFSET); - if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head && - !full_coalesce) - return true; - - return false; -} - struct netrx_pending_operations { unsigned copy_prod, copy_cons; unsigned meta_prod, meta_cons; @@ -336,24 +291,13 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb BUG_ON(offset >= PAGE_SIZE); BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); - bytes = PAGE_SIZE - offset; + if (npo->copy_off == MAX_BUFFER_OFFSET) + meta = get_next_rx_buffer(queue, npo); + bytes = PAGE_SIZE - offset; if (bytes > size) bytes = size; - if (start_new_rx_buffer(npo->copy_off, - bytes, - *head, - XENVIF_RX_CB(skb)->full_coalesce)) { - /* - * Netfront requires there to be some data in the head - * buffer. - */ - BUG_ON(*head); - - meta = get_next_rx_buffer(queue, npo); - } - if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) bytes = MAX_BUFFER_OFFSET - npo->copy_off; @@ -652,60 +596,15 @@ static void xenvif_rx_action(struct xenvif_queue *queue) while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX) && (skb = xenvif_rx_dequeue(queue)) != NULL) { - RING_IDX max_slots_needed; RING_IDX old_req_cons; RING_IDX ring_slots_used; - int i; queue->last_rx_time = jiffies; - /* We need a cheap worse case estimate for the number of - * slots we'll use. - */ - - max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) + - skb_headlen(skb), - PAGE_SIZE); - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - unsigned int size; - unsigned int offset; - - size = skb_frag_size(&skb_shinfo(skb)->frags[i]); - offset = skb_shinfo(skb)->frags[i].page_offset; - - /* For a worse-case estimate we need to factor in - * the fragment page offset as this will affect the - * number of times xenvif_gop_frag_copy() will - * call start_new_rx_buffer(). - */ - max_slots_needed += DIV_ROUND_UP(offset + size, - PAGE_SIZE); - } - - /* To avoid the estimate becoming too pessimal for some - * frontends that limit posted rx requests, cap the estimate - * at MAX_SKB_FRAGS. In this case netback will fully coalesce - * the skb into the provided slots. - */ - if (max_slots_needed > MAX_SKB_FRAGS) { - max_slots_needed = MAX_SKB_FRAGS; - XENVIF_RX_CB(skb)->full_coalesce = true; - } else { - XENVIF_RX_CB(skb)->full_coalesce = false; - } - - /* We may need one more slot for GSO metadata */ - if (skb_is_gso(skb) && - (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) - max_slots_needed++; - old_req_cons = queue->rx.req_cons; XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue); ring_slots_used = queue->rx.req_cons - old_req_cons; - BUG_ON(ring_slots_used > max_slots_needed); - __skb_queue_tail(&rxq, skb); } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c694e7baa621..2805062c013f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -52,6 +52,7 @@ struct ipv6_devconf { __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; + __s32 accept_ra_mtu; void *sysctl; }; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 64d25941b329..c989442ffc6a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -279,6 +279,8 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); +void mlx4_cmd_wake_completions(struct mlx4_dev *dev); +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) @@ -288,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) +#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17) #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f1e41b33462f..5ef54e145e4d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,10 @@ enum { MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; +enum { + MLX4_VF_CAP_FLAG_RESET = 1 << 0 +}; + /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, @@ -411,6 +415,16 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +enum { + MLX4_DEVICE_STATE_UP = 1 << 0, + MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, +}; + +enum { + MLX4_INTERFACE_STATE_UP = 1 << 0, + MLX4_INTERFACE_STATE_DELETION = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -535,6 +549,7 @@ struct mlx4_caps { u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + u32 vf_caps; }; struct mlx4_buf_list { @@ -744,8 +759,23 @@ struct mlx4_vf_dev { u8 n_ports; }; -struct mlx4_dev { +struct mlx4_dev_persistent { struct pci_dev *pdev; + struct mlx4_dev *dev; + int nvfs[MLX4_MAX_PORTS + 1]; + int num_vfs; + enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; + enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; + struct work_struct catas_work; + struct workqueue_struct *catas_wq; + struct mutex device_state_mutex; /* protect HW state */ + u8 state; + struct mutex interface_state_mutex; /* protect SW state */ + u8 interface_state; +}; + +struct mlx4_dev { + struct mlx4_dev_persistent *persist; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; @@ -754,13 +784,11 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; - int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; - int nvfs[MLX4_MAX_PORTS + 1]; }; struct mlx4_eqe { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a2562ed53ea3..e0337844358e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -260,7 +260,9 @@ void rhashtable_destroy(struct rhashtable *ht); next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next) + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** * rht_for_each_rcu_continue - continue iterating over rcu hash chain diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 2a50a70ef587..1a20d33d56bc 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -77,17 +77,17 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet); +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port); + __be16 dst_port, bool nocheck); #endif void udp_tunnel_sock_release(struct socket *sock); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7be8c342fc95..2927d6244481 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -129,8 +129,12 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 -/* These flags must match in order for a socket to be shareable */ -#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP +/* Flags that are used in the receive patch. These flags must match in + * order for a socket to be shareable + */ +#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_REMCSUM_RX) struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -138,11 +142,10 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, void vxlan_sock_release(struct vxlan_sock *vs); -int vxlan_xmit_skb(struct vxlan_sock *vs, - struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, - bool xnet); + bool xnet, u32 vxflags); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 73cb02dc3065..437a6a4b125a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -169,6 +169,7 @@ enum { DEVCONF_SUPPRESS_FRAG_NDISC, DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_USE_OPTIMISTIC, + DEVCONF_ACCEPT_RA_MTU, DEVCONF_MAX }; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index f3d77f9f1e0b..38f236853cc0 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -25,6 +25,7 @@ enum { NDA_VNI, NDA_IFINDEX, NDA_MASTER, + NDA_NDM_IFINDEX_NETNSID, __NDA_MAX }; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 84a78e396a56..bc2d0d80d1f9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -585,6 +585,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) struct rhash_head *he; spinlock_t *lock; unsigned int hash; + bool ret = false; rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); @@ -602,17 +603,16 @@ restart: } rcu_assign_pointer(*pprev, obj->next); - atomic_dec(&ht->nelems); - - spin_unlock_bh(lock); - - rhashtable_wakeup_worker(ht); - - rcu_read_unlock(); - return true; + ret = true; + break; } + /* The entry may be linked in either 'tbl', 'future_tbl', or both. + * 'future_tbl' only exists for a short period of time during + * resizing. Thus traversing both is fine and the added cost is + * very rare. + */ if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) { spin_unlock_bh(lock); @@ -625,9 +625,15 @@ restart: } spin_unlock_bh(lock); + + if (ret) { + atomic_dec(&ht->nelems); + rhashtable_wakeup_worker(ht); + } + rcu_read_unlock(); - return false; + return ret; } EXPORT_SYMBOL_GPL(rhashtable_remove); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 8ac8a5cc2143..c92b52f37d38 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -238,6 +238,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vlan_get_link_net(const struct net_device *dev) +{ + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + + return dev_net(real_dev); +} + struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, @@ -250,6 +257,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .dellink = unregister_vlan_dev, .get_size = vlan_get_size, .fill_info = vlan_fill_info, + .get_link_net = vlan_get_link_net, }; int __init vlan_netlink_init(void) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 528cf2790a5f..3875ea51f6fe 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -311,17 +311,14 @@ errout: int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask) { - int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) - goto out; + return 0; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, - filter_mask, dev); -out: - return err; + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); } static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9d1a4cac83b6..b7bde551ef76 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -202,6 +202,7 @@ int peernet2id(struct net *net, struct net *peer) return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; } +EXPORT_SYMBOL(peernet2id); struct net *get_net_ns_by_id(struct net *net, int id) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a12eecc0f976..07447d1665e6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2172,8 +2172,11 @@ replay: goto out; } - if (link_net) + if (link_net) { err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + unregister_netdevice(dev); + } out: if (link_net) put_net(link_net); diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 1e4f6600b31d..825981b1049a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -32,7 +32,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, unsigned int); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 265cb72b7c1b..1e2090ea663e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -411,24 +411,6 @@ errout: rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. - */ -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) -{ - if (fah) { - struct fib_alias *fa; - list_for_each_entry(fa, fah, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 281e5e00025f..3daf0224ff2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -83,7 +83,8 @@ #define MAX_STAT_DEPTH 32 -#define KEYLENGTH (8*sizeof(t_key)) +#define KEYLENGTH (8*sizeof(t_key)) +#define KEY_MAX ((t_key)~0) typedef unsigned int t_key; @@ -102,8 +103,8 @@ struct tnode { union { /* The fields in this struct are valid if bits > 0 (TNODE) */ struct { - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ struct tnode __rcu *child[0]; }; /* This list pointer if valid if bits == 0 (LEAF) */ @@ -302,6 +303,16 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } +static inline void empty_child_inc(struct tnode *n) +{ + ++n->empty_children ? : ++n->full_children; +} + +static inline void empty_child_dec(struct tnode *n) +{ + n->empty_children-- ? : n->full_children--; +} + static struct tnode *leaf_new(t_key key) { struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); @@ -335,7 +346,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = offsetof(struct tnode, child[1 << bits]); + size_t sz = offsetof(struct tnode, child[1ul << bits]); struct tnode *tn = tnode_alloc(sz); unsigned int shift = pos + bits; @@ -348,8 +359,10 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) tn->pos = pos; tn->bits = bits; tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; - tn->full_children = 0; - tn->empty_children = 1<<bits; + if (bits == KEYLENGTH) + tn->full_children = 1; + else + tn->empty_children = 1ul << bits; } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), @@ -375,11 +388,11 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) BUG_ON(i >= tnode_child_length(tn)); - /* update emptyChildren */ + /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) - tn->empty_children++; - else if (n != NULL && chi == NULL) - tn->empty_children--; + empty_child_inc(tn); + if (n != NULL && chi == NULL) + empty_child_dec(tn); /* update fullChildren */ wasfull = tnode_full(tn, chi); @@ -396,8 +409,30 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) rcu_assign_pointer(tn->child[i], n); } -static void put_child_root(struct tnode *tp, struct trie *t, - t_key key, struct tnode *n) +static void update_children(struct tnode *tn) +{ + unsigned long i; + + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); + + if (!inode) + continue; + + /* Either update the children of a tnode that + * already belongs to us or update the child + * to point to ourselves. + */ + if (node_parent(inode) == tn) + update_children(inode); + else + node_set_parent(inode, tn); + } +} + +static inline void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) { if (tp) put_child(tp, get_index(key, tp), n); @@ -434,10 +469,35 @@ static void tnode_free(struct tnode *tn) } } +static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +{ + struct tnode *tp = node_parent(oldtnode); + unsigned long i; + + /* setup the parent pointer out of and back into this node */ + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); + + /* update all of the child parent pointers */ + update_children(tn); + + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); + + /* resize children now that oldtnode is freed */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); + + /* resize child node */ + if (tnode_full(tn, inode)) + resize(t, inode); + } +} + static int inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *inode, *node0, *node1, *tn, *tp; - unsigned long i, j, k; + struct tnode *tn; + unsigned long i; t_key m; pr_debug("In inflate\n"); @@ -446,13 +506,18 @@ static int inflate(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. */ for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { - inode = tnode_get_child(oldtnode, --i); + struct tnode *inode = tnode_get_child(oldtnode, --i); + struct tnode *node0, *node1; + unsigned long j, k; /* An empty child */ if (inode == NULL) @@ -464,6 +529,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) continue; } + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); @@ -488,9 +556,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); if (!node1) goto nomem; - tnode_free_append(tn, node1); + node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); - node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1); + tnode_free_append(tn, node1); if (!node0) goto nomem; tnode_free_append(tn, node0); @@ -512,53 +580,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) put_child(tn, 2 * i, node0); } - /* setup the parent pointer into and out of this node */ - tp = node_parent(oldtnode); - NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); - - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); - - /* update all child nodes parent pointers to route to us */ - for (i = tnode_child_length(oldtnode); i;) { - inode = tnode_get_child(oldtnode, --i); - - /* A leaf or an internal node with skipped bits */ - if (!tnode_full(oldtnode, inode)) { - node_set_parent(inode, tn); - continue; - } - - /* drop the node in the old tnode free list */ - tnode_free_append(oldtnode, inode); - - /* fetch new nodes */ - node1 = tnode_get_child(tn, 2 * i + 1); - node0 = tnode_get_child(tn, 2 * i); - - /* bits == 1 then node0 and node1 represent inode's children */ - if (inode->bits == 1) { - node_set_parent(node1, tn); - node_set_parent(node0, tn); - continue; - } - - /* update parent pointers in child node's children */ - for (k = tnode_child_length(inode), j = k / 2; j;) { - node_set_parent(tnode_get_child(inode, --k), node1); - node_set_parent(tnode_get_child(inode, --j), node0); - node_set_parent(tnode_get_child(inode, --k), node1); - node_set_parent(tnode_get_child(inode, --j), node0); - } + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); - /* resize child nodes */ - resize(t, node1); - resize(t, node0); - } - - /* we completed without error, prepare to free old node */ - tnode_free(oldtnode); return 0; nomem: /* all pointers should be clean so we are done */ @@ -568,7 +592,7 @@ nomem: static int halve(struct trie *t, struct tnode *oldtnode) { - struct tnode *tn, *tp, *inode, *node0, *node1; + struct tnode *tn; unsigned long i; pr_debug("In halve\n"); @@ -577,14 +601,18 @@ static int halve(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. */ for (i = tnode_child_length(oldtnode); i;) { - node1 = tnode_get_child(oldtnode, --i); - node0 = tnode_get_child(oldtnode, --i); + struct tnode *node1 = tnode_get_child(oldtnode, --i); + struct tnode *node0 = tnode_get_child(oldtnode, --i); + struct tnode *inode; /* At least one of the children is empty */ if (!node1 || !node0) { @@ -609,36 +637,28 @@ static int halve(struct trie *t, struct tnode *oldtnode) put_child(tn, i / 2, inode); } - /* setup the parent pointer out of and back into this node */ - tp = node_parent(oldtnode); - NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); - - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); - /* update all of the child parent pointers */ - for (i = tnode_child_length(tn); i;) { - inode = tnode_get_child(tn, --i); - - /* only new tnodes will be considered "full" nodes */ - if (!tnode_full(tn, inode)) { - node_set_parent(inode, tn); - continue; - } + return 0; +} - /* Two nonempty children */ - node_set_parent(tnode_get_child(inode, 1), inode); - node_set_parent(tnode_get_child(inode, 0), inode); +static void collapse(struct trie *t, struct tnode *oldtnode) +{ + struct tnode *n, *tp; + unsigned long i; - /* resize child node */ - resize(t, inode); - } + /* scan the tnode looking for that one child that might still exist */ + for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) + n = tnode_get_child(oldtnode, --i); - /* all pointers should be clean so we are done */ - tnode_free(oldtnode); + /* compress one level */ + tp = node_parent(oldtnode); + put_child_root(tp, t, oldtnode->key, n); + node_set_parent(n, tp); - return 0; + /* drop dead node */ + node_free(oldtnode); } static unsigned char update_suffix(struct tnode *tn) @@ -740,10 +760,12 @@ static bool should_inflate(const struct tnode *tp, const struct tnode *tn) /* Keep root node larger */ threshold *= tp ? inflate_threshold : inflate_threshold_root; - used += tn->full_children; used -= tn->empty_children; + used += tn->full_children; + + /* if bits == KEYLENGTH then pos = 0, and will fail below */ - return tn->pos && ((50 * used) >= threshold); + return (used > 1) && tn->pos && ((50 * used) >= threshold); } static bool should_halve(const struct tnode *tp, const struct tnode *tn) @@ -755,15 +777,31 @@ static bool should_halve(const struct tnode *tp, const struct tnode *tn) threshold *= tp ? halve_threshold : halve_threshold_root; used -= tn->empty_children; - return (tn->bits > 1) && ((100 * used) < threshold); + /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ + + return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); +} + +static bool should_collapse(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + + used -= tn->empty_children; + + /* account for bits == KEYLENGTH case */ + if ((tn->bits == KEYLENGTH) && tn->full_children) + used -= KEY_MAX; + + /* One child or none, time to drop us from the trie */ + return used < 2; } #define MAX_WORK 10 static void resize(struct trie *t, struct tnode *tn) { - struct tnode *tp = node_parent(tn), *n = NULL; + struct tnode *tp = node_parent(tn); struct tnode __rcu **cptr; - int max_work; + int max_work = MAX_WORK; pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", tn, inflate_threshold, halve_threshold); @@ -775,19 +813,10 @@ static void resize(struct trie *t, struct tnode *tn) cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; BUG_ON(tn != rtnl_dereference(*cptr)); - /* No children */ - if (tn->empty_children > (tnode_child_length(tn) - 1)) - goto no_children; - - /* One child */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) - goto one_child; - /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ - max_work = MAX_WORK; - while (should_inflate(tp, tn) && max_work--) { + while (should_inflate(tp, tn) && max_work) { if (inflate(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); @@ -795,6 +824,7 @@ static void resize(struct trie *t, struct tnode *tn) break; } + max_work--; tn = rtnl_dereference(*cptr); } @@ -805,8 +835,7 @@ static void resize(struct trie *t, struct tnode *tn) /* Halve as long as the number of empty children in this * node is above threshold. */ - max_work = MAX_WORK; - while (should_halve(tp, tn) && max_work--) { + while (should_halve(tp, tn) && max_work) { if (halve(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); @@ -814,23 +843,13 @@ static void resize(struct trie *t, struct tnode *tn) break; } + max_work--; tn = rtnl_dereference(*cptr); } /* Only one child remains */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) { - unsigned long i; -one_child: - for (i = tnode_child_length(tn); !n && i;) - n = tnode_get_child(tn, --i); -no_children: - /* compress one level */ - put_child_root(tp, t, tn->key, n); - node_set_parent(n, tp); - - /* drop dead node */ - tnode_free_init(tn); - tnode_free(tn); + if (should_collapse(tn)) { + collapse(t, tn); return; } @@ -898,27 +917,20 @@ static void leaf_push_suffix(struct tnode *l) static void remove_leaf_info(struct tnode *l, struct leaf_info *old) { - struct hlist_node *prev; - - /* record the location of the pointer to this object */ - prev = rtnl_dereference(hlist_pprev_rcu(&old->hlist)); + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->hlist.pprev; + struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); /* remove the leaf info from the list */ hlist_del_rcu(&old->hlist); - /* if we emptied the list this leaf will be freed and we can sort - * out parent suffix lengths as a part of trie_rebalance - */ - if (hlist_empty(&l->list)) + /* only access li if it is pointing at the last valid hlist_node */ + if (hlist_empty(&l->list) || (*pprev)) return; - /* if we removed the tail then we need to update slen */ - if (!rcu_access_pointer(hlist_next_rcu(prev))) { - struct leaf_info *li = hlist_entry(prev, typeof(*li), hlist); - - l->slen = KEYLENGTH - li->plen; - leaf_pull_suffix(l); - } + /* update the trie with the latest suffix length */ + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); } static void insert_leaf_info(struct tnode *l, struct leaf_info *new) @@ -942,7 +954,7 @@ static void insert_leaf_info(struct tnode *l, struct leaf_info *new) } /* if we added to the tail node then we need to update slen */ - if (!rcu_access_pointer(hlist_next_rcu(&new->hlist))) { + if (l->slen < (KEYLENGTH - new->plen)) { l->slen = KEYLENGTH - new->plen; leaf_push_suffix(l); } @@ -961,12 +973,12 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) * prefix plus zeros for the bits in the cindex. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is cindex - * else + * if (index & (~0ul << bits)) * we have a mismatch in skip bits and failed + * else + * we know the value is cindex */ - if (index >> n->bits) + if (index & (~0ul << n->bits)) return NULL; /* we have found a leaf. Prefixes have already been compared */ @@ -979,6 +991,26 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) return n; } +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + struct fib_alias *fa; + + if (!fah) + return NULL; + + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + return fa; + } + + return NULL; +} + static void trie_rebalance(struct trie *t, struct tnode *tn) { struct tnode *tp; @@ -1301,12 +1333,12 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * prefix plus zeros for the "bits" in the prefix. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is child index - * else + * if (index & (~0ul << bits)) * we have a mismatch in skip bits and failed + * else + * we know the value is cindex */ - if (index >> n->bits) + if (index & (~0ul << n->bits)) break; /* we have found a leaf. Prefixes have already been compared */ @@ -1574,6 +1606,7 @@ static int trie_flush_leaf(struct tnode *l) struct hlist_head *lih = &l->list; struct hlist_node *tmp; struct leaf_info *li = NULL; + unsigned char plen = KEYLENGTH; hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); @@ -1581,8 +1614,14 @@ static int trie_flush_leaf(struct tnode *l) if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); + continue; } + + plen = li->plen; } + + l->slen = KEYLENGTH - plen; + return found; } @@ -1661,13 +1700,22 @@ int fib_table_flush(struct fib_table *tb) for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { found += trie_flush_leaf(l); - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } + ll = l; } - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } pr_debug("trie_flush found=%d\n", found); return found; @@ -1935,16 +1983,10 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - unsigned long i; - s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - - for (i = tnode_child_length(n); i--;) { - if (!rcu_access_pointer(n->child[i])) - s->nullpointers++; - } + s->nullpointers += n->empty_children; } } rcu_read_unlock(); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 9568594ca2f1..93e51199e44b 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -136,8 +136,9 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + gs->sock->sk->sk_no_check_tx); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 9996e63ed304..c83b35485056 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -75,10 +75,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet) +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -90,9 +90,9 @@ int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, uh->source = src_port; uh->len = htons(skb->len); - udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6b4f5d08014..7dcc065e2160 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; /* Check if a valid qdisc is available */ @@ -4380,6 +4382,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; + array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; } static inline size_t inet6_ifla6_size(void) @@ -5259,6 +5262,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9306a5ff9149..6dee2a8ca0a9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1676,6 +1676,7 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .changelink = ip6gre_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; /* diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 8db6c98fe218..32d9b268e7d8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,14 +62,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; - struct sock *sk = sock->sk; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -85,7 +85,7 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 66980d8d98d1..8d766d9100cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -996,13 +996,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, lock_sock(sk); skb = np->pktoptions; if (skb) - atomic_inc(&skb->users); - release_sock(sk); - - if (skb) { ip6_datagram_recv_ctl(sk, &msg, skb); - kfree_skb(skb); - } else { + release_sock(sk); + if (!skb) { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..8a9d7c19e247 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1348,7 +1348,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu) { + if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { __be32 n; u32 mtu; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 8a2d54cba9ba..3cc983bf444a 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -252,12 +252,10 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); md.gbp = vxlan_ext_gbp(skb); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - &md, - false); + &md, false, vxlan_port->exts); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475e35e261ec..7a57f66e654a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -713,6 +713,7 @@ config NET_ACT_BPF config NET_ACT_CONNMARK tristate "Netfilter Connection Mark Retriever" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NF_CONNTRACK_MARK ---help--- Say Y here to allow retrieving of conn mark |