diff options
-rw-r--r-- | drivers/net/ethernet/amd/Kconfig | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe-common.h | 26 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 344 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 115 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe-main.c | 13 | ||||
-rw-r--r-- | drivers/net/ethernet/amd/xgbe/xgbe.h | 55 | ||||
-rw-r--r-- | net/tipc/link.c | 417 | ||||
-rw-r--r-- | net/tipc/link.h | 2 | ||||
-rw-r--r-- | net/tipc/msg.c | 282 | ||||
-rw-r--r-- | net/tipc/msg.h | 32 | ||||
-rw-r--r-- | net/tipc/net.c | 63 | ||||
-rw-r--r-- | net/tipc/net.h | 2 | ||||
-rw-r--r-- | net/tipc/node.c | 25 | ||||
-rw-r--r-- | net/tipc/node.h | 17 | ||||
-rw-r--r-- | net/tipc/node_subscr.c | 6 | ||||
-rw-r--r-- | net/tipc/port.c | 328 | ||||
-rw-r--r-- | net/tipc/port.h | 40 | ||||
-rw-r--r-- | net/tipc/socket.c | 487 | ||||
-rw-r--r-- | net/tipc/socket.h | 14 |
21 files changed, 1225 insertions, 1077 deletions
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index bbaf36d9f5e1..6e314dbba805 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -182,6 +182,8 @@ config AMD_XGBE depends on OF_NET select PHYLIB select AMD_XGBE_PHY + select BITREVERSE + select CRC32 ---help--- This driver supports the AMD 10GbE Ethernet device found on an AMD SoC. diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index bf462ee86f5c..ccbceba553e5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -276,13 +276,6 @@ #define MAC_PFR 0x0008 #define MAC_WTR 0x000c #define MAC_HTR0 0x0010 -#define MAC_HTR1 0x0014 -#define MAC_HTR2 0x0018 -#define MAC_HTR3 0x001c -#define MAC_HTR4 0x0020 -#define MAC_HTR5 0x0024 -#define MAC_HTR6 0x0028 -#define MAC_HTR7 0x002c #define MAC_VLANTR 0x0050 #define MAC_VLANHTR 0x0058 #define MAC_VLANIR 0x0060 @@ -315,6 +308,7 @@ #define MAC_QTFCR_INC 4 #define MAC_MACA_INC 4 +#define MAC_HTR_INC 4 /* MAC register entry bit positions and sizes */ #define MAC_HWF0R_ADDMACADRSEL_INDEX 18 @@ -387,12 +381,16 @@ #define MAC_MACA1HR_AE_WIDTH 1 #define MAC_PFR_HMC_INDEX 2 #define MAC_PFR_HMC_WIDTH 1 +#define MAC_PFR_HPF_INDEX 10 +#define MAC_PFR_HPF_WIDTH 1 #define MAC_PFR_HUC_INDEX 1 #define MAC_PFR_HUC_WIDTH 1 #define MAC_PFR_PM_INDEX 4 #define MAC_PFR_PM_WIDTH 1 #define MAC_PFR_PR_INDEX 0 #define MAC_PFR_PR_WIDTH 1 +#define MAC_PFR_VTFE_INDEX 16 +#define MAC_PFR_VTFE_WIDTH 1 #define MAC_PMTCSR_MGKPKTEN_INDEX 1 #define MAC_PMTCSR_MGKPKTEN_WIDTH 1 #define MAC_PMTCSR_PWRDWN_INDEX 0 @@ -427,16 +425,30 @@ #define MAC_TCR_SS_WIDTH 2 #define MAC_TCR_TE_INDEX 0 #define MAC_TCR_TE_WIDTH 1 +#define MAC_VLANHTR_VLHT_INDEX 0 +#define MAC_VLANHTR_VLHT_WIDTH 16 +#define MAC_VLANIR_VLTI_INDEX 20 +#define MAC_VLANIR_VLTI_WIDTH 1 +#define MAC_VLANIR_CSVL_INDEX 19 +#define MAC_VLANIR_CSVL_WIDTH 1 #define MAC_VLANTR_DOVLTC_INDEX 20 #define MAC_VLANTR_DOVLTC_WIDTH 1 #define MAC_VLANTR_ERSVLM_INDEX 19 #define MAC_VLANTR_ERSVLM_WIDTH 1 #define MAC_VLANTR_ESVL_INDEX 18 #define MAC_VLANTR_ESVL_WIDTH 1 +#define MAC_VLANTR_ETV_INDEX 16 +#define MAC_VLANTR_ETV_WIDTH 1 #define MAC_VLANTR_EVLS_INDEX 21 #define MAC_VLANTR_EVLS_WIDTH 2 #define MAC_VLANTR_EVLRXS_INDEX 24 #define MAC_VLANTR_EVLRXS_WIDTH 1 +#define MAC_VLANTR_VL_INDEX 0 +#define MAC_VLANTR_VL_WIDTH 16 +#define MAC_VLANTR_VTHM_INDEX 25 +#define MAC_VLANTR_VTHM_WIDTH 1 +#define MAC_VLANTR_VTIM_INDEX 17 +#define MAC_VLANTR_VTIM_WIDTH 1 #define MAC_VR_DEVID_INDEX 8 #define MAC_VR_DEVID_WIDTH 8 #define MAC_VR_SNPSVER_INDEX 0 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 6bb76d5c817b..81198587a6c6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -151,7 +151,7 @@ static ssize_t xgbe_common_write(const char __user *buffer, size_t count, { char workarea[32]; ssize_t len; - unsigned int scan_value; + int ret; if (*ppos != 0) return 0; @@ -165,10 +165,9 @@ static ssize_t xgbe_common_write(const char __user *buffer, size_t count, return len; workarea[len] = '\0'; - if (sscanf(workarea, "%x", &scan_value) == 1) - *value = scan_value; - else - return -EIO; + ret = kstrtouint(workarea, 0, value); + if (ret) + return ret; return len; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 6f1c85956d50..a9ce56d5e988 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -131,7 +131,7 @@ static void xgbe_free_ring(struct xgbe_prv_data *pdata, if (ring->rdata) { for (i = 0; i < ring->rdesc_count; i++) { - rdata = GET_DESC_DATA(ring, i); + rdata = XGBE_GET_DESC_DATA(ring, i); xgbe_unmap_skb(pdata, rdata); } @@ -256,7 +256,7 @@ static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata) rdesc_dma = ring->rdesc_dma; for (j = 0; j < ring->rdesc_count; j++) { - rdata = GET_DESC_DATA(ring, j); + rdata = XGBE_GET_DESC_DATA(ring, j); rdata->rdesc = rdesc; rdata->rdesc_dma = rdesc_dma; @@ -298,7 +298,7 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata) rdesc_dma = ring->rdesc_dma; for (j = 0; j < ring->rdesc_count; j++) { - rdata = GET_DESC_DATA(ring, j); + rdata = XGBE_GET_DESC_DATA(ring, j); rdata->rdesc = rdesc; rdata->rdesc_dma = rdesc_dma; @@ -392,7 +392,7 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) if ((tso && (packet->mss != ring->tx.cur_mss)) || (vlan && (packet->vlan_ctag != ring->tx.cur_vlan_ctag))) cur_index++; - rdata = GET_DESC_DATA(ring, cur_index); + rdata = XGBE_GET_DESC_DATA(ring, cur_index); if (tso) { DBGPR(" TSO packet\n"); @@ -413,12 +413,12 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) packet->length += packet->header_len; cur_index++; - rdata = GET_DESC_DATA(ring, cur_index); + rdata = XGBE_GET_DESC_DATA(ring, cur_index); } /* Map the (remainder of the) packet */ for (datalen = skb_headlen(skb) - offset; datalen; ) { - len = min_t(unsigned int, datalen, TX_MAX_BUF_SIZE); + len = min_t(unsigned int, datalen, XGBE_TX_MAX_BUF_SIZE); skb_dma = dma_map_single(pdata->dev, skb->data + offset, len, DMA_TO_DEVICE); @@ -437,7 +437,7 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) packet->length += len; cur_index++; - rdata = GET_DESC_DATA(ring, cur_index); + rdata = XGBE_GET_DESC_DATA(ring, cur_index); } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { @@ -447,7 +447,8 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) offset = 0; for (datalen = skb_frag_size(frag); datalen; ) { - len = min_t(unsigned int, datalen, TX_MAX_BUF_SIZE); + len = min_t(unsigned int, datalen, + XGBE_TX_MAX_BUF_SIZE); skb_dma = skb_frag_dma_map(pdata->dev, frag, offset, len, DMA_TO_DEVICE); @@ -468,7 +469,7 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) packet->length += len; cur_index++; - rdata = GET_DESC_DATA(ring, cur_index); + rdata = XGBE_GET_DESC_DATA(ring, cur_index); } } @@ -484,7 +485,7 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) err_out: while (start_index < cur_index) { - rdata = GET_DESC_DATA(ring, start_index++); + rdata = XGBE_GET_DESC_DATA(ring, start_index++); xgbe_unmap_skb(pdata, rdata); } @@ -507,7 +508,7 @@ static void xgbe_realloc_skb(struct xgbe_channel *channel) ring->rx.realloc_index); for (i = 0; i < ring->dirty; i++) { - rdata = GET_DESC_DATA(ring, ring->rx.realloc_index); + rdata = XGBE_GET_DESC_DATA(ring, ring->rx.realloc_index); /* Reset rdata values */ xgbe_unmap_skb(pdata, rdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 002293b0819d..a56069c91fc4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -116,6 +116,8 @@ #include <linux/phy.h> #include <linux/clk.h> +#include <linux/bitrev.h> +#include <linux/crc32.h> #include "xgbe.h" #include "xgbe-common.h" @@ -547,24 +549,16 @@ static int xgbe_set_all_multicast_mode(struct xgbe_prv_data *pdata, return 0; } -static int xgbe_set_addn_mac_addrs(struct xgbe_prv_data *pdata, - unsigned int am_mode) +static void xgbe_set_mac_reg(struct xgbe_prv_data *pdata, + struct netdev_hw_addr *ha, unsigned int *mac_reg) { - struct netdev_hw_addr *ha; - unsigned int mac_reg; unsigned int mac_addr_hi, mac_addr_lo; u8 *mac_addr; - unsigned int i; - XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HUC, 0); - XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HMC, 0); - - i = 0; - mac_reg = MAC_MACA1HR; + mac_addr_lo = 0; + mac_addr_hi = 0; - netdev_for_each_uc_addr(ha, pdata->netdev) { - mac_addr_lo = 0; - mac_addr_hi = 0; + if (ha) { mac_addr = (u8 *)&mac_addr_lo; mac_addr[0] = ha->addr[0]; mac_addr[1] = ha->addr[1]; @@ -574,54 +568,93 @@ static int xgbe_set_addn_mac_addrs(struct xgbe_prv_data *pdata, mac_addr[0] = ha->addr[4]; mac_addr[1] = ha->addr[5]; - DBGPR(" adding unicast address %pM at 0x%04x\n", - ha->addr, mac_reg); + DBGPR(" adding mac address %pM at 0x%04x\n", ha->addr, + *mac_reg); XGMAC_SET_BITS(mac_addr_hi, MAC_MACA1HR, AE, 1); + } - XGMAC_IOWRITE(pdata, mac_reg, mac_addr_hi); - mac_reg += MAC_MACA_INC; - XGMAC_IOWRITE(pdata, mac_reg, mac_addr_lo); - mac_reg += MAC_MACA_INC; + XGMAC_IOWRITE(pdata, *mac_reg, mac_addr_hi); + *mac_reg += MAC_MACA_INC; + XGMAC_IOWRITE(pdata, *mac_reg, mac_addr_lo); + *mac_reg += MAC_MACA_INC; +} - i++; - } +static void xgbe_set_mac_addn_addrs(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; + struct netdev_hw_addr *ha; + unsigned int mac_reg; + unsigned int addn_macs; + + mac_reg = MAC_MACA1HR; + addn_macs = pdata->hw_feat.addn_mac; - if (!am_mode) { - netdev_for_each_mc_addr(ha, pdata->netdev) { - mac_addr_lo = 0; - mac_addr_hi = 0; - mac_addr = (u8 *)&mac_addr_lo; - mac_addr[0] = ha->addr[0]; - mac_addr[1] = ha->addr[1]; - mac_addr[2] = ha->addr[2]; - mac_addr[3] = ha->addr[3]; - mac_addr = (u8 *)&mac_addr_hi; - mac_addr[0] = ha->addr[4]; - mac_addr[1] = ha->addr[5]; - - DBGPR(" adding multicast address %pM at 0x%04x\n", - ha->addr, mac_reg); - - XGMAC_SET_BITS(mac_addr_hi, MAC_MACA1HR, AE, 1); - - XGMAC_IOWRITE(pdata, mac_reg, mac_addr_hi); - mac_reg += MAC_MACA_INC; - XGMAC_IOWRITE(pdata, mac_reg, mac_addr_lo); - mac_reg += MAC_MACA_INC; - - i++; + if (netdev_uc_count(netdev) > addn_macs) { + xgbe_set_promiscuous_mode(pdata, 1); + } else { + netdev_for_each_uc_addr(ha, netdev) { + xgbe_set_mac_reg(pdata, ha, &mac_reg); + addn_macs--; + } + + if (netdev_mc_count(netdev) > addn_macs) { + xgbe_set_all_multicast_mode(pdata, 1); + } else { + netdev_for_each_mc_addr(ha, netdev) { + xgbe_set_mac_reg(pdata, ha, &mac_reg); + addn_macs--; + } } } /* Clear remaining additional MAC address entries */ - for (; i < pdata->hw_feat.addn_mac; i++) { - XGMAC_IOWRITE(pdata, mac_reg, 0); - mac_reg += MAC_MACA_INC; - XGMAC_IOWRITE(pdata, mac_reg, 0); - mac_reg += MAC_MACA_INC; + while (addn_macs--) + xgbe_set_mac_reg(pdata, NULL, &mac_reg); +} + +static void xgbe_set_mac_hash_table(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; + struct netdev_hw_addr *ha; + unsigned int hash_reg; + unsigned int hash_table_shift, hash_table_count; + u32 hash_table[XGBE_MAC_HASH_TABLE_SIZE]; + u32 crc; + unsigned int i; + + hash_table_shift = 26 - (pdata->hw_feat.hash_table_size >> 7); + hash_table_count = pdata->hw_feat.hash_table_size / 32; + memset(hash_table, 0, sizeof(hash_table)); + + /* Build the MAC Hash Table register values */ + netdev_for_each_uc_addr(ha, netdev) { + crc = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)); + crc >>= hash_table_shift; + hash_table[crc >> 5] |= (1 << (crc & 0x1f)); + } + + netdev_for_each_mc_addr(ha, netdev) { + crc = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)); + crc >>= hash_table_shift; + hash_table[crc >> 5] |= (1 << (crc & 0x1f)); } + /* Set the MAC Hash Table registers */ + hash_reg = MAC_HTR0; + for (i = 0; i < hash_table_count; i++) { + XGMAC_IOWRITE(pdata, hash_reg, hash_table[i]); + hash_reg += MAC_HTR_INC; + } +} + +static int xgbe_add_mac_addresses(struct xgbe_prv_data *pdata) +{ + if (pdata->hw_feat.hash_table_size) + xgbe_set_mac_hash_table(pdata); + else + xgbe_set_mac_addn_addrs(pdata); + return 0; } @@ -738,6 +771,89 @@ static int xgbe_disable_rx_vlan_stripping(struct xgbe_prv_data *pdata) return 0; } +static int xgbe_enable_rx_vlan_filtering(struct xgbe_prv_data *pdata) +{ + /* Enable VLAN filtering */ + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 1); + + /* Enable VLAN Hash Table filtering */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTHM, 1); + + /* Disable VLAN tag inverse matching */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VTIM, 0); + + /* Only filter on the lower 12-bits of the VLAN tag */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, ETV, 1); + + /* In order for the VLAN Hash Table filtering to be effective, + * the VLAN tag identifier in the VLAN Tag Register must not + * be zero. Set the VLAN tag identifier to "1" to enable the + * VLAN Hash Table filtering. This implies that a VLAN tag of + * 1 will always pass filtering. + */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANTR, VL, 1); + + return 0; +} + +static int xgbe_disable_rx_vlan_filtering(struct xgbe_prv_data *pdata) +{ + /* Disable VLAN filtering */ + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VTFE, 0); + + return 0; +} + +#ifndef CRCPOLY_LE +#define CRCPOLY_LE 0xedb88320 +#endif +static u32 xgbe_vid_crc32_le(__le16 vid_le) +{ + u32 poly = CRCPOLY_LE; + u32 crc = ~0; + u32 temp = 0; + unsigned char *data = (unsigned char *)&vid_le; + unsigned char data_byte = 0; + int i, bits; + + bits = get_bitmask_order(VLAN_VID_MASK); + for (i = 0; i < bits; i++) { + if ((i % 8) == 0) + data_byte = data[i / 8]; + + temp = ((crc & 1) ^ data_byte) & 1; + crc >>= 1; + data_byte >>= 1; + + if (temp) + crc ^= poly; + } + + return crc; +} + +static int xgbe_update_vlan_hash_table(struct xgbe_prv_data *pdata) +{ + u32 crc; + u16 vid; + __le16 vid_le; + u16 vlan_hash_table = 0; + + /* Generate the VLAN Hash Table value */ + for_each_set_bit(vid, pdata->active_vlans, VLAN_N_VID) { + /* Get the CRC32 value of the VLAN ID */ + vid_le = cpu_to_le16(vid); + crc = bitrev32(~xgbe_vid_crc32_le(vid_le)) >> 28; + + vlan_hash_table |= (1 << crc); + } + + /* Set the VLAN Hash Table filtering register */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANHTR, VLHT, vlan_hash_table); + + return 0; +} + static void xgbe_tx_desc_reset(struct xgbe_ring_data *rdata) { struct xgbe_ring_desc *rdesc = rdata->rdesc; @@ -766,7 +882,7 @@ static void xgbe_tx_desc_init(struct xgbe_channel *channel) /* Initialze all descriptors */ for (i = 0; i < ring->rdesc_count; i++) { - rdata = GET_DESC_DATA(ring, i); + rdata = XGBE_GET_DESC_DATA(ring, i); rdesc = rdata->rdesc; /* Initialize Tx descriptor @@ -791,7 +907,7 @@ static void xgbe_tx_desc_init(struct xgbe_channel *channel) XGMAC_DMA_IOWRITE(channel, DMA_CH_TDRLR, ring->rdesc_count - 1); /* Update the starting address of descriptor ring */ - rdata = GET_DESC_DATA(ring, start_index); + rdata = XGBE_GET_DESC_DATA(ring, start_index); XGMAC_DMA_IOWRITE(channel, DMA_CH_TDLR_HI, upper_32_bits(rdata->rdesc_dma)); XGMAC_DMA_IOWRITE(channel, DMA_CH_TDLR_LO, @@ -848,7 +964,7 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel) /* Initialize all descriptors */ for (i = 0; i < ring->rdesc_count; i++) { - rdata = GET_DESC_DATA(ring, i); + rdata = XGBE_GET_DESC_DATA(ring, i); rdesc = rdata->rdesc; /* Initialize Rx descriptor @@ -882,14 +998,14 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel) XGMAC_DMA_IOWRITE(channel, DMA_CH_RDRLR, ring->rdesc_count - 1); /* Update the starting address of descriptor ring */ - rdata = GET_DESC_DATA(ring, start_index); + rdata = XGBE_GET_DESC_DATA(ring, start_index); XGMAC_DMA_IOWRITE(channel, DMA_CH_RDLR_HI, upper_32_bits(rdata->rdesc_dma)); XGMAC_DMA_IOWRITE(channel, DMA_CH_RDLR_LO, lower_32_bits(rdata->rdesc_dma)); /* Update the Rx Descriptor Tail Pointer */ - rdata = GET_DESC_DATA(ring, start_index + ring->rdesc_count - 1); + rdata = XGBE_GET_DESC_DATA(ring, start_index + ring->rdesc_count - 1); XGMAC_DMA_IOWRITE(channel, DMA_CH_RDTR_LO, lower_32_bits(rdata->rdesc_dma)); @@ -933,7 +1049,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel) if (tx_coalesce && !channel->tx_timer_active) ring->coalesce_count = 0; - rdata = GET_DESC_DATA(ring, ring->cur); + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdesc = rdata->rdesc; /* Create a context descriptor if this is a TSO packet */ @@ -977,7 +1093,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel) } ring->cur++; - rdata = GET_DESC_DATA(ring, ring->cur); + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdesc = rdata->rdesc; } @@ -1034,7 +1150,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel) for (i = ring->cur - start_index + 1; i < packet->rdesc_count; i++) { ring->cur++; - rdata = GET_DESC_DATA(ring, ring->cur); + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdesc = rdata->rdesc; /* Update buffer address */ @@ -1074,7 +1190,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel) wmb(); /* Set OWN bit for the first descriptor */ - rdata = GET_DESC_DATA(ring, start_index); + rdata = XGBE_GET_DESC_DATA(ring, start_index); rdesc = rdata->rdesc; XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN, 1); @@ -1088,7 +1204,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel) /* Issue a poll command to Tx DMA by writing address * of next immediate free descriptor */ ring->cur++; - rdata = GET_DESC_DATA(ring, ring->cur); + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO, lower_32_bits(rdata->rdesc_dma)); @@ -1113,11 +1229,12 @@ static int xgbe_dev_read(struct xgbe_channel *channel) struct xgbe_ring_data *rdata; struct xgbe_ring_desc *rdesc; struct xgbe_packet_data *packet = &ring->packet_data; + struct net_device *netdev = channel->pdata->netdev; unsigned int err, etlt; DBGPR("-->xgbe_dev_read: cur = %d\n", ring->cur); - rdata = GET_DESC_DATA(ring, ring->cur); + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdesc = rdata->rdesc; /* Check for data availability */ @@ -1153,7 +1270,8 @@ static int xgbe_dev_read(struct xgbe_channel *channel) DBGPR(" err=%u, etlt=%#x\n", err, etlt); if (!err || (err && !etlt)) { - if (etlt == 0x09) { + if ((etlt == 0x09) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) { XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, VLAN_CTAG, 1); packet->vlan_ctag = XGMAC_GET_BITS_LE(rdesc->desc0, @@ -1195,7 +1313,7 @@ static void xgbe_save_interrupt_status(struct xgbe_channel *channel, if (int_state == XGMAC_INT_STATE_SAVE) { channel->saved_ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER); - channel->saved_ier &= DMA_INTERRUPT_MASK; + channel->saved_ier &= XGBE_DMA_INTERRUPT_MASK; } else { dma_ch_ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER); dma_ch_ier |= channel->saved_ier; @@ -1275,7 +1393,7 @@ static int xgbe_disable_int(struct xgbe_channel *channel, xgbe_save_interrupt_status(channel, XGMAC_INT_STATE_SAVE); dma_ch_ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER); - dma_ch_ier &= ~DMA_INTERRUPT_MASK; + dma_ch_ier &= ~XGBE_DMA_INTERRUPT_MASK; XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, dma_ch_ier); break; default: @@ -1342,23 +1460,23 @@ static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata) unsigned int arcache, awcache; arcache = 0; - XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, DMA_ARCACHE_SETTING); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, DMA_ARDOMAIN_SETTING); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, DMA_ARCACHE_SETTING); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, DMA_ARDOMAIN_SETTING); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, DMA_ARCACHE_SETTING); - XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, DMA_ARDOMAIN_SETTING); + XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, XGBE_DMA_ARCACHE); + XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, XGBE_DMA_ARDOMAIN); + XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, XGBE_DMA_ARCACHE); + XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, XGBE_DMA_ARDOMAIN); + XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, XGBE_DMA_ARCACHE); + XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, XGBE_DMA_ARDOMAIN); XGMAC_IOWRITE(pdata, DMA_AXIARCR, arcache); awcache = 0; - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, DMA_AWCACHE_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, DMA_AWDOMAIN_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, DMA_AWCACHE_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, DMA_AWDOMAIN_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, DMA_AWCACHE_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, DMA_AWDOMAIN_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, DMA_AWCACHE_SETTING); - XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, DMA_AWDOMAIN_SETTING); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, XGBE_DMA_AWCACHE); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, XGBE_DMA_AWDOMAIN); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, XGBE_DMA_AWCACHE); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, XGBE_DMA_AWDOMAIN); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, XGBE_DMA_AWCACHE); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, XGBE_DMA_AWDOMAIN); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, XGBE_DMA_AWCACHE); + XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, XGBE_DMA_AWDOMAIN); XGMAC_IOWRITE(pdata, DMA_AXIAWCR, awcache); } @@ -1388,66 +1506,66 @@ static unsigned int xgbe_calculate_per_queue_fifo(unsigned long fifo_size, /* Calculate Tx/Rx fifo share per queue */ switch (fifo_size) { case 0: - q_fifo_size = FIFO_SIZE_B(128); + q_fifo_size = XGBE_FIFO_SIZE_B(128); break; case 1: - q_fifo_size = FIFO_SIZE_B(256); + q_fifo_size = XGBE_FIFO_SIZE_B(256); break; case 2: - q_fifo_size = FIFO_SIZE_B(512); + q_fifo_size = XGBE_FIFO_SIZE_B(512); break; case 3: - q_fifo_size = FIFO_SIZE_KB(1); + q_fifo_size = XGBE_FIFO_SIZE_KB(1); break; case 4: - q_fifo_size = FIFO_SIZE_KB(2); + q_fifo_size = XGBE_FIFO_SIZE_KB(2); break; case 5: - q_fifo_size = FIFO_SIZE_KB(4); + q_fifo_size = XGBE_FIFO_SIZE_KB(4); break; case 6: - q_fifo_size = FIFO_SIZE_KB(8); + q_fifo_size = XGBE_FIFO_SIZE_KB(8); break; case 7: - q_fifo_size = FIFO_SIZE_KB(16); + q_fifo_size = XGBE_FIFO_SIZE_KB(16); break; case 8: - q_fifo_size = FIFO_SIZE_KB(32); + q_fifo_size = XGBE_FIFO_SIZE_KB(32); break; case 9: - q_fifo_size = FIFO_SIZE_KB(64); + q_fifo_size = XGBE_FIFO_SIZE_KB(64); break; case 10: - q_fifo_size = FIFO_SIZE_KB(128); + q_fifo_size = XGBE_FIFO_SIZE_KB(128); break; case 11: - q_fifo_size = FIFO_SIZE_KB(256); + q_fifo_size = XGBE_FIFO_SIZE_KB(256); break; } q_fifo_size = q_fifo_size / queue_count; /* Set the queue fifo size programmable value */ - if (q_fifo_size >= FIFO_SIZE_KB(256)) + if (q_fifo_size >= XGBE_FIFO_SIZE_KB(256)) p_fifo = XGMAC_MTL_FIFO_SIZE_256K; - else if (q_fifo_size >= FIFO_SIZE_KB(128)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(128)) p_fifo = XGMAC_MTL_FIFO_SIZE_128K; - else if (q_fifo_size >= FIFO_SIZE_KB(64)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(64)) p_fifo = XGMAC_MTL_FIFO_SIZE_64K; - else if (q_fifo_size >= FIFO_SIZE_KB(32)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(32)) p_fifo = XGMAC_MTL_FIFO_SIZE_32K; - else if (q_fifo_size >= FIFO_SIZE_KB(16)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(16)) p_fifo = XGMAC_MTL_FIFO_SIZE_16K; - else if (q_fifo_size >= FIFO_SIZE_KB(8)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(8)) p_fifo = XGMAC_MTL_FIFO_SIZE_8K; - else if (q_fifo_size >= FIFO_SIZE_KB(4)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(4)) p_fifo = XGMAC_MTL_FIFO_SIZE_4K; - else if (q_fifo_size >= FIFO_SIZE_KB(2)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(2)) p_fifo = XGMAC_MTL_FIFO_SIZE_2K; - else if (q_fifo_size >= FIFO_SIZE_KB(1)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_KB(1)) p_fifo = XGMAC_MTL_FIFO_SIZE_1K; - else if (q_fifo_size >= FIFO_SIZE_B(512)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_B(512)) p_fifo = XGMAC_MTL_FIFO_SIZE_512; - else if (q_fifo_size >= FIFO_SIZE_B(256)) + else if (q_fifo_size >= XGBE_FIFO_SIZE_B(256)) p_fifo = XGMAC_MTL_FIFO_SIZE_256; return p_fifo; @@ -1520,6 +1638,13 @@ static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata) static void xgbe_config_mac_address(struct xgbe_prv_data *pdata) { xgbe_set_mac_address(pdata, pdata->netdev->dev_addr); + + /* Filtering is done using perfect filtering and hash filtering */ + if (pdata->hw_feat.hash_table_size) { + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HPF, 1); + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HUC, 1); + XGMAC_IOWRITE_BITS(pdata, MAC_PFR, HMC, 1); + } } static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata) @@ -1541,6 +1666,18 @@ static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata) static void xgbe_config_vlan_support(struct xgbe_prv_data *pdata) { + /* Indicate that VLAN Tx CTAGs come from context descriptors */ + XGMAC_IOWRITE_BITS(pdata, MAC_VLANIR, CSVL, 0); + XGMAC_IOWRITE_BITS(pdata, MAC_VLANIR, VLTI, 1); + + /* Set the current VLAN Hash Table register value */ + xgbe_update_vlan_hash_table(pdata); + + if (pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + xgbe_enable_rx_vlan_filtering(pdata); + else + xgbe_disable_rx_vlan_filtering(pdata); + if (pdata->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) xgbe_enable_rx_vlan_stripping(pdata); else @@ -2104,7 +2241,7 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->set_promiscuous_mode = xgbe_set_promiscuous_mode; hw_if->set_all_multicast_mode = xgbe_set_all_multicast_mode; - hw_if->set_addn_mac_addrs = xgbe_set_addn_mac_addrs; + hw_if->add_mac_addresses = xgbe_add_mac_addresses; hw_if->set_mac_address = xgbe_set_mac_address; hw_if->enable_rx_csum = xgbe_enable_rx_csum; @@ -2112,6 +2249,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->enable_rx_vlan_stripping = xgbe_enable_rx_vlan_stripping; hw_if->disable_rx_vlan_stripping = xgbe_disable_rx_vlan_stripping; + hw_if->enable_rx_vlan_filtering = xgbe_enable_rx_vlan_filtering; + hw_if->disable_rx_vlan_filtering = xgbe_disable_rx_vlan_filtering; + hw_if->update_vlan_hash_table = xgbe_update_vlan_hash_table; hw_if->read_mmd_regs = xgbe_read_mmd_regs; hw_if->write_mmd_regs = xgbe_write_mmd_regs; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index cfe3d93b5f52..72dd61166a1c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -144,9 +144,10 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu) } rx_buf_size = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - if (rx_buf_size < RX_MIN_BUF_SIZE) - rx_buf_size = RX_MIN_BUF_SIZE; - rx_buf_size = (rx_buf_size + RX_BUF_ALIGN - 1) & ~(RX_BUF_ALIGN - 1); + if (rx_buf_size < XGBE_RX_MIN_BUF_SIZE) + rx_buf_size = XGBE_RX_MIN_BUF_SIZE; + rx_buf_size = (rx_buf_size + XGBE_RX_BUF_ALIGN - 1) & + ~(XGBE_RX_BUF_ALIGN - 1); return rx_buf_size; } @@ -377,6 +378,21 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) hw_feat->pps_out_num = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, PPSOUTNUM); hw_feat->aux_snap_num = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, AUXSNAPNUM); + /* Translate the Hash Table size into actual number */ + switch (hw_feat->hash_table_size) { + case 0: + break; + case 1: + hw_feat->hash_table_size = 64; + break; + case 2: + hw_feat->hash_table_size = 128; + break; + case 3: + hw_feat->hash_table_size = 256; + break; + } + /* The Queue and Channel counts are zero based so increment them * to get the actual number */ @@ -446,7 +462,7 @@ static void xgbe_free_tx_skbuff(struct xgbe_prv_data *pdata) break; for (j = 0; j < ring->rdesc_count; j++) { - rdata = GET_DESC_DATA(ring, j); + rdata = XGBE_GET_DESC_DATA(ring, j); desc_if->unmap_skb(pdata, rdata); } } @@ -471,7 +487,7 @@ static void xgbe_free_rx_skbuff(struct xgbe_prv_data *pdata) break; for (j = 0; j < ring->rdesc_count; j++) { - rdata = GET_DESC_DATA(ring, j); + rdata = XGBE_GET_DESC_DATA(ring, j); desc_if->unmap_skb(pdata, rdata); } } @@ -726,14 +742,14 @@ static void xgbe_packet_info(struct xgbe_ring *ring, struct sk_buff *skb, for (len = skb_headlen(skb); len;) { packet->rdesc_count++; - len -= min_t(unsigned int, len, TX_MAX_BUF_SIZE); + len -= min_t(unsigned int, len, XGBE_TX_MAX_BUF_SIZE); } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; for (len = skb_frag_size(frag); len; ) { packet->rdesc_count++; - len -= min_t(unsigned int, len, TX_MAX_BUF_SIZE); + len -= min_t(unsigned int, len, XGBE_TX_MAX_BUF_SIZE); } } } @@ -911,18 +927,10 @@ static void xgbe_set_rx_mode(struct net_device *netdev) pr_mode = ((netdev->flags & IFF_PROMISC) != 0); am_mode = ((netdev->flags & IFF_ALLMULTI) != 0); - if (netdev_uc_count(netdev) > pdata->hw_feat.addn_mac) - pr_mode = 1; - if (netdev_mc_count(netdev) > pdata->hw_feat.addn_mac) - am_mode = 1; - if ((netdev_uc_count(netdev) + netdev_mc_count(netdev)) > - pdata->hw_feat.addn_mac) - pr_mode = 1; - hw_if->set_promiscuous_mode(pdata, pr_mode); hw_if->set_all_multicast_mode(pdata, am_mode); - if (!pr_mode) - hw_if->set_addn_mac_addrs(pdata, am_mode); + + hw_if->add_mac_addresses(pdata); DBGPR("<--xgbe_set_rx_mode\n"); } @@ -999,6 +1007,38 @@ static struct rtnl_link_stats64 *xgbe_get_stats64(struct net_device *netdev, return s; } +static int xgbe_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, + u16 vid) +{ + struct xgbe_prv_data *pdata = netdev_priv(netdev); + struct xgbe_hw_if *hw_if = &pdata->hw_if; + + DBGPR("-->%s\n", __func__); + + set_bit(vid, pdata->active_vlans); + hw_if->update_vlan_hash_table(pdata); + + DBGPR("<--%s\n", __func__); + + return 0; +} + +static int xgbe_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, + u16 vid) +{ + struct xgbe_prv_data *pdata = netdev_priv(netdev); + struct xgbe_hw_if *hw_if = &pdata->hw_if; + + DBGPR("-->%s\n", __func__); + + clear_bit(vid, pdata->active_vlans); + hw_if->update_vlan_hash_table(pdata); + + DBGPR("<--%s\n", __func__); + + return 0; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void xgbe_poll_controller(struct net_device *netdev) { @@ -1021,26 +1061,26 @@ static int xgbe_set_features(struct net_device *netdev, { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned int rxcsum_enabled, rxvlan_enabled; + unsigned int rxcsum, rxvlan, rxvlan_filter; - rxcsum_enabled = !!(pdata->netdev_features & NETIF_F_RXCSUM); - rxvlan_enabled = !!(pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX); + rxcsum = pdata->netdev_features & NETIF_F_RXCSUM; + rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX; + rxvlan_filter = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_FILTER; - if ((features & NETIF_F_RXCSUM) && !rxcsum_enabled) { + if ((features & NETIF_F_RXCSUM) && !rxcsum) hw_if->enable_rx_csum(pdata); - netdev_alert(netdev, "state change - rxcsum enabled\n"); - } else if (!(features & NETIF_F_RXCSUM) && rxcsum_enabled) { + else if (!(features & NETIF_F_RXCSUM) && rxcsum) hw_if->disable_rx_csum(pdata); - netdev_alert(netdev, "state change - rxcsum disabled\n"); - } - if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan_enabled) { + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan) hw_if->enable_rx_vlan_stripping(pdata); - netdev_alert(netdev, "state change - rxvlan enabled\n"); - } else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && rxvlan_enabled) { + else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && rxvlan) hw_if->disable_rx_vlan_stripping(pdata); - netdev_alert(netdev, "state change - rxvlan disabled\n"); - } + + if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) && !rxvlan_filter) + hw_if->enable_rx_vlan_filtering(pdata); + else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && rxvlan_filter) + hw_if->disable_rx_vlan_filtering(pdata); pdata->netdev_features = features; @@ -1058,6 +1098,8 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = xgbe_change_mtu, .ndo_get_stats64 = xgbe_get_stats64, + .ndo_vlan_rx_add_vid = xgbe_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = xgbe_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = xgbe_poll_controller, #endif @@ -1089,8 +1131,9 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) spin_lock_irqsave(&ring->lock, flags); - while ((processed < TX_DESC_MAX_PROC) && (ring->dirty < ring->cur)) { - rdata = GET_DESC_DATA(ring, ring->dirty); + while ((processed < XGBE_TX_DESC_MAX_PROC) && + (ring->dirty < ring->cur)) { + rdata = XGBE_GET_DESC_DATA(ring, ring->dirty); rdesc = rdata->rdesc; if (!hw_if->tx_complete(rdesc)) @@ -1109,7 +1152,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) } if ((ring->tx.queue_stopped == 1) && - (xgbe_tx_avail_desc(ring) > TX_DESC_MIN_FREE)) { + (xgbe_tx_avail_desc(ring) > XGBE_TX_DESC_MIN_FREE)) { ring->tx.queue_stopped = 0; netif_wake_subqueue(netdev, channel->queue_index); } @@ -1152,7 +1195,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) cur_len = 0; read_again: - rdata = GET_DESC_DATA(ring, ring->cur); + rdata = XGBE_GET_DESC_DATA(ring, ring->cur); if (hw_if->dev_read(channel)) break; @@ -1244,7 +1287,7 @@ read_again: /* Update the Rx Tail Pointer Register with address of * the last cleaned entry */ - rdata = GET_DESC_DATA(ring, ring->rx.realloc_index - 1); + rdata = XGBE_GET_DESC_DATA(ring, ring->rx.realloc_index - 1); XGMAC_DMA_IOWRITE(channel, DMA_CH_RDTR_LO, lower_32_bits(rdata->rdesc_dma)); } @@ -1296,7 +1339,7 @@ void xgbe_dump_tx_desc(struct xgbe_ring *ring, unsigned int idx, struct xgbe_ring_desc *rdesc; while (count--) { - rdata = GET_DESC_DATA(ring, idx); + rdata = XGBE_GET_DESC_DATA(ring, idx); rdesc = rdata->rdesc; DBGPR("TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx, (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE", diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index c83584a26713..b411ac557c47 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -247,16 +247,16 @@ static int xgbe_probe(struct platform_device *pdev) mutex_init(&pdata->xpcs_mutex); /* Set and validate the number of descriptors for a ring */ - BUILD_BUG_ON_NOT_POWER_OF_2(TX_DESC_CNT); - pdata->tx_desc_count = TX_DESC_CNT; + BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); + pdata->tx_desc_count = XGBE_TX_DESC_CNT; if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) { dev_err(dev, "tx descriptor count (%d) is not valid\n", pdata->tx_desc_count); ret = -EINVAL; goto err_io; } - BUILD_BUG_ON_NOT_POWER_OF_2(RX_DESC_CNT); - pdata->rx_desc_count = RX_DESC_CNT; + BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT); + pdata->rx_desc_count = XGBE_RX_DESC_CNT; if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) { dev_err(dev, "rx descriptor count (%d) is not valid\n", pdata->rx_desc_count); @@ -385,7 +385,8 @@ static int xgbe_probe(struct platform_device *pdev) NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM | @@ -396,6 +397,8 @@ static int xgbe_probe(struct platform_device *pdev) netdev->features |= netdev->hw_features; pdata->netdev_features = netdev->features; + netdev->priv_flags |= IFF_UNICAST_FLT; + xgbe_init_rx_coalesce(pdata); xgbe_init_tx_coalesce(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ab0627162c01..a2d5f5f5d8b7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -121,6 +121,8 @@ #include <linux/netdevice.h> #include <linux/workqueue.h> #include <linux/phy.h> +#include <linux/if_vlan.h> +#include <linux/bitops.h> #define XGBE_DRV_NAME "amd-xgbe" @@ -128,22 +130,25 @@ #define XGBE_DRV_DESC "AMD 10 Gigabit Ethernet Driver" /* Descriptor related defines */ -#define TX_DESC_CNT 512 -#define TX_DESC_MIN_FREE (TX_DESC_CNT >> 3) -#define TX_DESC_MAX_PROC (TX_DESC_CNT >> 1) -#define RX_DESC_CNT 512 +#define XGBE_TX_DESC_CNT 512 +#define XGBE_TX_DESC_MIN_FREE (XGBE_TX_DESC_CNT >> 3) +#define XGBE_TX_DESC_MAX_PROC (XGBE_TX_DESC_CNT >> 1) +#define XGBE_RX_DESC_CNT 512 -#define TX_MAX_BUF_SIZE (0x3fff & ~(64 - 1)) +#define XGBE_TX_MAX_BUF_SIZE (0x3fff & ~(64 - 1)) -#define RX_MIN_BUF_SIZE (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN) -#define RX_BUF_ALIGN 64 +#define XGBE_RX_MIN_BUF_SIZE (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN) +#define XGBE_RX_BUF_ALIGN 64 #define XGBE_MAX_DMA_CHANNELS 16 -#define DMA_ARDOMAIN_SETTING 0x2 -#define DMA_ARCACHE_SETTING 0xb -#define DMA_AWDOMAIN_SETTING 0x2 -#define DMA_AWCACHE_SETTING 0x7 -#define DMA_INTERRUPT_MASK 0x31c7 + +/* DMA cache settings - Outer sharable, write-back, write-allocate */ +#define XGBE_DMA_ARDOMAIN 0x2 +#define XGBE_DMA_ARCACHE 0xb +#define XGBE_DMA_AWDOMAIN 0x2 +#define XGBE_DMA_AWCACHE 0x7 + +#define XGBE_DMA_INTERRUPT_MASK 0x31c7 #define XGMAC_MIN_PACKET 60 #define XGMAC_STD_PACKET_MTU 1500 @@ -151,10 +156,6 @@ #define XGMAC_JUMBO_PACKET_MTU 9000 #define XGMAC_MAX_JUMBO_PACKET 9018 -#define MAX_MULTICAST_LIST 14 -#define TX_FLAGS_IP_PKT 0x00000001 -#define TX_FLAGS_TCP_PKT 0x00000002 - /* MDIO bus phy name */ #define XGBE_PHY_NAME "amd_xgbe_phy" #define XGBE_PRTAD 0 @@ -163,18 +164,18 @@ #define XGMAC_DRIVER_CONTEXT 1 #define XGMAC_IOCTL_CONTEXT 2 -#define FIFO_SIZE_B(x) (x) -#define FIFO_SIZE_KB(x) (x * 1024) +#define XGBE_FIFO_SIZE_B(x) (x) +#define XGBE_FIFO_SIZE_KB(x) (x * 1024) #define XGBE_TC_CNT 2 /* Helper macro for descriptor handling - * Always use GET_DESC_DATA to access the descriptor data + * Always use XGBE_GET_DESC_DATA to access the descriptor data * since the index is free-running and needs to be and-ed * with the descriptor count value of the ring to index to * the proper descriptor data. */ -#define GET_DESC_DATA(_ring, _idx) \ +#define XGBE_GET_DESC_DATA(_ring, _idx) \ ((_ring)->rdata + \ ((_idx) & ((_ring)->rdesc_count - 1))) @@ -190,6 +191,8 @@ /* Flow control queue count */ #define XGMAC_MAX_FLOW_CONTROL_QUEUES 8 +/* Maximum MAC address hash table size (256 bits = 8 bytes) */ +#define XGBE_MAC_HASH_TABLE_SIZE 8 struct xgbe_prv_data; @@ -219,7 +222,7 @@ struct xgbe_ring_desc { /* Structure used to hold information related to the descriptor * and the packet associated with the descriptor (always use - * use the GET_DESC_DATA macro to access this data from the ring) + * use the XGBE_GET_DESC_DATA macro to access this data from the ring) */ struct xgbe_ring_data { struct xgbe_ring_desc *rdesc; /* Virtual address of descriptor */ @@ -250,7 +253,7 @@ struct xgbe_ring { unsigned int rdesc_count; /* Array of descriptor data corresponding the descriptor memory - * (always use the GET_DESC_DATA macro to access this data) + * (always use the XGBE_GET_DESC_DATA macro to access this data) */ struct xgbe_ring_data *rdata; @@ -386,7 +389,7 @@ struct xgbe_hw_if { int (*set_promiscuous_mode)(struct xgbe_prv_data *, unsigned int); int (*set_all_multicast_mode)(struct xgbe_prv_data *, unsigned int); - int (*set_addn_mac_addrs)(struct xgbe_prv_data *, unsigned int); + int (*add_mac_addresses)(struct xgbe_prv_data *); int (*set_mac_address)(struct xgbe_prv_data *, u8 *addr); int (*enable_rx_csum)(struct xgbe_prv_data *); @@ -394,6 +397,9 @@ struct xgbe_hw_if { int (*enable_rx_vlan_stripping)(struct xgbe_prv_data *); int (*disable_rx_vlan_stripping)(struct xgbe_prv_data *); + int (*enable_rx_vlan_filtering)(struct xgbe_prv_data *); + int (*disable_rx_vlan_filtering)(struct xgbe_prv_data *); + int (*update_vlan_hash_table)(struct xgbe_prv_data *); int (*read_mmd_regs)(struct xgbe_prv_data *, int, int); void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int); @@ -589,6 +595,9 @@ struct xgbe_prv_data { struct napi_struct napi; struct xgbe_mmc_stats mmc_stats; + /* Filtering support */ + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + /* System clock value used for Rx watchdog */ struct clk *sysclock; diff --git a/net/tipc/link.c b/net/tipc/link.c index ad2c57f5868d..a081e7d08d22 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -82,9 +82,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); @@ -335,13 +332,15 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); if (p_ptr) { if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->congested = 1; + tsk = tipc_port_to_sock(p_ptr); + tsk->link_cong = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; @@ -355,6 +354,7 @@ exit: void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; struct tipc_port *temp_p_ptr; int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; @@ -370,10 +370,11 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) wait_list) { if (win <= 0) break; + tsk = tipc_port_to_sock(p_ptr); list_del_init(&p_ptr->wait_list); spin_lock_bh(p_ptr->lock); - p_ptr->congested = 0; - tipc_port_wakeup(p_ptr); + tsk->link_cong = 0; + tipc_sock_wakeup(tsk); win -= p_ptr->waiting_pkts; spin_unlock_bh(p_ptr->lock); } @@ -850,6 +851,144 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) return res; } +/* tipc_link_cong: determine return value and how to treat the + * sent buffer during link congestion. + * - For plain, errorless user data messages we keep the buffer and + * return -ELINKONG. + * - For all other messages we discard the buffer and return -EHOSTUNREACH + * - For TIPC internal messages we also reset the link + */ +static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + uint psz = msg_size(msg); + uint imp = tipc_msg_tot_importance(msg); + u32 oport = msg_tot_origport(msg); + + if (likely(imp <= TIPC_CRITICAL_IMPORTANCE)) { + if (!msg_errcode(msg) && !msg_reroute_cnt(msg)) { + link_schedule_port(link, oport, psz); + return -ELINKCONG; + } + } else { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + } + kfree_skb_list(buf); + return -EHOSTUNREACH; +} + +/** + * __tipc_link_xmit2(): same as tipc_link_xmit2, but destlink is known & locked + * @link: link to use + * @buf: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket + * user data messages) or -EHOSTUNREACH (all other messages/senders) + * Only the socket functions tipc_send_stream() and tipc_send_packet() need + * to act on the return value, since they may need to do more send attempts. + */ +int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + uint psz = msg_size(msg); + uint qsz = link->out_queue_size; + uint sndlim = link->queue_limit[0]; + uint imp = tipc_msg_tot_importance(msg); + uint mtu = link->max_pkt; + uint ack = mod(link->next_in_no - 1); + uint seqno = link->next_out_no; + uint bc_last_in = link->owner->bclink.last_in; + struct tipc_media_addr *addr = &link->media_addr; + struct sk_buff *next = buf->next; + + /* Match queue limits against msg importance: */ + if (unlikely(qsz >= link->queue_limit[imp])) + return tipc_link_cong(link, buf); + + /* Has valid packet limit been used ? */ + if (unlikely(psz > mtu)) { + kfree_skb_list(buf); + return -EMSGSIZE; + } + + /* Prepare each packet for sending, and add to outqueue: */ + while (buf) { + next = buf->next; + msg = buf_msg(buf); + msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_bcast_ack(msg, bc_last_in); + + if (!link->first_out) { + link->first_out = buf; + } else if (qsz < sndlim) { + link->last_out->next = buf; + } else if (tipc_msg_bundle(link->last_out, buf, mtu)) { + link->stats.sent_bundled++; + buf = next; + next = buf->next; + continue; + } else if (tipc_msg_make_bundle(&buf, mtu, link->addr)) { + link->stats.sent_bundled++; + link->stats.sent_bundles++; + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } else { + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } + + /* Send packet if possible: */ + if (likely(++qsz <= sndlim)) { + tipc_bearer_send(link->bearer_id, buf, addr); + link->next_out = next; + link->unacked_window = 0; + } + seqno++; + link->last_out = buf; + buf = next; + } + link->next_out_no = seqno; + link->out_queue_size = qsz; + return 0; +} + +/** + * tipc_link_xmit2() is the general link level function for message sending + * @buf: chain of buffers containing message + * @dsz: amount of user data to be sent + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_link_xmit2(struct sk_buff *buf, u32 dnode, u32 selector) +{ + struct tipc_link *link = NULL; + struct tipc_node *node; + int rc = -EHOSTUNREACH; + + node = tipc_node_find(dnode); + if (node) { + tipc_node_lock(node); + link = node->active_links[selector & 1]; + if (link) + rc = __tipc_link_xmit2(link, buf); + tipc_node_unlock(node); + } + + if (link) + return rc; + + if (likely(in_own_node(dnode))) + return tipc_sk_rcv(buf); + + kfree_skb_list(buf); + return rc; +} + /* * tipc_link_sync_xmit - synchronize broadcast link endpoints. * @@ -933,252 +1072,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) } /* - * tipc_link_xmit_fast: Entry for data messages where the - * destination link is known and the header is complete, - * inclusive total message length. Very time critical. - * Link is locked. Returns user data length. - */ -static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 *used_max_pkt) -{ - struct tipc_msg *msg = buf_msg(buf); - int res = msg_data_sz(msg); - - if (likely(!link_congested(l_ptr))) { - if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->bearer_id, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; - } - else - *used_max_pkt = l_ptr->max_pkt; - } - return __tipc_link_xmit(l_ptr, buf); /* All other cases */ -} - -/* - * tipc_link_iovec_xmit_fast: Entry for messages where the - * destination processor is known and the header is complete, - * except for total message length. - * Returns user data length or errno. - */ -int tipc_link_iovec_xmit_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_msg *hdr = &sender->phdr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct tipc_node *node; - int res; - u32 selector = msg_origport(hdr) & 1; - -again: - /* - * Try building message using port's max_pkt hint. - * (Must not hold any locks while building message.) - */ - res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf); - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - return res; - - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[selector]; - if (likely(l_ptr)) { - if (likely(buf)) { - res = tipc_link_xmit_fast(l_ptr, buf, - &sender->max_pkt); -exit: - tipc_node_unlock(node); - return res; - } - - /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr)) { - res = link_schedule_port(l_ptr, - sender->ref, res); - goto exit; - } - - /* - * Message size exceeds max_pkt hint; update hint, - * then re-try fast path or fragment the message - */ - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - - - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) - goto again; - - return tipc_link_iovec_long_xmit(sender, msg_sect, - len, destaddr); - } - tipc_node_unlock(node); - } - - /* Couldn't find a link to the destination node */ - kfree_skb(buf); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); - return -ENETUNREACH; -} - -/* - * tipc_link_iovec_long_xmit(): Entry for long messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Link and bearer congestion status have been checked to be ok, - * and are ignored if they change. - * - * Note that fragments do not use the full link MTU so that they won't have - * to undergo refragmentation if link changeover causes them to be sent - * over another link with an additional tunnel header added as prefix. - * (Refragmentation will still occur if the other link has a smaller MTU.) - * - * Returns user data length or errno. - */ -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_link *l_ptr; - struct tipc_node *node; - struct tipc_msg *hdr = &sender->phdr; - u32 dsz = len; - u32 max_pkt, fragm_sz, rest; - struct tipc_msg fragm_hdr; - struct sk_buff *buf, *buf_chain, *prev; - u32 fragm_crs, fragm_rest, hsz, sect_rest; - const unchar __user *sect_crs; - int curr_sect; - u32 fragm_no; - int res = 0; - -again: - fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; - /* leave room for tunnel header in case of link changeover */ - fragm_sz = max_pkt - INT_H_SIZE; - /* leave room for fragmentation header in each fragment */ - rest = dsz; - fragm_crs = 0; - fragm_rest = 0; - sect_rest = 0; - sect_crs = NULL; - curr_sect = -1; - - /* Prepare reusable fragment header */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(hdr)); - msg_set_size(&fragm_hdr, max_pkt); - msg_set_fragm_no(&fragm_hdr, 1); - - /* Prepare header of first fragment */ - buf_chain = buf = tipc_buf_acquire(max_pkt); - if (!buf) - return -ENOMEM; - buf->next = NULL; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - hsz = msg_hdr_sz(hdr); - skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz); - - /* Chop up message */ - fragm_crs = INT_H_SIZE + hsz; - fragm_rest = fragm_sz - hsz; - - do { /* For all sections */ - u32 sz; - - if (!sect_rest) { - sect_rest = msg_sect[++curr_sect].iov_len; - sect_crs = msg_sect[curr_sect].iov_base; - } - - if (sect_rest < fragm_rest) - sz = sect_rest; - else - sz = fragm_rest; - - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { - res = -EFAULT; -error: - kfree_skb_list(buf_chain); - return res; - } - sect_crs += sz; - sect_rest -= sz; - fragm_crs += sz; - fragm_rest -= sz; - rest -= sz; - - if (!fragm_rest && rest) { - - /* Initiate new fragment: */ - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } else { - msg_set_type(&fragm_hdr, FRAGMENT); - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - msg_set_fragm_no(&fragm_hdr, ++fragm_no); - prev = buf; - buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) { - res = -ENOMEM; - goto error; - } - - buf->next = NULL; - prev->next = buf; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - fragm_crs = INT_H_SIZE; - fragm_rest = fragm_sz; - } - } while (rest > 0); - - /* - * Now we have a buffer chain. Select a link and check - * that packet size is still OK - */ - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[sender->ref & 1]; - if (!l_ptr) { - tipc_node_unlock(node); - goto reject; - } - if (l_ptr->max_pkt < max_pkt) { - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - kfree_skb_list(buf_chain); - goto again; - } - } else { -reject: - kfree_skb_list(buf_chain); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, - TIPC_ERR_NO_NODE); - return -ENETUNREACH; - } - - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - tipc_node_unlock(node); - return dsz; -} - -/* * tipc_link_push_packet: Push one unsent packet to the media */ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) @@ -1238,7 +1131,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); + msg_set_type(msg, BUNDLE_CLOSED); l_ptr->next_out = buf->next; return 0; } @@ -1590,6 +1483,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: tipc_node_unlock(n_ptr); tipc_sk_rcv(buf); continue; @@ -1604,10 +1498,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) tipc_node_unlock(n_ptr); tipc_named_rcv(buf); continue; - case CONN_MANAGER: - tipc_node_unlock(n_ptr); - tipc_port_proto_rcv(buf); - continue; case BCAST_PROTOCOL: tipc_link_sync_rcv(n_ptr, buf); break; @@ -2217,6 +2107,7 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; struct sk_buff *obuf; + struct tipc_msg *omsg; while (msgcount--) { obuf = buf_extract(buf, pos); @@ -2224,8 +2115,16 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) pr_warn("Link unable to unbundle message(s)\n"); break; } - pos += align(msg_size(buf_msg(obuf))); - tipc_net_route_msg(obuf); + omsg = buf_msg(obuf); + pos += align(msg_size(omsg)); + if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) { + tipc_sk_rcv(obuf); + } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { + tipc_named_rcv(obuf); + } else { + pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); + kfree_skb(obuf); + } } kfree_skb(buf); } diff --git a/net/tipc/link.h b/net/tipc/link.h index 200d518b218e..227ff8120897 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -227,8 +227,10 @@ void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); +int tipc_link_xmit2(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_names_xmit(struct list_head *message_list, u32 dest); int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); +int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); int tipc_link_iovec_xmit_fast(struct tipc_port *sender, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8be6e94a1ca9..6ec958401f78 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -36,21 +36,16 @@ #include "core.h" #include "msg.h" +#include "addr.h" +#include "name_table.h" -u32 tipc_msg_tot_importance(struct tipc_msg *m) +#define MAX_FORWARD_SIZE 1024 + +static unsigned int align(unsigned int i) { - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; - } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); + return (i + 3) & ~3u; } - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { @@ -152,3 +147,268 @@ out_free: kfree_skb(*buf); return 0; } + + +/** + * tipc_msg_build2 - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @iov: User data + * @offset: Posision in iov to start copying from + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @chain: Buffer or chain of buffers to be returned to caller + * Returns message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) +{ + int mhsz = msg_hdr_sz(mhdr); + int msz = mhsz + dsz; + int pktno = 1; + int pktsz; + int pktrem = pktmax; + int drem = dsz; + struct tipc_msg pkthdr; + struct sk_buff *buf, *prev; + char *pktpos; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { + buf = tipc_buf_acquire(msz); + *chain = buf; + if (unlikely(!buf)) + return -ENOMEM; + skb_copy_to_linear_data(buf, mhdr, mhsz); + pktpos = buf->data + mhsz; + if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz)) + return dsz; + rc = -EFAULT; + goto error; + } + + /* Prepare reusable fragment header */ + tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + + /* Prepare first fragment */ + *chain = buf = tipc_buf_acquire(pktmax); + if (!buf) + return -ENOMEM; + pktpos = buf->data; + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(buf, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (memcpy_fromiovecend(pktpos, iov, offset, pktrem)) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + offset += pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; + prev = buf; + buf = tipc_buf_acquire(pktsz); + if (!buf) { + rc = -ENOMEM; + goto error; + } + prev->next = buf; + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos = buf->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + + msg_set_type(buf_msg(buf), LAST_FRAGMENT); + return dsz; +error: + kfree_skb_list(*chain); + *chain = NULL; + return rc; +} + +/** + * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one + * @bbuf: the existing buffer ("bundle") + * @buf: buffer to be appended + * @mtu: max allowable size for the bundle buffer + * Consumes buffer if successful + * Returns true if bundling could be performed, otherwise false + */ +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu) +{ + struct tipc_msg *bmsg = buf_msg(bbuf); + struct tipc_msg *msg = buf_msg(buf); + unsigned int bsz = msg_size(bmsg); + unsigned int msz = msg_size(msg); + u32 start = align(bsz); + u32 max = mtu - INT_H_SIZE; + u32 pad = start - bsz; + + if (likely(msg_user(msg) == MSG_FRAGMENTER)) + return false; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + return false; + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + return false; + if (likely(msg_user(bmsg) != MSG_BUNDLER)) + return false; + if (likely(msg_type(bmsg) != BUNDLE_OPEN)) + return false; + if (unlikely(skb_tailroom(bbuf) < (pad + msz))) + return false; + if (unlikely(max < (start + msz))) + return false; + + skb_put(bbuf, pad + msz); + skb_copy_to_linear_data_offset(bbuf, start, buf->data, msz); + msg_set_size(bmsg, start + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + bbuf->next = buf->next; + kfree_skb(buf); + return true; +} + +/** + * tipc_msg_make_bundle(): Create bundle buf and append message to its tail + * @buf: buffer to be appended and replaced + * @mtu: max allowable size for the bundle buffer, inclusive header + * @dnode: destination node for message. (Not always present in header) + * Replaces buffer if successful + * Returns true if sucess, otherwise false + */ +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) +{ + struct sk_buff *bbuf; + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(*buf); + u32 msz = msg_size(msg); + u32 max = mtu - INT_H_SIZE; + + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == CHANGEOVER_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (msz > (max / 2)) + return false; + + bbuf = tipc_buf_acquire(max); + if (!bbuf) + return false; + + skb_trim(bbuf, INT_H_SIZE); + bmsg = buf_msg(bbuf); + tipc_msg_init(bmsg, MSG_BUNDLER, BUNDLE_OPEN, INT_H_SIZE, dnode); + msg_set_seqno(bmsg, msg_seqno(msg)); + msg_set_ack(bmsg, msg_ack(msg)); + msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); + bbuf->next = (*buf)->next; + tipc_msg_bundle(bbuf, *buf, mtu); + *buf = bbuf; + return true; +} + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @buf: buffer containing message to be reversed + * @dnode: return value: node where to send message after reversal + * @err: error code to be set in message + * Consumes buffer if failure + * Returns true if success, otherwise false + */ +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +{ + struct tipc_msg *msg = buf_msg(buf); + uint imp = msg_importance(msg); + struct tipc_msg ohdr; + uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + + if (skb_linearize(buf)) + goto exit; + if (msg_dest_droppable(msg)) + goto exit; + if (msg_errcode(msg)) + goto exit; + + memcpy(&ohdr, msg, msg_hdr_sz(msg)); + imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); + if (msg_isdata(msg)) + msg_set_importance(msg, imp); + msg_set_errcode(msg, err); + msg_set_origport(msg, msg_destport(&ohdr)); + msg_set_destport(msg, msg_origport(&ohdr)); + msg_set_prevnode(msg, tipc_own_addr); + if (!msg_short(msg)) { + msg_set_orignode(msg, msg_destnode(&ohdr)); + msg_set_destnode(msg, msg_orignode(&ohdr)); + } + msg_set_size(msg, msg_hdr_sz(msg) + rdsz); + skb_trim(buf, msg_size(msg)); + skb_orphan(buf); + *dnode = msg_orignode(&ohdr); + return true; +exit: + kfree_skb(buf); + return false; +} + +/** + * tipc_msg_eval: determine fate of message that found no destination + * @buf: the buffer containing the message. + * @dnode: return value: next-hop node, if message to be forwarded + * @err: error code to use, if message to be rejected + * + * Does not consume buffer + * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error + * code if message to be rejected + */ +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +{ + struct tipc_msg *msg = buf_msg(buf); + u32 dport; + + if (msg_type(msg) != TIPC_NAMED_MSG) + return -TIPC_ERR_NO_PORT; + if (skb_linearize(buf)) + return -TIPC_ERR_NO_NAME; + if (msg_data_sz(msg) > MAX_FORWARD_SIZE) + return -TIPC_ERR_NO_NAME; + if (msg_reroute_cnt(msg) > 0) + return -TIPC_ERR_NO_NAME; + + *dnode = addr_domain(msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(msg_nametype(msg), + msg_nameinst(msg), + dnode); + if (!dport) + return -TIPC_ERR_NO_NAME; + msg_incr_reroute_cnt(msg); + msg_set_destnode(msg, *dnode); + msg_set_destport(msg, dport); + return TIPC_OK; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 503511903d1d..7d574346e75e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -463,6 +463,11 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define FRAGMENT 1 #define LAST_FRAGMENT 2 +/* Bundling protocol message types + */ +#define BUNDLE_OPEN 0 +#define BUNDLE_CLOSED 1 + /* * Link management protocol message types */ @@ -706,12 +711,37 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -u32 tipc_msg_tot_importance(struct tipc_msg *m); +static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_importance(msg_get_wrapped(m)); + return msg_importance(m); +} + +static inline u32 msg_tot_origport(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_origport(msg_get_wrapped(m)); + return msg_origport(m); +} + +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); + +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); + int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, unsigned int len, int max_size, struct sk_buff **buf); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); + +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int mtu , struct sk_buff **chain); + #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index f64375e7f99f..7fcc94998fea 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -1,7 +1,7 @@ /* * net/tipc/net.c: TIPC network routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -104,67 +104,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -static void net_route_named_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - u32 dnode; - u32 dport; - - if (!msg_named(msg)) { - kfree_skb(buf); - return; - } - - dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode); - if (dport) { - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); - tipc_net_route_msg(buf); - return; - } - tipc_reject_msg(buf, TIPC_ERR_NO_NAME); -} - -void tipc_net_route_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg; - u32 dnode; - - if (!buf) - return; - msg = buf_msg(buf); - - /* Handle message for this node */ - dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); - if (tipc_in_scope(dnode, tipc_own_addr)) { - if (msg_isdata(msg)) { - if (msg_mcast(msg)) - tipc_port_mcast_rcv(buf, NULL); - else if (msg_destport(msg)) - tipc_sk_rcv(buf); - else - net_route_named_msg(buf); - return; - } - switch (msg_user(msg)) { - case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); - break; - case CONN_MANAGER: - tipc_port_proto_rcv(buf); - break; - default: - kfree_skb(buf); - } - return; - } - - /* Handle message for another node */ - skb_trim(buf, msg_size(msg)); - tipc_link_xmit(buf, dnode, msg_link_selector(msg)); -} - int tipc_net_start(u32 addr) { char addr_string[16]; diff --git a/net/tipc/net.h b/net/tipc/net.h index c6c2b46f7c28..59ef3388be2c 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,8 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -void tipc_net_route_msg(struct sk_buff *buf); - int tipc_net_start(u32 addr); void tipc_net_stop(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index 5b44c3041be4..d959343043fd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012 Ericsson AB + * Copyright (c) 2000-2006, 2012-2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * @@ -155,21 +155,25 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (!active[0]) { active[0] = active[1] = l_ptr; node_established_contact(n_ptr); - return; + goto exit; } if (l_ptr->priority < active[0]->priority) { pr_info("New link <%s> becomes standby\n", l_ptr->name); - return; + goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; - return; + goto exit; } pr_info("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) pr_info("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; +exit: + /* Leave room for changeover header when returning 'mtu' to users: */ + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; } /** @@ -229,6 +233,19 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); + + /* Leave room for changeover header when returning 'mtu' to users: */ + if (active[0]) { + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + return; + } + + /* Loopback link went down? No fragmentation needed from now on. */ + if (n_ptr->addr == tipc_own_addr) { + n_ptr->act_mtus[0] = MAX_MSG_SIZE; + n_ptr->act_mtus[1] = MAX_MSG_SIZE; + } } int tipc_node_active_links(struct tipc_node *n_ptr) diff --git a/net/tipc/node.h b/net/tipc/node.h index 9087063793f2..b61716a8218e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -41,6 +41,7 @@ #include "addr.h" #include "net.h" #include "bearer.h" +#include "msg.h" /* * Out-of-range value for node signature @@ -105,6 +106,7 @@ struct tipc_node { spinlock_t lock; struct hlist_node hash; struct tipc_link *active_links[2]; + u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; unsigned int action_flags; struct tipc_node_bclink bclink; @@ -143,4 +145,19 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } +static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +{ + struct tipc_node *node; + u32 mtu; + + node = tipc_node_find(addr); + + if (likely(node)) + mtu = node->act_mtus[selector & 1]; + else + mtu = MAX_MSG_SIZE; + + return mtu; +} + #endif diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 7c59ab1d6ecb..2d13eea8574a 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -84,11 +84,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) void tipc_nodesub_notify(struct list_head *nsub_list) { struct tipc_node_subscr *ns, *safe; + net_ev_handler handle_node_down; list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) { - if (ns->handle_node_down) { - ns->handle_node_down(ns->usr_handle); + handle_node_down = ns->handle_node_down; + if (handle_node_down) { ns->handle_node_down = NULL; + handle_node_down(ns->usr_handle); } } } diff --git a/net/tipc/port.c b/net/tipc/port.c index 5fd7acce01ea..0d09dcb6da18 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -42,8 +42,6 @@ /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define CONFIRMED 0 -#define PROBING 1 #define MAX_REJECT_SIZE 1024 @@ -188,12 +186,6 @@ exit: tipc_port_list_free(dp); } - -void tipc_port_wakeup(struct tipc_port *port) -{ - tipc_sock_wakeup(tipc_port_to_sock(port)); -} - /* tipc_port_init - intiate TIPC port and lock it * * Returns obtained reference if initialization is successful, zero otherwise @@ -235,6 +227,8 @@ u32 tipc_port_init(struct tipc_port *p_ptr, void tipc_port_destroy(struct tipc_port *p_ptr) { struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; + u32 peer; tipc_withdraw(p_ptr, 0, NULL); @@ -246,14 +240,15 @@ void tipc_port_destroy(struct tipc_port *p_ptr) if (p_ptr->connected) { buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); + msg = buf_msg(buf); + peer = msg_destnode(msg); + tipc_link_xmit2(buf, peer, msg_link_selector(msg)); } - spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); list_del(&p_ptr->wait_list); spin_unlock_bh(&tipc_port_list_lock); k_term_timer(&p_ptr->timer); - tipc_net_route_msg(buf); } /* @@ -275,100 +270,16 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, msg_set_destport(msg, tipc_port_peerport(p_ptr)); msg_set_origport(msg, p_ptr->ref); msg_set_msgcnt(msg, ack); + buf->next = NULL; } return buf; } -int tipc_reject_msg(struct sk_buff *buf, u32 err) -{ - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *rbuf; - struct tipc_msg *rmsg; - int hdr_sz; - u32 imp; - u32 data_sz = msg_data_sz(msg); - u32 src_node; - u32 rmsg_sz; - - /* discard rejected message if it shouldn't be returned to sender */ - if (WARN(!msg_isdata(msg), - "attempt to reject message with user=%u", msg_user(msg))) { - dump_stack(); - goto exit; - } - if (msg_errcode(msg) || msg_dest_droppable(msg)) - goto exit; - - /* - * construct returned message by copying rejected message header and - * data (or subset), then updating header fields that need adjusting - */ - hdr_sz = msg_hdr_sz(msg); - rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE); - - rbuf = tipc_buf_acquire(rmsg_sz); - if (rbuf == NULL) - goto exit; - - rmsg = buf_msg(rbuf); - skb_copy_to_linear_data(rbuf, msg, rmsg_sz); - - if (msg_connected(rmsg)) { - imp = msg_importance(rmsg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(rmsg, ++imp); - } - msg_set_non_seq(rmsg, 0); - msg_set_size(rmsg, rmsg_sz); - msg_set_errcode(rmsg, err); - msg_set_prevnode(rmsg, tipc_own_addr); - msg_swap_words(rmsg, 4, 5); - if (!msg_short(rmsg)) - msg_swap_words(rmsg, 6, 7); - - /* send self-abort message when rejecting on a connected port */ - if (msg_connected(msg)) { - struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); - - if (p_ptr) { - struct sk_buff *abuf = NULL; - - if (p_ptr->connected) - abuf = port_build_self_abort_msg(p_ptr, err); - tipc_port_unlock(p_ptr); - tipc_net_route_msg(abuf); - } - } - - /* send returned message & dispose of rejected message */ - src_node = msg_prevnode(msg); - if (in_own_node(src_node)) - tipc_sk_rcv(rbuf); - else - tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); -exit: - kfree_skb(buf); - return data_sz; -} - -int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, unsigned int len, - int err) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (!buf) - return res; - - return tipc_reject_msg(buf, err); -} - static void port_timeout(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; @@ -379,15 +290,16 @@ static void port_timeout(unsigned long ref) } /* Last probe answered ? */ - if (p_ptr->probing_state == PROBING) { + if (p_ptr->probing_state == TIPC_CONN_PROBING) { buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); } else { buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); - p_ptr->probing_state = PROBING; + p_ptr->probing_state = TIPC_CONN_PROBING; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -395,12 +307,14 @@ static void port_handle_node_down(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -412,6 +326,7 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 er struct tipc_msg *msg = buf_msg(buf); msg_swap_words(msg, 4, 5); msg_swap_words(msg, 6, 7); + buf->next = NULL; } return buf; } @@ -436,60 +351,11 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er if (imp < TIPC_CRITICAL_IMPORTANCE) msg_set_importance(msg, ++imp); msg_set_errcode(msg, err); + buf->next = NULL; } return buf; } -void tipc_port_proto_rcv(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *p_ptr; - struct sk_buff *r_buf = NULL; - u32 destport = msg_destport(msg); - int wakeable; - - /* Validate connection */ - p_ptr = tipc_port_lock(destport); - if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) { - r_buf = tipc_buf_acquire(BASIC_H_SIZE); - if (r_buf) { - msg = buf_msg(r_buf); - tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, - BASIC_H_SIZE, msg_orignode(msg)); - msg_set_errcode(msg, TIPC_ERR_NO_PORT); - msg_set_origport(msg, destport); - msg_set_destport(msg, msg_origport(msg)); - } - if (p_ptr) - tipc_port_unlock(p_ptr); - goto exit; - } - - /* Process protocol message sent by peer */ - switch (msg_type(msg)) { - case CONN_ACK: - wakeable = tipc_port_congested(p_ptr) && p_ptr->congested; - p_ptr->acked += msg_msgcnt(msg); - if (!tipc_port_congested(p_ptr)) { - p_ptr->congested = 0; - if (wakeable) - tipc_port_wakeup(p_ptr); - } - break; - case CONN_PROBE: - r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0); - break; - default: - /* CONN_PROBE_REPLY or unrecognized - no action required */ - break; - } - p_ptr->probing_state = CONFIRMED; - tipc_port_unlock(p_ptr); -exit: - tipc_net_route_msg(r_buf); - kfree_skb(buf); -} - static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) { struct publication *publ; @@ -581,16 +447,19 @@ void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; struct sk_buff *buf = NULL; + struct tipc_msg *msg; p_ptr = tipc_port_lock(ref); if (!p_ptr) return; - if (p_ptr->connected) { - p_ptr->conn_unacked -= ack; + if (p_ptr->connected) buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); - } + tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + if (!buf) + return; + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, @@ -689,7 +558,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; - p_ptr->probing_state = CONFIRMED; + p_ptr->probing_state = TIPC_CONN_OK; p_ptr->connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); @@ -698,7 +567,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, (net_ev_handler)port_handle_node_down); res = 0; exit: - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->max_pkt = tipc_node_get_mtu(peer->node, ref); return res; } @@ -741,6 +610,7 @@ int tipc_port_disconnect(u32 ref) */ int tipc_port_shutdown(u32 ref) { + struct tipc_msg *msg; struct tipc_port *p_ptr; struct sk_buff *buf = NULL; @@ -750,149 +620,7 @@ int tipc_port_shutdown(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); return tipc_port_disconnect(ref); } - -/* - * tipc_port_iovec_rcv: Concatenate and deliver sectioned - * message for this node. - */ -static int tipc_port_iovec_rcv(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (likely(buf)) - tipc_sk_rcv(buf); - return res; -} - -/** - * tipc_send - send message sections on connection - */ -int tipc_send(struct tipc_port *p_ptr, - struct iovec const *msg_sect, - unsigned int len) -{ - u32 destnode; - int res; - - if (!p_ptr->connected) - return -EINVAL; - - p_ptr->congested = 1; - if (!tipc_port_congested(p_ptr)) { - destnode = tipc_port_peernode(p_ptr); - if (likely(!in_own_node(destnode))) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - - if (likely(res != -ELINKCONG)) { - p_ptr->congested = 0; - if (res > 0) - p_ptr->sent++; - return res; - } - } - if (tipc_port_unreliable(p_ptr)) { - p_ptr->congested = 0; - return len; - } - return -ELINKCONG; -} - -/** - * tipc_send2name - send message sections to port name - */ -int tipc_send2name(struct tipc_port *p_ptr, - struct tipc_name const *name, - unsigned int domain, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - u32 destnode = domain; - u32 destport; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_hdr_sz(msg, NAMED_H_SIZE); - msg_set_nametype(msg, name->type); - msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - destport = tipc_nametbl_translate(name->type, name->instance, &destnode); - msg_set_destnode(msg, destnode); - msg_set_destport(msg, destport); - - if (likely(destport || destnode)) { - if (likely(in_own_node(destnode))) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, - len, TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; - } - return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NAME); -} - -/** - * tipc_send2port - send message sections to port identity - */ -int tipc_send2port(struct tipc_port *p_ptr, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_lookup_scope(msg, 0); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - dest->node); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index cf4ca5b1d9a4..0e47052daa29 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -53,17 +53,13 @@ * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established - * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names - * @congested: non-zero if cannot send because of link or port congestion * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: - * @sent: # of non-empty messages sent by port - * @acked: # of non-empty message acknowledgements from connected port's peer * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: @@ -76,17 +72,13 @@ struct tipc_port { int connected; u32 conn_type; u32 conn_instance; - u32 conn_unacked; int published; - u32 congested; u32 max_pkt; u32 ref; struct tipc_msg phdr; struct list_head port_list; struct list_head wait_list; u32 waiting_pkts; - u32 sent; - u32 acked; struct list_head publications; u32 pub_count; u32 probing_state; @@ -104,8 +96,6 @@ struct tipc_port_list; u32 tipc_port_init(struct tipc_port *p_ptr, const unsigned int importance); -int tipc_reject_msg(struct sk_buff *buf, u32 err); - void tipc_acknowledge(u32 port_ref, u32 ack); void tipc_port_destroy(struct tipc_port *p_ptr); @@ -122,8 +112,6 @@ int tipc_port_disconnect(u32 portref); int tipc_port_shutdown(u32 ref); -void tipc_port_wakeup(struct tipc_port *port); - /* * The following routines require that the port be locked on entry */ @@ -136,34 +124,12 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); * TIPC messaging routines */ -int tipc_send(struct tipc_port *port, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2name(struct tipc_port *port, - struct tipc_name const *name, - u32 domain, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2port(struct tipc_port *port, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len); - int tipc_port_mcast_xmit(struct tipc_port *port, struct tipc_name_seq const *seq, struct iovec const *msg, unsigned int len); -int tipc_port_iovec_reject(struct tipc_port *p_ptr, - struct tipc_msg *hdr, - struct iovec const *msg_sect, - unsigned int len, - int err); - struct sk_buff *tipc_port_get_ports(void); -void tipc_port_proto_rcv(struct sk_buff *buf); void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); @@ -185,12 +151,6 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) spin_unlock_bh(p_ptr->lock); } -static inline int tipc_port_congested(struct tipc_port *p_ptr) -{ - return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN); -} - - static inline u32 tipc_port_peernode(struct tipc_port *p_ptr) { return msg_destnode(&p_ptr->phdr); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ef0475568f9e..ede78b144dcf 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,14 +36,17 @@ #include "core.h" #include "port.h" +#include "name_table.h" #include "node.h" - +#include "link.h" #include <linux/export.h> +#include "link.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define TIPC_FWD_MSG 1 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); @@ -123,9 +126,12 @@ static void advance_rx_queue(struct sock *sk) static void reject_rx_queue(struct sock *sk) { struct sk_buff *buf; + u32 dnode; - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + while ((buf = __skb_dequeue(&sk->sk_receive_queue))) { + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit2(buf, dnode, 0); + } } /** @@ -201,6 +207,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); @@ -303,6 +310,7 @@ static int tipc_release(struct socket *sock) struct tipc_sock *tsk; struct tipc_port *port; struct sk_buff *buf; + u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -331,7 +339,8 @@ static int tipc_release(struct socket *sock) sock->state = SS_DISCONNECTING; tipc_port_disconnect(port->ref); } - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit2(buf, dnode, 0); } } @@ -504,12 +513,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong) mask |= POLLOUT; break; case SS_READY: case SS_CONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -526,6 +535,43 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, } /** + * tipc_sk_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @dnode: node to send response message to, if any + * @buf: buffer containing protocol message + * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if + * (CONN_PROBE_REPLY) message should be forwarded. + */ +int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_port *port = &tsk->port; + int conn_cong; + + /* Ignore if connection cannot be validated: */ + if (!port->connected || !tipc_port_peer_msg(port, msg)) + goto exit; + + port->probing_state = TIPC_CONN_OK; + + if (msg_type(msg) == CONN_ACK) { + conn_cong = tipc_sk_conn_cong(tsk); + tsk->sent_unacked -= msg_msgcnt(msg); + if (conn_cong) + tipc_sock_wakeup(tsk); + } else if (msg_type(msg) == CONN_PROBE) { + if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + return TIPC_OK; + msg_set_type(msg, CONN_PROBE_REPLY); + return TIPC_FWD_MSG; + } + /* Do nothing if msg_type() == CONN_PROBE_REPLY */ +exit: + kfree_skb(buf); + return TIPC_OK; +} + +/** * dest_name_check - verify user is permitted to send to specified port name * @dest: destination address * @m: descriptor for message to be sent @@ -539,6 +585,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) { struct tipc_cfg_msg_hdr hdr; + if (unlikely(dest->addrtype == TIPC_ADDR_ID)) + return 0; if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES)) return 0; if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) @@ -575,19 +623,55 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tsk->port.congested); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @iov: message data to send + * @dsz: total length of message data + * @timeo: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct iovec *iov, size_t dsz, long timeo) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + int rc; + + do { + if (sock->state != SS_READY) { + rc = -EOPNOTSUPP; + break; + } + rc = tipc_port_mcast_xmit(&tsk->port, seq, iov, dsz); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) + sock->state = SS_CONNECTING; + break; + } + if (rc != -ELINKCONG) + break; + rc = tipc_wait_for_sndmsg(sock, &timeo); + } while (!rc); + + return rc; +} /** * tipc_sendmsg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send - * @total_len: length of message + * @dsz: amount of user data to be sent * * Message must have an destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, @@ -597,100 +681,122 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) + struct msghdr *m, size_t dsz) { + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int needs_conn; + struct tipc_msg *mhdr = &port->phdr; + struct iovec *iov = m->msg_iov; + u32 dnode, dport; + struct sk_buff *buf; + struct tipc_name_seq *seq = &dest->addr.nameseq; + u32 mtu; long timeo; - int res = -EINVAL; + int rc = -EINVAL; if (unlikely(!dest)) return -EDESTADDRREQ; + if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if (total_len > TIPC_MAX_USER_MSG_SIZE) + + if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) lock_sock(sk); - needs_conn = (sock->state != SS_READY); - if (unlikely(needs_conn)) { + if (unlikely(sock->state != SS_READY)) { if (sock->state == SS_LISTENING) { - res = -EPIPE; + rc = -EPIPE; goto exit; } if (sock->state != SS_UNCONNECTED) { - res = -EISCONN; + rc = -EISCONN; goto exit; } if (tsk->port.published) { - res = -EOPNOTSUPP; + rc = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { tsk->port.conn_type = dest->addr.name.name.type; tsk->port.conn_instance = dest->addr.name.name.instance; } - - /* Abort any pending connection attempts (very unlikely) */ - reject_rx_queue(sk); } + rc = dest_name_check(dest, m); + if (rc) + goto exit; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - do { - if (dest->addrtype == TIPC_ADDR_NAME) { - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_send2name(port, - &dest->addr.name.name, - dest->addr.name.domain, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_ID) { - res = tipc_send2port(port, - &dest->addr.id, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_MCAST) { - if (needs_conn) { - res = -EOPNOTSUPP; - break; - } - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_port_mcast_xmit(port, - &dest->addr.nameseq, - m->msg_iov, - total_len); + + if (dest->addrtype == TIPC_ADDR_MCAST) { + rc = tipc_sendmcast(sock, seq, iov, dsz, timeo); + goto exit; + } else if (dest->addrtype == TIPC_ADDR_NAME) { + u32 type = dest->addr.name.name.type; + u32 inst = dest->addr.name.name.instance; + u32 domain = dest->addr.name.domain; + + dnode = domain; + msg_set_type(mhdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(mhdr, NAMED_H_SIZE); + msg_set_nametype(mhdr, type); + msg_set_nameinst(mhdr, inst); + msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); + dport = tipc_nametbl_translate(type, inst, &dnode); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dport); + if (unlikely(!dport && !dnode)) { + rc = -EHOSTUNREACH; + goto exit; } - if (likely(res != -ELINKCONG)) { - if (needs_conn && (res >= 0)) + } else if (dest->addrtype == TIPC_ADDR_ID) { + dnode = dest->addr.id.node; + msg_set_type(mhdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(mhdr, 0); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dest->addr.id.ref); + msg_set_hdr_sz(mhdr, BASIC_H_SIZE); + } + +new_mtu: + mtu = tipc_node_get_mtu(dnode, tsk->port.ref); + rc = tipc_msg_build2(mhdr, iov, 0, dsz, mtu, &buf); + if (rc < 0) + goto exit; + + do { + rc = tipc_link_xmit2(buf, dnode, tsk->port.ref); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) sock->state = SS_CONNECTING; + rc = dsz; break; } - res = tipc_wait_for_sndmsg(sock, &timeo); - if (res) + if (rc == -EMSGSIZE) + goto new_mtu; + + if (rc != -ELINKCONG) break; - } while (1); + + rc = tipc_wait_for_sndmsg(sock, &timeo); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + + return rc; } static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; DEFINE_WAIT(wait); int done; @@ -709,37 +815,49 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, - (!port->congested || !port->connected)); + (!tsk->link_cong && + !tipc_sk_conn_cong(tsk)) || + !tsk->port.connected); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } /** - * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held + * tipc_send_stream - send stream-oriented data + * @iocb: (unused) * @sock: socket structure - * @m: message to send - * @total_len: length of message + * @m: data to send + * @dsz: total length of data to be transmitted * - * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. + * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success, or errno otherwise + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; + struct tipc_msg *mhdr = &port->phdr; + struct sk_buff *buf; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int res = -EINVAL; + u32 ref = port->ref; + int rc = -EINVAL; long timeo; + u32 dnode; + uint mtu, send, sent = 0; /* Handle implied connection establishment */ - if (unlikely(dest)) - return tipc_sendmsg(iocb, sock, m, total_len); - - if (total_len > TIPC_MAX_USER_MSG_SIZE) + if (unlikely(dest)) { + rc = tipc_sendmsg(iocb, sock, m, dsz); + if (dsz && (dsz == rc)) + tsk->sent_unacked = 1; + return rc; + } + if (dsz > (uint)INT_MAX) return -EMSGSIZE; if (iocb) @@ -747,123 +865,64 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - res = -EPIPE; + rc = -EPIPE; else - res = -ENOTCONN; + rc = -ENOTCONN; goto exit; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + dnode = tipc_port_peernode(port); + +next: + mtu = port->max_pkt; + send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); + rc = tipc_msg_build2(mhdr, m->msg_iov, sent, send, mtu, &buf); + if (unlikely(rc < 0)) + goto exit; do { - res = tipc_send(&tsk->port, m->msg_iov, total_len); - if (likely(res != -ELINKCONG)) - break; - res = tipc_wait_for_sndpkt(sock, &timeo); - if (res) - break; - } while (1); + if (likely(!tipc_sk_conn_cong(tsk))) { + rc = tipc_link_xmit2(buf, dnode, ref); + if (likely(!rc)) { + tsk->sent_unacked++; + sent += send; + if (sent == dsz) + break; + goto next; + } + if (rc == -EMSGSIZE) { + port->max_pkt = tipc_node_get_mtu(dnode, ref); + goto next; + } + if (rc != -ELINKCONG) + break; + } + rc = tipc_wait_for_sndpkt(sock, &timeo); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + return sent ? sent : rc; } /** - * tipc_send_stream - send stream-oriented data - * @iocb: (unused) + * tipc_send_packet - send a connection-oriented message + * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure - * @m: data to send - * @total_len: total length of data to be sent + * @m: message to send + * @dsz: length of data to be transmitted * - * Used for SOCK_STREAM data. + * Used for SOCK_SEQPACKET messages. * - * Returns the number of bytes sent on success (or partial success), - * or errno if no data sent + * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { - struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct msghdr my_msg; - struct iovec my_iov; - struct iovec *curr_iov; - int curr_iovlen; - char __user *curr_start; - u32 hdr_size; - int curr_left; - int bytes_to_send; - int bytes_sent; - int res; - - lock_sock(sk); - - /* Handle special cases where there is no connection */ - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) - res = tipc_send_packet(NULL, sock, m, total_len); - else - res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; - goto exit; - } - - if (unlikely(m->msg_name)) { - res = -EISCONN; - goto exit; - } - - if (total_len > (unsigned int)INT_MAX) { - res = -EMSGSIZE; - goto exit; - } - - /* - * Send each iovec entry using one or more messages - * - * Note: This algorithm is good for the most likely case - * (i.e. one large iovec entry), but could be improved to pass sets - * of small iovec entries into send_packet(). - */ - curr_iov = m->msg_iov; - curr_iovlen = m->msg_iovlen; - my_msg.msg_iov = &my_iov; - my_msg.msg_iovlen = 1; - my_msg.msg_flags = m->msg_flags; - my_msg.msg_name = NULL; - bytes_sent = 0; - - hdr_size = msg_hdr_sz(&tsk->port.phdr); - - while (curr_iovlen--) { - curr_start = curr_iov->iov_base; - curr_left = curr_iov->iov_len; - - while (curr_left) { - bytes_to_send = tsk->port.max_pkt - hdr_size; - if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) - bytes_to_send = TIPC_MAX_USER_MSG_SIZE; - if (curr_left < bytes_to_send) - bytes_to_send = curr_left; - my_iov.iov_base = curr_start; - my_iov.iov_len = bytes_to_send; - res = tipc_send_packet(NULL, sock, &my_msg, - bytes_to_send); - if (res < 0) { - if (bytes_sent) - res = bytes_sent; - goto exit; - } - curr_left -= bytes_to_send; - curr_start += bytes_to_send; - bytes_sent += bytes_to_send; - } + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; - curr_iov++; - } - res = bytes_sent; -exit: - release_sock(sk); - return res; + return tipc_send_stream(iocb, sock, m, dsz); } /** @@ -1104,8 +1163,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++port->conn_unacked >= TIPC_CONNACK_INTV)) - tipc_acknowledge(port->ref, port->conn_unacked); + (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } exit: @@ -1213,8 +1274,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV)) - tipc_acknowledge(port->ref, port->conn_unacked); + if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } @@ -1269,17 +1332,16 @@ static void tipc_data_ready(struct sock *sk) * @tsk: TIPC socket * @msg: message * - * Returns TIPC error status code and socket error status code - * once it encounters some errors + * Returns 0 (TIPC_OK) if everyting ok, -TIPC_ERR_NO_PORT otherwise */ -static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; struct tipc_port *port = &tsk->port; struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); - u32 retval = TIPC_ERR_NO_PORT; + int retval = -TIPC_ERR_NO_PORT; int res; if (msg_mcast(msg)) @@ -1382,32 +1444,37 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) * * Called with socket lock already taken; port lock may also be taken. * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message + * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded */ -static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *msg = buf_msg(buf); unsigned int limit = rcvbuf_limit(sk, buf); - u32 res = TIPC_OK; + u32 onode; + int rc = TIPC_OK; + + if (unlikely(msg_user(msg) == CONN_MANAGER)) + return tipc_sk_proto_rcv(tsk, &onode, buf); /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; if (sock->state == SS_READY) { if (msg_connected(msg)) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; } else { - res = filter_connect(tsk, &buf); - if (res != TIPC_OK || buf == NULL) - return res; + rc = filter_connect(tsk, &buf); + if (rc != TIPC_OK || buf == NULL) + return rc; } /* Reject message if there isn't room to queue it */ if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) - return TIPC_ERR_OVERLOAD; + return -TIPC_ERR_OVERLOAD; /* Enqueue message */ TIPC_SKB_CB(buf)->handle = NULL; @@ -1429,16 +1496,23 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { - u32 res; + int rc; + u32 onode; struct tipc_sock *tsk = tipc_sk(sk); uint truesize = buf->truesize; - res = filter_rcv(sk, buf); - if (unlikely(res)) - tipc_reject_msg(buf, res); + rc = filter_rcv(sk, buf); + + if (likely(!rc)) { + if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, &tsk->dupl_rcvcnt); + return 0; + } - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc)) + return 0; + + tipc_link_xmit2(buf, onode, 0); return 0; } @@ -1455,19 +1529,14 @@ int tipc_sk_rcv(struct sk_buff *buf) struct tipc_port *port; struct sock *sk; u32 dport = msg_destport(buf_msg(buf)); - int err = TIPC_OK; + int rc = TIPC_OK; uint limit; + u32 dnode; - /* Forward unresolved named message */ - if (unlikely(!dport)) { - tipc_net_route_msg(buf); - return 0; - } - - /* Validate destination */ + /* Validate destination and message */ port = tipc_port_lock(dport); if (unlikely(!port)) { - err = TIPC_ERR_NO_PORT; + rc = tipc_msg_eval(buf, &dnode); goto exit; } @@ -1478,23 +1547,25 @@ int tipc_sk_rcv(struct sk_buff *buf) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, buf); + rc = filter_rcv(sk, buf); } else { if (sk->sk_backlog.len == 0) atomic_set(&tsk->dupl_rcvcnt, 0); limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); if (sk_add_backlog(sk, buf, limit)) - err = TIPC_ERR_OVERLOAD; + rc = -TIPC_ERR_OVERLOAD; } - bh_unlock_sock(sk); tipc_port_unlock(port); - if (likely(!err)) + if (likely(!rc)) return 0; exit: - tipc_reject_msg(buf, err); - return -EHOSTUNREACH; + if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc)) + return -EHOSTUNREACH; + + tipc_link_xmit2(buf, dnode, 0); + return (rc < 0) ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -1758,6 +1829,7 @@ static int tipc_shutdown(struct socket *sock, int how) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; struct sk_buff *buf; + u32 peer; int res; if (how != SHUT_RDWR) @@ -1778,7 +1850,8 @@ restart: goto restart; } tipc_port_disconnect(port->ref); - tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); + if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) + tipc_link_xmit2(buf, peer, 0); } else { tipc_port_shutdown(port->ref); } diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 3afcd2a70b31..2cdede9eda1b 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -38,6 +38,9 @@ #include "port.h" #include <net/sock.h> +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API @@ -45,6 +48,9 @@ * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer */ struct tipc_sock { @@ -52,6 +58,9 @@ struct tipc_sock { struct tipc_port port; unsigned int conn_timeout; atomic_t dupl_rcvcnt; + int link_cong; + uint sent_unacked; + uint rcv_unacked; }; static inline struct tipc_sock *tipc_sk(const struct sock *sk) @@ -69,6 +78,11 @@ static inline void tipc_sock_wakeup(struct tipc_sock *tsk) tsk->sk.sk_write_space(&tsk->sk); } +static inline int tipc_sk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} + int tipc_sk_rcv(struct sk_buff *buf); #endif |