summaryrefslogtreecommitdiffstats
path: root/drivers/net/sfc
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-12-10 19:20:43 +0100
committerDavid S. Miller <davem@davemloft.net>2010-12-10 19:20:43 +0100
commitcf78f8ee3de7d8d5b47d371c95716d0e4facf1c4 (patch)
treeffd211dfe1d4f0d91fe10396b05e261865f62b61 /drivers/net/sfc
parentMerge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/lin... (diff)
parentsfc: Generalise filter spec initialisation (diff)
downloadlinux-cf78f8ee3de7d8d5b47d371c95716d0e4facf1c4.tar.xz
linux-cf78f8ee3de7d8d5b47d371c95716d0e4facf1c4.zip
Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next-2.6
Diffstat (limited to 'drivers/net/sfc')
-rw-r--r--drivers/net/sfc/efx.h5
-rw-r--r--drivers/net/sfc/ethtool.c99
-rw-r--r--drivers/net/sfc/filter.c252
-rw-r--r--drivers/net/sfc/filter.h149
-rw-r--r--drivers/net/sfc/io.h153
-rw-r--r--drivers/net/sfc/net_driver.h57
-rw-r--r--drivers/net/sfc/nic.c42
-rw-r--r--drivers/net/sfc/tx.c17
8 files changed, 468 insertions, 306 deletions
diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h
index 10a1bf40da96..003fdb35b4bb 100644
--- a/drivers/net/sfc/efx.h
+++ b/drivers/net/sfc/efx.h
@@ -74,9 +74,8 @@ extern int efx_filter_insert_filter(struct efx_nic *efx,
bool replace);
extern int efx_filter_remove_filter(struct efx_nic *efx,
struct efx_filter_spec *spec);
-extern void efx_filter_table_clear(struct efx_nic *efx,
- enum efx_filter_table_id table_id,
- enum efx_filter_priority priority);
+extern void efx_filter_clear_rx(struct efx_nic *efx,
+ enum efx_filter_priority priority);
/* Channels */
extern void efx_process_channel_now(struct efx_channel *channel);
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index aae756bf47ee..5e50e57b0ae2 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+#include <linux/in.h>
#include "net_driver.h"
#include "workarounds.h"
#include "selftest.h"
@@ -558,12 +559,8 @@ static int efx_ethtool_set_flags(struct net_device *net_dev, u32 data)
if (rc)
return rc;
- if (!(data & ETH_FLAG_NTUPLE)) {
- efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_IP,
- EFX_FILTER_PRI_MANUAL);
- efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_MAC,
- EFX_FILTER_PRI_MANUAL);
- }
+ if (!(data & ETH_FLAG_NTUPLE))
+ efx_filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
return 0;
}
@@ -582,6 +579,9 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
goto fail1;
}
+ netif_info(efx, drv, efx->net_dev, "starting %sline testing\n",
+ (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
+
/* We need rx buffers and interrupts. */
already_up = (efx->net_dev->flags & IFF_UP);
if (!already_up) {
@@ -600,9 +600,9 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
if (!already_up)
dev_close(efx->net_dev);
- netif_dbg(efx, drv, efx->net_dev, "%s %sline self-tests\n",
- rc == 0 ? "passed" : "failed",
- (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
+ netif_info(efx, drv, efx->net_dev, "%s %sline self-tests\n",
+ rc == 0 ? "passed" : "failed",
+ (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
fail2:
fail1:
@@ -921,6 +921,7 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev,
struct ethhdr *mac_entry = &ntuple->fs.h_u.ether_spec;
struct ethhdr *mac_mask = &ntuple->fs.m_u.ether_spec;
struct efx_filter_spec filter;
+ int rc;
/* Range-check action */
if (ntuple->fs.action < ETHTOOL_RXNTUPLE_ACTION_CLEAR ||
@@ -930,9 +931,16 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev,
if (~ntuple->fs.data_mask)
return -EINVAL;
+ efx_filter_init_rx(&filter, EFX_FILTER_PRI_MANUAL, 0,
+ (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) ?
+ 0xfff : ntuple->fs.action);
+
switch (ntuple->fs.flow_type) {
case TCP_V4_FLOW:
- case UDP_V4_FLOW:
+ case UDP_V4_FLOW: {
+ u8 proto = (ntuple->fs.flow_type == TCP_V4_FLOW ?
+ IPPROTO_TCP : IPPROTO_UDP);
+
/* Must match all of destination, */
if (ip_mask->ip4dst | ip_mask->pdst)
return -EINVAL;
@@ -944,7 +952,22 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev,
/* and nothing else */
if ((u8)~ip_mask->tos | (u16)~ntuple->fs.vlan_tag_mask)
return -EINVAL;
+
+ if (!ip_mask->ip4src)
+ rc = efx_filter_set_ipv4_full(&filter, proto,
+ ip_entry->ip4dst,
+ ip_entry->pdst,
+ ip_entry->ip4src,
+ ip_entry->psrc);
+ else
+ rc = efx_filter_set_ipv4_local(&filter, proto,
+ ip_entry->ip4dst,
+ ip_entry->pdst);
+ if (rc)
+ return rc;
break;
+ }
+
case ETHER_FLOW:
/* Must match all of destination, */
if (!is_zero_ether_addr(mac_mask->h_dest))
@@ -957,58 +980,24 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev,
if (!is_broadcast_ether_addr(mac_mask->h_source) ||
mac_mask->h_proto != htons(0xffff))
return -EINVAL;
+
+ rc = efx_filter_set_eth_local(
+ &filter,
+ (ntuple->fs.vlan_tag_mask == 0xf000) ?
+ ntuple->fs.vlan_tag : EFX_FILTER_VID_UNSPEC,
+ mac_entry->h_dest);
+ if (rc)
+ return rc;
break;
+
default:
return -EINVAL;
}
- filter.priority = EFX_FILTER_PRI_MANUAL;
- filter.flags = 0;
-
- switch (ntuple->fs.flow_type) {
- case TCP_V4_FLOW:
- if (!ip_mask->ip4src)
- efx_filter_set_rx_tcp_full(&filter,
- htonl(ip_entry->ip4src),
- htons(ip_entry->psrc),
- htonl(ip_entry->ip4dst),
- htons(ip_entry->pdst));
- else
- efx_filter_set_rx_tcp_wild(&filter,
- htonl(ip_entry->ip4dst),
- htons(ip_entry->pdst));
- break;
- case UDP_V4_FLOW:
- if (!ip_mask->ip4src)
- efx_filter_set_rx_udp_full(&filter,
- htonl(ip_entry->ip4src),
- htons(ip_entry->psrc),
- htonl(ip_entry->ip4dst),
- htons(ip_entry->pdst));
- else
- efx_filter_set_rx_udp_wild(&filter,
- htonl(ip_entry->ip4dst),
- htons(ip_entry->pdst));
- break;
- case ETHER_FLOW:
- if (ntuple->fs.vlan_tag_mask == 0xf000)
- efx_filter_set_rx_mac_full(&filter,
- ntuple->fs.vlan_tag & 0xfff,
- mac_entry->h_dest);
- else
- efx_filter_set_rx_mac_wild(&filter, mac_entry->h_dest);
- break;
- }
-
- if (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_CLEAR) {
+ if (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_CLEAR)
return efx_filter_remove_filter(efx, &filter);
- } else {
- if (ntuple->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
- filter.dmaq_id = 0xfff;
- else
- filter.dmaq_id = ntuple->fs.action;
+ else
return efx_filter_insert_filter(efx, &filter, true);
- }
}
static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev,
diff --git a/drivers/net/sfc/filter.c b/drivers/net/sfc/filter.c
index 44500b54fd5f..d4722c41c4ce 100644
--- a/drivers/net/sfc/filter.c
+++ b/drivers/net/sfc/filter.c
@@ -7,6 +7,7 @@
* by the Free Software Foundation, incorporated herein by reference.
*/
+#include <linux/in.h>
#include "efx.h"
#include "filter.h"
#include "io.h"
@@ -26,19 +27,26 @@
*/
#define FILTER_CTL_SRCH_MAX 200
+enum efx_filter_table_id {
+ EFX_FILTER_TABLE_RX_IP = 0,
+ EFX_FILTER_TABLE_RX_MAC,
+ EFX_FILTER_TABLE_COUNT,
+};
+
struct efx_filter_table {
+ enum efx_filter_table_id id;
u32 offset; /* address of table relative to BAR */
unsigned size; /* number of entries */
unsigned step; /* step between entries */
unsigned used; /* number currently used */
unsigned long *used_bitmap;
struct efx_filter_spec *spec;
+ unsigned search_depth[EFX_FILTER_TYPE_COUNT];
};
struct efx_filter_state {
spinlock_t lock;
struct efx_filter_table table[EFX_FILTER_TABLE_COUNT];
- unsigned search_depth[EFX_FILTER_TYPE_COUNT];
};
/* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit
@@ -65,68 +73,203 @@ static u16 efx_filter_increment(u32 key)
}
static enum efx_filter_table_id
-efx_filter_type_table_id(enum efx_filter_type type)
+efx_filter_spec_table_id(const struct efx_filter_spec *spec)
+{
+ BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_TCP_FULL >> 2));
+ BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_TCP_WILD >> 2));
+ BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_UDP_FULL >> 2));
+ BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_UDP_WILD >> 2));
+ BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_MAC_FULL >> 2));
+ BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_MAC_WILD >> 2));
+ EFX_BUG_ON_PARANOID(spec->type == EFX_FILTER_UNSPEC);
+ return spec->type >> 2;
+}
+
+static struct efx_filter_table *
+efx_filter_spec_table(struct efx_filter_state *state,
+ const struct efx_filter_spec *spec)
{
- BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_TCP_FULL >> 2));
- BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_TCP_WILD >> 2));
- BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_UDP_FULL >> 2));
- BUILD_BUG_ON(EFX_FILTER_TABLE_RX_IP != (EFX_FILTER_RX_UDP_WILD >> 2));
- BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_RX_MAC_FULL >> 2));
- BUILD_BUG_ON(EFX_FILTER_TABLE_RX_MAC != (EFX_FILTER_RX_MAC_WILD >> 2));
- return type >> 2;
+ if (spec->type == EFX_FILTER_UNSPEC)
+ return NULL;
+ else
+ return &state->table[efx_filter_spec_table_id(spec)];
}
-static void
-efx_filter_table_reset_search_depth(struct efx_filter_state *state,
- enum efx_filter_table_id table_id)
+static void efx_filter_table_reset_search_depth(struct efx_filter_table *table)
{
- memset(state->search_depth + (table_id << 2), 0,
- sizeof(state->search_depth[0]) << 2);
+ memset(table->search_depth, 0, sizeof(table->search_depth));
}
static void efx_filter_push_rx_limits(struct efx_nic *efx)
{
struct efx_filter_state *state = efx->filter_state;
+ struct efx_filter_table *table;
efx_oword_t filter_ctl;
efx_reado(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
+ table = &state->table[EFX_FILTER_TABLE_RX_IP];
EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_FULL_SRCH_LIMIT,
- state->search_depth[EFX_FILTER_RX_TCP_FULL] +
+ table->search_depth[EFX_FILTER_TCP_FULL] +
FILTER_CTL_SRCH_FUDGE_FULL);
EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_WILD_SRCH_LIMIT,
- state->search_depth[EFX_FILTER_RX_TCP_WILD] +
+ table->search_depth[EFX_FILTER_TCP_WILD] +
FILTER_CTL_SRCH_FUDGE_WILD);
EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_FULL_SRCH_LIMIT,
- state->search_depth[EFX_FILTER_RX_UDP_FULL] +
+ table->search_depth[EFX_FILTER_UDP_FULL] +
FILTER_CTL_SRCH_FUDGE_FULL);
EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_WILD_SRCH_LIMIT,
- state->search_depth[EFX_FILTER_RX_UDP_WILD] +
+ table->search_depth[EFX_FILTER_UDP_WILD] +
FILTER_CTL_SRCH_FUDGE_WILD);
- if (state->table[EFX_FILTER_TABLE_RX_MAC].size) {
+ table = &state->table[EFX_FILTER_TABLE_RX_MAC];
+ if (table->size) {
EFX_SET_OWORD_FIELD(
filter_ctl, FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT,
- state->search_depth[EFX_FILTER_RX_MAC_FULL] +
+ table->search_depth[EFX_FILTER_MAC_FULL] +
FILTER_CTL_SRCH_FUDGE_FULL);
EFX_SET_OWORD_FIELD(
filter_ctl, FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT,
- state->search_depth[EFX_FILTER_RX_MAC_WILD] +
+ table->search_depth[EFX_FILTER_MAC_WILD] +
FILTER_CTL_SRCH_FUDGE_WILD);
}
efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
}
+static inline void __efx_filter_set_ipv4(struct efx_filter_spec *spec,
+ __be32 host1, __be16 port1,
+ __be32 host2, __be16 port2)
+{
+ spec->data[0] = ntohl(host1) << 16 | ntohs(port1);
+ spec->data[1] = ntohs(port2) << 16 | ntohl(host1) >> 16;
+ spec->data[2] = ntohl(host2);
+}
+
+/**
+ * efx_filter_set_ipv4_local - specify IPv4 host, transport protocol and port
+ * @spec: Specification to initialise
+ * @proto: Transport layer protocol number
+ * @host: Local host address (network byte order)
+ * @port: Local port (network byte order)
+ */
+int efx_filter_set_ipv4_local(struct efx_filter_spec *spec, u8 proto,
+ __be32 host, __be16 port)
+{
+ __be32 host1;
+ __be16 port1;
+
+ EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX));
+
+ /* This cannot currently be combined with other filtering */
+ if (spec->type != EFX_FILTER_UNSPEC)
+ return -EPROTONOSUPPORT;
+
+ if (port == 0)
+ return -EINVAL;
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ spec->type = EFX_FILTER_TCP_WILD;
+ break;
+ case IPPROTO_UDP:
+ spec->type = EFX_FILTER_UDP_WILD;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ /* Filter is constructed in terms of source and destination,
+ * with the odd wrinkle that the ports are swapped in a UDP
+ * wildcard filter. We need to convert from local and remote
+ * (= zero for wildcard) addresses.
+ */
+ host1 = 0;
+ if (proto != IPPROTO_UDP) {
+ port1 = 0;
+ } else {
+ port1 = port;
+ port = 0;
+ }
+
+ __efx_filter_set_ipv4(spec, host1, port1, host, port);
+ return 0;
+}
+
+/**
+ * efx_filter_set_ipv4_full - specify IPv4 hosts, transport protocol and ports
+ * @spec: Specification to initialise
+ * @proto: Transport layer protocol number
+ * @host: Local host address (network byte order)
+ * @port: Local port (network byte order)
+ * @rhost: Remote host address (network byte order)
+ * @rport: Remote port (network byte order)
+ */
+int efx_filter_set_ipv4_full(struct efx_filter_spec *spec, u8 proto,
+ __be32 host, __be16 port,
+ __be32 rhost, __be16 rport)
+{
+ EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX));
+
+ /* This cannot currently be combined with other filtering */
+ if (spec->type != EFX_FILTER_UNSPEC)
+ return -EPROTONOSUPPORT;
+
+ if (port == 0 || rport == 0)
+ return -EINVAL;
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ spec->type = EFX_FILTER_TCP_FULL;
+ break;
+ case IPPROTO_UDP:
+ spec->type = EFX_FILTER_UDP_FULL;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ __efx_filter_set_ipv4(spec, rhost, rport, host, port);
+ return 0;
+}
+
+/**
+ * efx_filter_set_eth_local - specify local Ethernet address and optional VID
+ * @spec: Specification to initialise
+ * @vid: VLAN ID to match, or %EFX_FILTER_VID_UNSPEC
+ * @addr: Local Ethernet MAC address
+ */
+int efx_filter_set_eth_local(struct efx_filter_spec *spec,
+ u16 vid, const u8 *addr)
+{
+ EFX_BUG_ON_PARANOID(!(spec->flags & EFX_FILTER_FLAG_RX));
+
+ /* This cannot currently be combined with other filtering */
+ if (spec->type != EFX_FILTER_UNSPEC)
+ return -EPROTONOSUPPORT;
+
+ if (vid == EFX_FILTER_VID_UNSPEC) {
+ spec->type = EFX_FILTER_MAC_WILD;
+ spec->data[0] = 0;
+ } else {
+ spec->type = EFX_FILTER_MAC_FULL;
+ spec->data[0] = vid;
+ }
+
+ spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5];
+ spec->data[2] = addr[0] << 8 | addr[1];
+ return 0;
+}
+
/* Build a filter entry and return its n-tuple key. */
static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec)
{
u32 data3;
- switch (efx_filter_type_table_id(spec->type)) {
+ switch (efx_filter_spec_table_id(spec)) {
case EFX_FILTER_TABLE_RX_IP: {
- bool is_udp = (spec->type == EFX_FILTER_RX_UDP_FULL ||
- spec->type == EFX_FILTER_RX_UDP_WILD);
+ bool is_udp = (spec->type == EFX_FILTER_UDP_FULL ||
+ spec->type == EFX_FILTER_UDP_WILD);
EFX_POPULATE_OWORD_7(
*filter,
FRF_BZ_RSS_EN,
@@ -143,7 +286,7 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec)
}
case EFX_FILTER_TABLE_RX_MAC: {
- bool is_wild = spec->type == EFX_FILTER_RX_MAC_WILD;
+ bool is_wild = spec->type == EFX_FILTER_MAC_WILD;
EFX_POPULATE_OWORD_8(
*filter,
FRF_CZ_RMFT_RSS_EN,
@@ -206,6 +349,14 @@ found:
return filter_idx;
}
+/* Construct/deconstruct external filter IDs */
+
+static inline int
+efx_filter_make_id(enum efx_filter_table_id table_id, unsigned index)
+{
+ return table_id << 16 | index;
+}
+
/**
* efx_filter_insert_filter - add or replace a filter
* @efx: NIC in which to insert the filter
@@ -213,30 +364,28 @@ found:
* @replace: Flag for whether the specified filter may replace a filter
* with an identical match expression and equal or lower priority
*
- * On success, return the filter index within its table.
+ * On success, return the filter ID.
* On failure, return a negative error code.
*/
int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec,
bool replace)
{
struct efx_filter_state *state = efx->filter_state;
- enum efx_filter_table_id table_id =
- efx_filter_type_table_id(spec->type);
- struct efx_filter_table *table = &state->table[table_id];
+ struct efx_filter_table *table = efx_filter_spec_table(state, spec);
struct efx_filter_spec *saved_spec;
efx_oword_t filter;
int filter_idx, depth;
u32 key;
int rc;
- if (table->size == 0)
+ if (!table || table->size == 0)
return -EINVAL;
key = efx_filter_build(&filter, spec);
netif_vdbg(efx, hw, efx->net_dev,
"%s: type %d search_depth=%d", __func__, spec->type,
- state->search_depth[spec->type]);
+ table->search_depth[spec->type]);
spin_lock_bh(&state->lock);
@@ -263,8 +412,8 @@ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec,
}
*saved_spec = *spec;
- if (state->search_depth[spec->type] < depth) {
- state->search_depth[spec->type] = depth;
+ if (table->search_depth[spec->type] < depth) {
+ table->search_depth[spec->type] = depth;
efx_filter_push_rx_limits(efx);
}
@@ -273,6 +422,7 @@ int efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec,
netif_vdbg(efx, hw, efx->net_dev,
"%s: filter type %d index %d rxq %u set",
__func__, spec->type, filter_idx, spec->dmaq_id);
+ rc = efx_filter_make_id(table->id, filter_idx);
out:
spin_unlock_bh(&state->lock);
@@ -306,15 +456,16 @@ static void efx_filter_table_clear_entry(struct efx_nic *efx,
int efx_filter_remove_filter(struct efx_nic *efx, struct efx_filter_spec *spec)
{
struct efx_filter_state *state = efx->filter_state;
- enum efx_filter_table_id table_id =
- efx_filter_type_table_id(spec->type);
- struct efx_filter_table *table = &state->table[table_id];
+ struct efx_filter_table *table = efx_filter_spec_table(state, spec);
struct efx_filter_spec *saved_spec;
efx_oword_t filter;
int filter_idx, depth;
u32 key;
int rc;
+ if (!table)
+ return -EINVAL;
+
key = efx_filter_build(&filter, spec);
spin_lock_bh(&state->lock);
@@ -332,7 +483,7 @@ int efx_filter_remove_filter(struct efx_nic *efx, struct efx_filter_spec *spec)
efx_filter_table_clear_entry(efx, table, filter_idx);
if (table->used == 0)
- efx_filter_table_reset_search_depth(state, table_id);
+ efx_filter_table_reset_search_depth(table);
rc = 0;
out:
@@ -340,15 +491,9 @@ out:
return rc;
}
-/**
- * efx_filter_table_clear - remove filters from a table by priority
- * @efx: NIC from which to remove the filters
- * @table_id: Table from which to remove the filters
- * @priority: Maximum priority to remove
- */
-void efx_filter_table_clear(struct efx_nic *efx,
- enum efx_filter_table_id table_id,
- enum efx_filter_priority priority)
+static void efx_filter_table_clear(struct efx_nic *efx,
+ enum efx_filter_table_id table_id,
+ enum efx_filter_priority priority)
{
struct efx_filter_state *state = efx->filter_state;
struct efx_filter_table *table = &state->table[table_id];
@@ -360,11 +505,22 @@ void efx_filter_table_clear(struct efx_nic *efx,
if (table->spec[filter_idx].priority <= priority)
efx_filter_table_clear_entry(efx, table, filter_idx);
if (table->used == 0)
- efx_filter_table_reset_search_depth(state, table_id);
+ efx_filter_table_reset_search_depth(table);
spin_unlock_bh(&state->lock);
}
+/**
+ * efx_filter_clear_rx - remove RX filters by priority
+ * @efx: NIC from which to remove the filters
+ * @priority: Maximum priority to remove
+ */
+void efx_filter_clear_rx(struct efx_nic *efx, enum efx_filter_priority priority)
+{
+ efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_IP, priority);
+ efx_filter_table_clear(efx, EFX_FILTER_TABLE_RX_MAC, priority);
+}
+
/* Restore filter stater after reset */
void efx_restore_filters(struct efx_nic *efx)
{
@@ -407,6 +563,7 @@ int efx_probe_filters(struct efx_nic *efx)
if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
table = &state->table[EFX_FILTER_TABLE_RX_IP];
+ table->id = EFX_FILTER_TABLE_RX_IP;
table->offset = FR_BZ_RX_FILTER_TBL0;
table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
table->step = FR_BZ_RX_FILTER_TBL0_STEP;
@@ -414,6 +571,7 @@ int efx_probe_filters(struct efx_nic *efx)
if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
table = &state->table[EFX_FILTER_TABLE_RX_MAC];
+ table->id = EFX_FILTER_TABLE_RX_MAC;
table->offset = FR_CZ_RX_MAC_FILTER_TBL0;
table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
diff --git a/drivers/net/sfc/filter.h b/drivers/net/sfc/filter.h
index a53319ded79c..872f2132a496 100644
--- a/drivers/net/sfc/filter.h
+++ b/drivers/net/sfc/filter.h
@@ -12,31 +12,27 @@
#include <linux/types.h>
-enum efx_filter_table_id {
- EFX_FILTER_TABLE_RX_IP = 0,
- EFX_FILTER_TABLE_RX_MAC,
- EFX_FILTER_TABLE_COUNT,
-};
-
/**
* enum efx_filter_type - type of hardware filter
- * @EFX_FILTER_RX_TCP_FULL: RX, matching TCP/IPv4 4-tuple
- * @EFX_FILTER_RX_TCP_WILD: RX, matching TCP/IPv4 destination (host, port)
- * @EFX_FILTER_RX_UDP_FULL: RX, matching UDP/IPv4 4-tuple
- * @EFX_FILTER_RX_UDP_WILD: RX, matching UDP/IPv4 destination (host, port)
- * @EFX_FILTER_RX_MAC_FULL: RX, matching Ethernet destination MAC address, VID
- * @EFX_FILTER_RX_MAC_WILD: RX, matching Ethernet destination MAC address
+ * @EFX_FILTER_TCP_FULL: Matching TCP/IPv4 4-tuple
+ * @EFX_FILTER_TCP_WILD: Matching TCP/IPv4 destination (host, port)
+ * @EFX_FILTER_UDP_FULL: Matching UDP/IPv4 4-tuple
+ * @EFX_FILTER_UDP_WILD: Matching UDP/IPv4 destination (host, port)
+ * @EFX_FILTER_MAC_FULL: Matching Ethernet destination MAC address, VID
+ * @EFX_FILTER_MAC_WILD: Matching Ethernet destination MAC address
+ * @EFX_FILTER_UNSPEC: Match type is unspecified
*
- * Falcon NICs only support the RX TCP/IPv4 and UDP/IPv4 filter types.
+ * Falcon NICs only support the TCP/IPv4 and UDP/IPv4 filter types.
*/
enum efx_filter_type {
- EFX_FILTER_RX_TCP_FULL = 0,
- EFX_FILTER_RX_TCP_WILD,
- EFX_FILTER_RX_UDP_FULL,
- EFX_FILTER_RX_UDP_WILD,
- EFX_FILTER_RX_MAC_FULL = 4,
- EFX_FILTER_RX_MAC_WILD,
- EFX_FILTER_TYPE_COUNT,
+ EFX_FILTER_TCP_FULL = 0,
+ EFX_FILTER_TCP_WILD,
+ EFX_FILTER_UDP_FULL,
+ EFX_FILTER_UDP_WILD,
+ EFX_FILTER_MAC_FULL = 4,
+ EFX_FILTER_MAC_WILD,
+ EFX_FILTER_TYPE_COUNT, /* number of specific types */
+ EFX_FILTER_UNSPEC = 0xf,
};
/**
@@ -63,13 +59,13 @@ enum efx_filter_priority {
* @EFX_FILTER_FLAG_RX_OVERRIDE_IP: Enables a MAC filter to override
* any IP filter that matches the same packet. By default, IP
* filters take precedence.
- *
- * Currently, no flags are defined for TX filters.
+ * @EFX_FILTER_FLAG_RX: Filter is for RX
*/
enum efx_filter_flags {
EFX_FILTER_FLAG_RX_RSS = 0x01,
EFX_FILTER_FLAG_RX_SCATTER = 0x02,
EFX_FILTER_FLAG_RX_OVERRIDE_IP = 0x04,
+ EFX_FILTER_FLAG_RX = 0x08,
};
/**
@@ -91,99 +87,26 @@ struct efx_filter_spec {
u32 data[3];
};
-/**
- * efx_filter_set_rx_tcp_full - specify RX filter with TCP/IPv4 full match
- * @spec: Specification to initialise
- * @shost: Source host address (host byte order)
- * @sport: Source port (host byte order)
- * @dhost: Destination host address (host byte order)
- * @dport: Destination port (host byte order)
- */
-static inline void
-efx_filter_set_rx_tcp_full(struct efx_filter_spec *spec,
- u32 shost, u16 sport, u32 dhost, u16 dport)
-{
- spec->type = EFX_FILTER_RX_TCP_FULL;
- spec->data[0] = sport | shost << 16;
- spec->data[1] = dport << 16 | shost >> 16;
- spec->data[2] = dhost;
-}
-
-/**
- * efx_filter_set_rx_tcp_wild - specify RX filter with TCP/IPv4 wildcard match
- * @spec: Specification to initialise
- * @dhost: Destination host address (host byte order)
- * @dport: Destination port (host byte order)
- */
-static inline void
-efx_filter_set_rx_tcp_wild(struct efx_filter_spec *spec, u32 dhost, u16 dport)
-{
- spec->type = EFX_FILTER_RX_TCP_WILD;
- spec->data[0] = 0;
- spec->data[1] = dport << 16;
- spec->data[2] = dhost;
-}
-
-/**
- * efx_filter_set_rx_udp_full - specify RX filter with UDP/IPv4 full match
- * @spec: Specification to initialise
- * @shost: Source host address (host byte order)
- * @sport: Source port (host byte order)
- * @dhost: Destination host address (host byte order)
- * @dport: Destination port (host byte order)
- */
-static inline void
-efx_filter_set_rx_udp_full(struct efx_filter_spec *spec,
- u32 shost, u16 sport, u32 dhost, u16 dport)
-{
- spec->type = EFX_FILTER_RX_UDP_FULL;
- spec->data[0] = sport | shost << 16;
- spec->data[1] = dport << 16 | shost >> 16;
- spec->data[2] = dhost;
-}
-
-/**
- * efx_filter_set_rx_udp_wild - specify RX filter with UDP/IPv4 wildcard match
- * @spec: Specification to initialise
- * @dhost: Destination host address (host byte order)
- * @dport: Destination port (host byte order)
- */
-static inline void
-efx_filter_set_rx_udp_wild(struct efx_filter_spec *spec, u32 dhost, u16 dport)
+static inline void efx_filter_init_rx(struct efx_filter_spec *spec,
+ enum efx_filter_priority priority,
+ enum efx_filter_flags flags,
+ unsigned rxq_id)
{
- spec->type = EFX_FILTER_RX_UDP_WILD;
- spec->data[0] = dport;
- spec->data[1] = 0;
- spec->data[2] = dhost;
+ spec->type = EFX_FILTER_UNSPEC;
+ spec->priority = priority;
+ spec->flags = EFX_FILTER_FLAG_RX | flags;
+ spec->dmaq_id = rxq_id;
}
-/**
- * efx_filter_set_rx_mac_full - specify RX filter with MAC full match
- * @spec: Specification to initialise
- * @vid: VLAN ID
- * @addr: Destination MAC address
- */
-static inline void efx_filter_set_rx_mac_full(struct efx_filter_spec *spec,
- u16 vid, const u8 *addr)
-{
- spec->type = EFX_FILTER_RX_MAC_FULL;
- spec->data[0] = vid;
- spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5];
- spec->data[2] = addr[0] << 8 | addr[1];
-}
-
-/**
- * efx_filter_set_rx_mac_full - specify RX filter with MAC wildcard match
- * @spec: Specification to initialise
- * @addr: Destination MAC address
- */
-static inline void efx_filter_set_rx_mac_wild(struct efx_filter_spec *spec,
- const u8 *addr)
-{
- spec->type = EFX_FILTER_RX_MAC_WILD;
- spec->data[0] = 0;
- spec->data[1] = addr[2] << 24 | addr[3] << 16 | addr[4] << 8 | addr[5];
- spec->data[2] = addr[0] << 8 | addr[1];
-}
+extern int efx_filter_set_ipv4_local(struct efx_filter_spec *spec, u8 proto,
+ __be32 host, __be16 port);
+extern int efx_filter_set_ipv4_full(struct efx_filter_spec *spec, u8 proto,
+ __be32 host, __be16 port,
+ __be32 rhost, __be16 rport);
+extern int efx_filter_set_eth_local(struct efx_filter_spec *spec,
+ u16 vid, const u8 *addr);
+enum {
+ EFX_FILTER_VID_UNSPEC = 0xffff,
+};
#endif /* EFX_FILTER_H */
diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h
index 85a99fe87437..6da4ae20a039 100644
--- a/drivers/net/sfc/io.h
+++ b/drivers/net/sfc/io.h
@@ -22,28 +22,39 @@
*
* Notes on locking strategy:
*
- * Most NIC registers require 16-byte (or 8-byte, for SRAM) atomic writes
- * which necessitates locking.
- * Under normal operation few writes to NIC registers are made and these
- * registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and TX_DESC_UPD_REG) are special
- * cased to allow 4-byte (hence lockless) accesses.
+ * Most CSRs are 128-bit (oword) and therefore cannot be read or
+ * written atomically. Access from the host is buffered by the Bus
+ * Interface Unit (BIU). Whenever the host reads from the lowest
+ * address of such a register, or from the address of a different such
+ * register, the BIU latches the register's value. Subsequent reads
+ * from higher addresses of the same register will read the latched
+ * value. Whenever the host writes part of such a register, the BIU
+ * collects the written value and does not write to the underlying
+ * register until all 4 dwords have been written. A similar buffering
+ * scheme applies to host access to the NIC's 64-bit SRAM.
*
- * It *is* safe to write to these 4-byte registers in the middle of an
- * access to an 8-byte or 16-byte register. We therefore use a
- * spinlock to protect accesses to the larger registers, but no locks
- * for the 4-byte registers.
+ * Access to different CSRs and 64-bit SRAM words must be serialised,
+ * since interleaved access can result in lost writes or lost
+ * information from read-to-clear fields. We use efx_nic::biu_lock
+ * for this. (We could use separate locks for read and write, but
+ * this is not normally a performance bottleneck.)
*
- * A write barrier is needed to ensure that DW3 is written after DW0/1/2
- * due to the way the 16byte registers are "collected" in the BIU.
+ * The DMA descriptor pointers (RX_DESC_UPD and TX_DESC_UPD) are
+ * 128-bit but are special-cased in the BIU to avoid the need for
+ * locking in the host:
*
- * We also lock when carrying out reads, to ensure consistency of the
- * data (made possible since the BIU reads all 128 bits into a cache).
- * Reads are very rare, so this isn't a significant performance
- * impact. (Most data transferred from NIC to host is DMAed directly
- * into host memory).
- *
- * I/O BAR access uses locks for both reads and writes (but is only provided
- * for testing purposes).
+ * - They are write-only.
+ * - The semantics of writing to these registers are such that
+ * replacing the low 96 bits with zero does not affect functionality.
+ * - If the host writes to the last dword address of such a register
+ * (i.e. the high 32 bits) the underlying register will always be
+ * written. If the collector does not hold values for the low 96
+ * bits of the register, they will be written as zero. Writing to
+ * the last qword does not have this effect and must not be done.
+ * - If the host writes to the address of any other part of such a
+ * register while the collector already holds values for some other
+ * register, the write is discarded and the collector maintains its
+ * current state.
*/
#if BITS_PER_LONG == 64
@@ -72,7 +83,7 @@ static inline __le32 _efx_readd(struct efx_nic *efx, unsigned int reg)
return (__force __le32)__raw_readl(efx->membase + reg);
}
-/* Writes to a normal 16-byte Efx register, locking as appropriate. */
+/* Write a normal 128-bit CSR, locking as appropriate. */
static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
unsigned int reg)
{
@@ -85,21 +96,18 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
spin_lock_irqsave(&efx->biu_lock, flags);
#ifdef EFX_USE_QWORD_IO
_efx_writeq(efx, value->u64[0], reg + 0);
- wmb();
_efx_writeq(efx, value->u64[1], reg + 8);
#else
_efx_writed(efx, value->u32[0], reg + 0);
_efx_writed(efx, value->u32[1], reg + 4);
_efx_writed(efx, value->u32[2], reg + 8);
- wmb();
_efx_writed(efx, value->u32[3], reg + 12);
#endif
mmiowb();
spin_unlock_irqrestore(&efx->biu_lock, flags);
}
-/* Write an 8-byte NIC SRAM entry through the supplied mapping,
- * locking as appropriate. */
+/* Write 64-bit SRAM through the supplied mapping, locking as appropriate. */
static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
efx_qword_t *value, unsigned int index)
{
@@ -115,36 +123,25 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
__raw_writeq((__force u64)value->u64[0], membase + addr);
#else
__raw_writel((__force u32)value->u32[0], membase + addr);
- wmb();
__raw_writel((__force u32)value->u32[1], membase + addr + 4);
#endif
mmiowb();
spin_unlock_irqrestore(&efx->biu_lock, flags);
}
-/* Write dword to NIC register that allows partial writes
- *
- * Some registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and
- * TX_DESC_UPD_REG) can be written to as a single dword. This allows
- * for lockless writes.
- */
+/* Write a 32-bit CSR or the last dword of a special 128-bit CSR */
static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
unsigned int reg)
{
netif_vdbg(efx, hw, efx->net_dev,
- "writing partial register %x with "EFX_DWORD_FMT"\n",
+ "writing register %x with "EFX_DWORD_FMT"\n",
reg, EFX_DWORD_VAL(*value));
/* No lock required */
_efx_writed(efx, value->u32[0], reg);
}
-/* Read from a NIC register
- *
- * This reads an entire 16-byte register in one go, locking as
- * appropriate. It is essential to read the first dword first, as this
- * prompts the NIC to load the current value into the shadow register.
- */
+/* Read a 128-bit CSR, locking as appropriate. */
static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value,
unsigned int reg)
{
@@ -152,7 +149,6 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value,
spin_lock_irqsave(&efx->biu_lock, flags);
value->u32[0] = _efx_readd(efx, reg + 0);
- rmb();
value->u32[1] = _efx_readd(efx, reg + 4);
value->u32[2] = _efx_readd(efx, reg + 8);
value->u32[3] = _efx_readd(efx, reg + 12);
@@ -163,8 +159,7 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value,
EFX_OWORD_VAL(*value));
}
-/* Read an 8-byte SRAM entry through supplied mapping,
- * locking as appropriate. */
+/* Read 64-bit SRAM through the supplied mapping, locking as appropriate. */
static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase,
efx_qword_t *value, unsigned int index)
{
@@ -176,7 +171,6 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase,
value->u64[0] = (__force __le64)__raw_readq(membase + addr);
#else
value->u32[0] = (__force __le32)__raw_readl(membase + addr);
- rmb();
value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4);
#endif
spin_unlock_irqrestore(&efx->biu_lock, flags);
@@ -186,7 +180,7 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase,
addr, EFX_QWORD_VAL(*value));
}
-/* Read dword from register that allows partial writes (sic) */
+/* Read a 32-bit CSR or SRAM */
static inline void efx_readd(struct efx_nic *efx, efx_dword_t *value,
unsigned int reg)
{
@@ -196,28 +190,28 @@ static inline void efx_readd(struct efx_nic *efx, efx_dword_t *value,
reg, EFX_DWORD_VAL(*value));
}
-/* Write to a register forming part of a table */
+/* Write a 128-bit CSR forming part of a table */
static inline void efx_writeo_table(struct efx_nic *efx, efx_oword_t *value,
unsigned int reg, unsigned int index)
{
efx_writeo(efx, value, reg + index * sizeof(efx_oword_t));
}
-/* Read to a register forming part of a table */
+/* Read a 128-bit CSR forming part of a table */
static inline void efx_reado_table(struct efx_nic *efx, efx_oword_t *value,
unsigned int reg, unsigned int index)
{
efx_reado(efx, value, reg + index * sizeof(efx_oword_t));
}
-/* Write to a dword register forming part of a table */
+/* Write a 32-bit CSR forming part of a table, or 32-bit SRAM */
static inline void efx_writed_table(struct efx_nic *efx, efx_dword_t *value,
unsigned int reg, unsigned int index)
{
efx_writed(efx, value, reg + index * sizeof(efx_oword_t));
}
-/* Read from a dword register forming part of a table */
+/* Read a 32-bit CSR forming part of a table, or 32-bit SRAM */
static inline void efx_readd_table(struct efx_nic *efx, efx_dword_t *value,
unsigned int reg, unsigned int index)
{
@@ -231,29 +225,54 @@ static inline void efx_readd_table(struct efx_nic *efx, efx_dword_t *value,
#define EFX_PAGED_REG(page, reg) \
((page) * EFX_PAGE_BLOCK_SIZE + (reg))
-/* As for efx_writeo(), but for a page-mapped register. */
-static inline void efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
- unsigned int reg, unsigned int page)
+/* Write the whole of RX_DESC_UPD or TX_DESC_UPD */
+static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
+ unsigned int reg, unsigned int page)
{
- efx_writeo(efx, value, EFX_PAGED_REG(page, reg));
-}
+ reg = EFX_PAGED_REG(page, reg);
-/* As for efx_writed(), but for a page-mapped register. */
-static inline void efx_writed_page(struct efx_nic *efx, efx_dword_t *value,
- unsigned int reg, unsigned int page)
+ netif_vdbg(efx, hw, efx->net_dev,
+ "writing register %x with " EFX_OWORD_FMT "\n", reg,
+ EFX_OWORD_VAL(*value));
+
+#ifdef EFX_USE_QWORD_IO
+ _efx_writeq(efx, value->u64[0], reg + 0);
+#else
+ _efx_writed(efx, value->u32[0], reg + 0);
+ _efx_writed(efx, value->u32[1], reg + 4);
+#endif
+ _efx_writed(efx, value->u32[2], reg + 8);
+ _efx_writed(efx, value->u32[3], reg + 12);
+}
+#define efx_writeo_page(efx, value, reg, page) \
+ _efx_writeo_page(efx, value, \
+ reg + \
+ BUILD_BUG_ON_ZERO((reg) != 0x830 && (reg) != 0xa10), \
+ page)
+
+/* Write a page-mapped 32-bit CSR (EVQ_RPTR or the high bits of
+ * RX_DESC_UPD or TX_DESC_UPD)
+ */
+static inline void _efx_writed_page(struct efx_nic *efx, efx_dword_t *value,
+ unsigned int reg, unsigned int page)
{
efx_writed(efx, value, EFX_PAGED_REG(page, reg));
}
-
-/* Write dword to page-mapped register with an extra lock.
- *
- * As for efx_writed_page(), but for a register that suffers from
- * SFC bug 3181. Take out a lock so the BIU collector cannot be
- * confused. */
-static inline void efx_writed_page_locked(struct efx_nic *efx,
- efx_dword_t *value,
- unsigned int reg,
- unsigned int page)
+#define efx_writed_page(efx, value, reg, page) \
+ _efx_writed_page(efx, value, \
+ reg + \
+ BUILD_BUG_ON_ZERO((reg) != 0x400 && (reg) != 0x83c \
+ && (reg) != 0xa1c), \
+ page)
+
+/* Write TIMER_COMMAND. This is a page-mapped 32-bit CSR, but a bug
+ * in the BIU means that writes to TIMER_COMMAND[0] invalidate the
+ * collector register.
+ */
+static inline void _efx_writed_page_locked(struct efx_nic *efx,
+ efx_dword_t *value,
+ unsigned int reg,
+ unsigned int page)
{
unsigned long flags __attribute__ ((unused));
@@ -265,5 +284,9 @@ static inline void efx_writed_page_locked(struct efx_nic *efx,
efx_writed(efx, value, EFX_PAGED_REG(page, reg));
}
}
+#define efx_writed_page_locked(efx, value, reg, page) \
+ _efx_writed_page_locked(efx, value, \
+ reg + BUILD_BUG_ON_ZERO((reg) != 0x420), \
+ page)
#endif /* EFX_IO_H */
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index 4c12332434b7..76f2fb197f0a 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -142,6 +142,12 @@ struct efx_tx_buffer {
* @flushed: Used when handling queue flushing
* @read_count: Current read pointer.
* This is the number of buffers that have been removed from both rings.
+ * @old_write_count: The value of @write_count when last checked.
+ * This is here for performance reasons. The xmit path will
+ * only get the up-to-date value of @write_count if this
+ * variable indicates that the queue is empty. This is to
+ * avoid cache-line ping-pong between the xmit path and the
+ * completion path.
* @stopped: Stopped count.
* Set if this TX queue is currently stopping its port.
* @insert_count: Current insert pointer
@@ -163,6 +169,10 @@ struct efx_tx_buffer {
* @tso_long_headers: Number of packets with headers too long for standard
* blocks
* @tso_packets: Number of packets via the TSO xmit path
+ * @pushes: Number of times the TX push feature has been used
+ * @empty_read_count: If the completion path has seen the queue as empty
+ * and the transmission path has not yet checked this, the value of
+ * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
*/
struct efx_tx_queue {
/* Members which don't change on the fast path */
@@ -177,6 +187,7 @@ struct efx_tx_queue {
/* Members used mainly on the completion path */
unsigned int read_count ____cacheline_aligned_in_smp;
+ unsigned int old_write_count;
int stopped;
/* Members used only on the xmit path */
@@ -187,6 +198,11 @@ struct efx_tx_queue {
unsigned int tso_bursts;
unsigned int tso_long_headers;
unsigned int tso_packets;
+ unsigned int pushes;
+
+ /* Members shared between paths and sometimes updated */
+ unsigned int empty_read_count ____cacheline_aligned_in_smp;
+#define EFX_EMPTY_COUNT_VALID 0x80000000
};
/**
@@ -626,10 +642,8 @@ struct efx_filter_state;
* Work items do not hold and must not acquire RTNL.
* @workqueue_name: Name of workqueue
* @reset_work: Scheduled reset workitem
- * @monitor_work: Hardware monitor workitem
* @membase_phys: Memory BAR value as physical address
* @membase: Memory BAR value
- * @biu_lock: BIU (bus interface unit) lock
* @interrupt_mode: Interrupt mode
* @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
* @irq_rx_moderation: IRQ moderation time for RX event queues
@@ -653,14 +667,9 @@ struct efx_filter_state;
* @int_error_count: Number of internal errors seen recently
* @int_error_expire: Time at which error count will be expired
* @irq_status: Interrupt status buffer
- * @last_irq_cpu: Last CPU to handle interrupt.
- * This register is written with the SMP processor ID whenever an
- * interrupt is handled. It is used by efx_nic_test_interrupt()
- * to verify that an interrupt has occurred.
* @irq_zero_count: Number of legacy IRQs seen with queue flags == 0
* @fatal_irq_level: IRQ level (bit number) used for serious errors
* @mtd_list: List of MTDs attached to the NIC
- * @n_rx_nodesc_drop_cnt: RX no descriptor drop count
* @nic_data: Hardware dependant state
* @mac_lock: MAC access lock. Protects @port_enabled, @phy_mode,
* @port_inhibited, efx_monitor() and efx_reconfigure_port()
@@ -673,11 +682,7 @@ struct efx_filter_state;
* @port_initialized: Port initialized?
* @net_dev: Operating system network device. Consider holding the rtnl lock
* @rx_checksum_enabled: RX checksumming enabled
- * @mac_stats: MAC statistics. These include all statistics the MACs
- * can provide. Generic code converts these into a standard
- * &struct net_device_stats.
* @stats_buffer: DMA buffer for statistics
- * @stats_lock: Statistics update lock. Serialises statistics fetches
* @mac_op: MAC interface
* @phy_type: PHY type
* @phy_op: PHY interface
@@ -695,10 +700,23 @@ struct efx_filter_state;
* @loopback_mode: Loopback status
* @loopback_modes: Supported loopback mode bitmask
* @loopback_selftest: Offline self-test private state
+ * @monitor_work: Hardware monitor workitem
+ * @biu_lock: BIU (bus interface unit) lock
+ * @last_irq_cpu: Last CPU to handle interrupt.
+ * This register is written with the SMP processor ID whenever an
+ * interrupt is handled. It is used by efx_nic_test_interrupt()
+ * to verify that an interrupt has occurred.
+ * @n_rx_nodesc_drop_cnt: RX no descriptor drop count
+ * @mac_stats: MAC statistics. These include all statistics the MACs
+ * can provide. Generic code converts these into a standard
+ * &struct net_device_stats.
+ * @stats_lock: Statistics update lock. Serialises statistics fetches
*
* This is stored in the private area of the &struct net_device.
*/
struct efx_nic {
+ /* The following fields should be written very rarely */
+
char name[IFNAMSIZ];
struct pci_dev *pci_dev;
const struct efx_nic_type *type;
@@ -707,10 +725,9 @@ struct efx_nic {
struct workqueue_struct *workqueue;
char workqueue_name[16];
struct work_struct reset_work;
- struct delayed_work monitor_work;
resource_size_t membase_phys;
void __iomem *membase;
- spinlock_t biu_lock;
+
enum efx_int_mode interrupt_mode;
bool irq_rx_adaptive;
unsigned int irq_rx_moderation;
@@ -737,7 +754,6 @@ struct efx_nic {
unsigned long int_error_expire;
struct efx_buffer irq_status;
- volatile signed int last_irq_cpu;
unsigned irq_zero_count;
unsigned fatal_irq_level;
@@ -745,8 +761,6 @@ struct efx_nic {
struct list_head mtd_list;
#endif
- unsigned n_rx_nodesc_drop_cnt;
-
void *nic_data;
struct mutex mac_lock;
@@ -758,9 +772,7 @@ struct efx_nic {
struct net_device *net_dev;
bool rx_checksum_enabled;
- struct efx_mac_stats mac_stats;
struct efx_buffer stats_buffer;
- spinlock_t stats_lock;
struct efx_mac_operations *mac_op;
@@ -786,6 +798,15 @@ struct efx_nic {
void *loopback_selftest;
struct efx_filter_state *filter_state;
+
+ /* The following fields may be written more often */
+
+ struct delayed_work monitor_work ____cacheline_aligned_in_smp;
+ spinlock_t biu_lock;
+ volatile signed int last_irq_cpu;
+ unsigned n_rx_nodesc_drop_cnt;
+ struct efx_mac_stats mac_stats;
+ spinlock_t stats_lock;
};
static inline int efx_dev_registered(struct efx_nic *efx)
diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c
index 399b12abe2fd..da386599ab68 100644
--- a/drivers/net/sfc/nic.c
+++ b/drivers/net/sfc/nic.c
@@ -362,6 +362,35 @@ static inline void efx_notify_tx_desc(struct efx_tx_queue *tx_queue)
FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue);
}
+/* Write pointer and first descriptor for TX descriptor ring */
+static inline void efx_push_tx_desc(struct efx_tx_queue *tx_queue,
+ const efx_qword_t *txd)
+{
+ unsigned write_ptr;
+ efx_oword_t reg;
+
+ BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0);
+ BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0);
+
+ write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+ EFX_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true,
+ FRF_AZ_TX_DESC_WPTR, write_ptr);
+ reg.qword[0] = *txd;
+ efx_writeo_page(tx_queue->efx, &reg,
+ FR_BZ_TX_DESC_UPD_P0, tx_queue->queue);
+}
+
+static inline bool
+efx_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count)
+{
+ unsigned empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);
+
+ if (empty_read_count == 0)
+ return false;
+
+ tx_queue->empty_read_count = 0;
+ return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
+}
/* For each entry inserted into the software descriptor ring, create a
* descriptor in the hardware TX descriptor ring (in host memory), and
@@ -373,6 +402,7 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
struct efx_tx_buffer *buffer;
efx_qword_t *txd;
unsigned write_ptr;
+ unsigned old_write_count = tx_queue->write_count;
BUG_ON(tx_queue->write_count == tx_queue->insert_count);
@@ -391,7 +421,15 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
} while (tx_queue->write_count != tx_queue->insert_count);
wmb(); /* Ensure descriptors are written before they are fetched */
- efx_notify_tx_desc(tx_queue);
+
+ if (efx_may_push_tx_desc(tx_queue, old_write_count)) {
+ txd = efx_tx_desc(tx_queue,
+ old_write_count & tx_queue->ptr_mask);
+ efx_push_tx_desc(tx_queue, txd);
+ ++tx_queue->pushes;
+ } else {
+ efx_notify_tx_desc(tx_queue);
+ }
}
/* Allocate hardware resources for a TX queue */
@@ -1632,7 +1670,7 @@ void efx_nic_init_common(struct efx_nic *efx)
EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER, 0xfe);
EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER_EN, 1);
EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_ONE_PKT_PER_Q, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 0);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 1);
EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_DIS_NON_IP_EV, 1);
/* Enable SW_EV to inherit in char driver - assume harmless here */
EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1);
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index 03194f7c0954..bdb92b4af683 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -240,8 +240,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
* of read_count. */
smp_mb();
tx_queue->old_read_count =
- *(volatile unsigned *)
- &tx_queue->read_count;
+ ACCESS_ONCE(tx_queue->read_count);
fill_level = (tx_queue->insert_count
- tx_queue->old_read_count);
q_space = efx->txq_entries - 1 - fill_level;
@@ -429,6 +428,16 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
__netif_tx_unlock(queue);
}
}
+
+ /* Check whether the hardware queue is now empty */
+ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+ tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+ if (tx_queue->read_count == tx_queue->old_write_count) {
+ smp_mb();
+ tx_queue->empty_read_count =
+ tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+ }
+ }
}
int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
@@ -474,8 +483,10 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->insert_count = 0;
tx_queue->write_count = 0;
+ tx_queue->old_write_count = 0;
tx_queue->read_count = 0;
tx_queue->old_read_count = 0;
+ tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
BUG_ON(tx_queue->stopped);
/* Set up TX descriptor ring */
@@ -764,7 +775,7 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
* stopped from the access of read_count. */
smp_mb();
tx_queue->old_read_count =
- *(volatile unsigned *)&tx_queue->read_count;
+ ACCESS_ONCE(tx_queue->read_count);
fill_level = (tx_queue->insert_count
- tx_queue->old_read_count);
q_space = efx->txq_entries - 1 - fill_level;