diff options
author | Igor Russkikh <irusskikh@marvell.com> | 2020-05-14 11:57:18 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-05-14 22:25:46 +0200 |
commit | a8736ea83b80526529e21db29595e5337bfa95c2 (patch) | |
tree | 8459f8d3d14adbb657039d30c28192c4cd4be3c2 /drivers | |
parent | net: qed: adding hw_err states and handling (diff) | |
download | linux-a8736ea83b80526529e21db29595e5337bfa95c2.tar.xz linux-a8736ea83b80526529e21db29595e5337bfa95c2.zip |
net: qede: add hw err scheduled handler
qede (ethernet level driver) registers a callback handler.
This handler maintains eth dev state flags/bits to track error processing.
It implements in place processing part for nonsleeping context (WARN_ON
trigger), and a deferred (delayed work) part which triggers recovery
process for recoverable errors.
In later patches this atomic handler will come with more meat.
We introduce err_flags on ethdevice structure, its being used to record
error handling properties.
Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/ethernet/qlogic/qede/qede.h | 13 | ||||
-rw-r--r-- | drivers/net/ethernet/qlogic/qede/qede_main.c | 95 |
2 files changed, 106 insertions, 2 deletions
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index f6f0b51620ab..695d645d9ba9 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -278,6 +278,14 @@ struct qede_dev { struct qede_rdma_dev rdma_info; struct bpf_prog *xdp_prog; + + unsigned long err_flags; +#define QEDE_ERR_IS_HANDLED 31 +#define QEDE_ERR_ATTN_CLR_EN 0 +#define QEDE_ERR_GET_DBG_INFO 1 +#define QEDE_ERR_IS_RECOVERABLE 2 +#define QEDE_ERR_WARN 3 + struct qede_dump_info dump_info; }; @@ -485,12 +493,15 @@ struct qede_fastpath { #define QEDE_SP_RECOVERY 0 #define QEDE_SP_RX_MODE 1 +#define QEDE_SP_RSVD1 2 +#define QEDE_SP_RSVD2 3 +#define QEDE_SP_HW_ERR 4 +#define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); -#define QEDE_SP_ARFS_CONFIG 4 #define QEDE_SP_TASK_POLL_DELAY (5 * HZ) #endif diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 300405369c37..e67d5da23792 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev); static void qede_link_update(void *dev, struct qed_link_output *link); static void qede_schedule_recovery_handler(void *dev); static void qede_recovery_handler(struct qede_dev *edev); +static void qede_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type); static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_generic_tlv_data(void *edev, struct qed_generic_tlvs *data); - +static void qede_generic_hw_err_handler(struct qede_dev *edev); #ifdef CONFIG_QED_SRIOV static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) @@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = { #endif .link_update = qede_link_update, .schedule_recovery_handler = qede_schedule_recovery_handler, + .schedule_hw_err_handler = qede_schedule_hw_err_handler, .get_generic_tlv_data = qede_get_generic_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data, }, @@ -1009,6 +1012,8 @@ static void qede_sp_task(struct work_struct *work) qede_process_arfs_filters(edev, false); } #endif + if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags)) + qede_generic_hw_err_handler(edev); __qede_unlock(edev); if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) { @@ -2509,6 +2514,94 @@ err: qede_recovery_failed(edev); } +static void qede_atomic_hw_err_handler(struct qede_dev *edev) +{ + DP_NOTICE(edev, + "Generic non-sleepable HW error handling started - err_flags 0x%lx\n", + edev->err_flags); + + /* Get a call trace of the flow that led to the error */ + WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags)); + + DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n"); +} + +static void qede_generic_hw_err_handler(struct qede_dev *edev) +{ + struct qed_dev *cdev = edev->cdev; + + DP_NOTICE(edev, + "Generic sleepable HW error handling started - err_flags 0x%lx\n", + edev->err_flags); + + /* Trigger a recovery process. + * This is placed in the sleep requiring section just to make + * sure it is the last one, and that all the other operations + * were completed. + */ + if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags)) + edev->ops->common->recovery_process(cdev); + + clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags); + + DP_NOTICE(edev, "Generic sleepable HW error handling is done\n"); +} + +static void qede_set_hw_err_flags(struct qede_dev *edev, + enum qed_hw_err_type err_type) +{ + unsigned long err_flags = 0; + + switch (err_type) { + case QED_HW_ERR_DMAE_FAIL: + set_bit(QEDE_ERR_WARN, &err_flags); + fallthrough; + case QED_HW_ERR_MFW_RESP_FAIL: + case QED_HW_ERR_HW_ATTN: + case QED_HW_ERR_RAMROD_FAIL: + case QED_HW_ERR_FW_ASSERT: + set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags); + set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags); + break; + + default: + DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type); + break; + } + + edev->err_flags |= err_flags; +} + +static void qede_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type) +{ + struct qede_dev *edev = dev; + + /* Fan failure cannot be masked by handling of another HW error or by a + * concurrent recovery process. + */ + if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) || + edev->state == QEDE_STATE_RECOVERY) && + err_type != QED_HW_ERR_FAN_FAIL) { + DP_INFO(edev, + "Avoid scheduling an error handling while another HW error is being handled\n"); + return; + } + + if (err_type >= QED_HW_ERR_LAST) { + DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type); + clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags); + return; + } + + qede_set_hw_err_flags(edev, err_type); + qede_atomic_hw_err_handler(edev); + set_bit(QEDE_SP_HW_ERR, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); + + DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type); +} + static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; |