summaryrefslogtreecommitdiffstats
path: root/drivers/net/mlx4/catas.c
diff options
context:
space:
mode:
authorJack Morgenstein <jackm@dev.mellanox.co.il>2007-07-12 16:50:45 +0200
committerRoland Dreier <rolandd@cisco.com>2007-07-18 03:37:42 +0200
commitee49bd9397cd2b8fe7a1962505d81c1d0a1366fc (patch)
tree064800f1ebcf6c2586f6727aa03c85e875b96289 /drivers/net/mlx4/catas.c
parentIB/iser: Make a couple of functions static (diff)
downloadlinux-ee49bd9397cd2b8fe7a1962505d81c1d0a1366fc.tar.xz
linux-ee49bd9397cd2b8fe7a1962505d81c1d0a1366fc.zip
mlx4_core: Reset device when internal error is detected
Reset the device when an internal error is detected. Also, detect errors by polling the error buffer rather than using interrupts. This is more robust and doesn't depend on MSI-X. Remove the old interrupt handler entirely, since we don't want to support two mechanisms for detecting internal errors. Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/net/mlx4/catas.c')
-rw-r--r--drivers/net/mlx4/catas.c106
1 files changed, 99 insertions, 7 deletions
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index 1bb088aeaf71..6b32ec94b3a8 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -30,41 +30,133 @@
* SOFTWARE.
*/
+#include <linux/workqueue.h>
+
#include "mlx4.h"
-void mlx4_handle_catas_err(struct mlx4_dev *dev)
+enum {
+ MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
+};
+
+static DEFINE_SPINLOCK(catas_lock);
+
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int internal_err_reset = 1;
+module_param(internal_err_reset, int, 0644);
+MODULE_PARM_DESC(internal_err_reset,
+ "Reset device on internal errors if non-zero (default 1)");
+
+static void dump_err_buf(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
- mlx4_err(dev, "Catastrophic error detected:\n");
+ mlx4_err(dev, "Internal error detected:\n");
for (i = 0; i < priv->fw.catas_size; ++i)
mlx4_err(dev, " buf[%02x]: %08x\n",
i, swab32(readl(priv->catas_err.map + i)));
+}
- mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
+static void poll_catas(unsigned long dev_ptr)
+{
+ struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (readl(priv->catas_err.map)) {
+ dump_err_buf(dev);
+
+ mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
+
+ if (internal_err_reset) {
+ spin_lock(&catas_lock);
+ list_add(&priv->catas_err.list, &catas_list);
+ spin_unlock(&catas_lock);
+
+ queue_work(catas_wq, &catas_work);
+ }
+ } else
+ mod_timer(&priv->catas_err.timer,
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
}
-void mlx4_map_catas_buf(struct mlx4_dev *dev)
+static void catas_reset(struct work_struct *work)
+{
+ struct mlx4_priv *priv, *tmppriv;
+ struct mlx4_dev *dev;
+
+ LIST_HEAD(tlist);
+ int ret;
+
+ spin_lock_irq(&catas_lock);
+ list_splice_init(&catas_list, &tlist);
+ spin_unlock_irq(&catas_lock);
+
+ list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
+ ret = mlx4_restart_one(priv->dev.pdev);
+ dev = &priv->dev;
+ if (ret)
+ mlx4_err(dev, "Reset failed (%d)\n", ret);
+ else
+ mlx4_dbg(dev, "Reset succeeded\n");
+ }
+}
+
+void mlx4_start_catas_poll(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
unsigned long addr;
+ INIT_LIST_HEAD(&priv->catas_err.list);
+ init_timer(&priv->catas_err.timer);
+ priv->catas_err.map = NULL;
+
addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
priv->fw.catas_offset;
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
- if (!priv->catas_err.map)
- mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n",
+ if (!priv->catas_err.map) {
+ mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
addr);
+ return;
+ }
+ priv->catas_err.timer.data = (unsigned long) dev;
+ priv->catas_err.timer.function = poll_catas;
+ priv->catas_err.timer.expires =
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
+ add_timer(&priv->catas_err.timer);
}
-void mlx4_unmap_catas_buf(struct mlx4_dev *dev)
+void mlx4_stop_catas_poll(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
+ del_timer_sync(&priv->catas_err.timer);
+
if (priv->catas_err.map)
iounmap(priv->catas_err.map);
+
+ spin_lock_irq(&catas_lock);
+ list_del(&priv->catas_err.list);
+ spin_unlock_irq(&catas_lock);
+}
+
+int __init mlx4_catas_init(void)
+{
+ INIT_WORK(&catas_work, catas_reset);
+
+ catas_wq = create_singlethread_workqueue("mlx4_err");
+ if (!catas_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_catas_cleanup(void)
+{
+ destroy_workqueue(catas_wq);
}