diff options
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- | arch/powerpc/platforms/powernv/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-powernv.c | 101 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/npu-dma.c | 7 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/ocxl.c | 515 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-dump.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-elog.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-imc.c | 77 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-sysparam.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-wrappers.S | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 28 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 88 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 8 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/smp.c | 28 |
14 files changed, 743 insertions, 131 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 3732118a0482..6c9d5199a7e2 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o obj-$(CONFIG_PPC_FTW) += nx-ftw.o +obj-$(CONFIG_OCXL_BASE) += ocxl.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4650fb294e7a..33c86c1a1720 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -43,6 +43,22 @@ static int eeh_event_irq = -EINVAL; +void pnv_pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + static int pnv_eeh_init(void) { struct pci_controller *hose; @@ -86,6 +102,7 @@ static int pnv_eeh_init(void) } eeh_set_pe_aux_size(max_diag_size); + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; return 0; } @@ -1638,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & 0x10) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= 0x10; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - static int pnv_eeh_restore_config(struct pci_dn *pdn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; - s64 ret; + s64 ret = 0; int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) @@ -1715,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) * to be exported by firmware in extendible way. */ if (edev->physfn) { - ret = pnv_eeh_restore_vf_config(pdn); + ret = eeh_restore_vf_config(pdn); } else { phb = pdn->phb->private_data; ret = opal_pci_reinit(phb->opal_id, @@ -1728,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) return -EIO; } - return 0; + return ret; } static struct eeh_ops pnv_eeh_ops = { @@ -1746,25 +1704,10 @@ static struct eeh_ops pnv_eeh_ops = { .read_config = pnv_eeh_read_config, .write_config = pnv_eeh_write_config, .next_error = pnv_eeh_next_error, - .restore_config = pnv_eeh_restore_config + .restore_config = pnv_eeh_restore_config, + .notify_resume = NULL }; -void pcibios_bus_add_device(struct pci_dev *pdev) -{ - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (!pdev->is_virtfn) - return; - - /* - * The following operations will fail if VF's sysfs files - * aren't created or its resources aren't finalized. - */ - eeh_add_device_early(pdn); - eeh_add_device_late(pdev); - eeh_sysfs_add_device(pdev); -} - #ifdef CONFIG_PCI_IOV static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) { diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index f6cbc1a71472..0a253b64ac5f 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -39,7 +39,10 @@ */ static struct pci_dev *get_pci_dev(struct device_node *dn) { - return PCI_DN(dn)->pcidev; + struct pci_dn *pdn = PCI_DN(dn); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), + pdn->busno, pdn->devfn); } /* Given a NPU device get the associated PCI device. */ @@ -277,7 +280,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) int64_t rc = 0; phys_addr_t top = memblock_end_of_DRAM(); - if (phb->type != PNV_PHB_NPU || !npe->pdev) + if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) return -EINVAL; rc = pnv_npu_unset_window(npe, 0); diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c new file mode 100644 index 000000000000..fa9b53af3c7b --- /dev/null +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -0,0 +1,515 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include <asm/pnv-ocxl.h> +#include <asm/opal.h> +#include <asm/xive.h> +#include <misc/ocxl-config.h> +#include "pci.h" + +#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full +#define PNV_OCXL_ACTAG_MAX 64 +/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ +#define PNV_OCXL_PASID_BITS 15 +#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) + +#define AFU_PRESENT (1 << 31) +#define AFU_INDEX_MASK 0x3F000000 +#define AFU_INDEX_SHIFT 24 +#define ACTAG_MASK 0xFFF + + +struct actag_range { + u16 start; + u16 count; +}; + +struct npu_link { + struct list_head list; + int domain; + int bus; + int dev; + u16 fn_desired_actags[8]; + struct actag_range fn_actags[8]; + bool assignment_done; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + + +/* + * opencapi actags handling: + * + * When sending commands, the opencapi device references the memory + * context it's targeting with an 'actag', which is really an alias + * for a (BDF, pasid) combination. When it receives a command, the NPU + * must do a lookup of the actag to identify the memory context. The + * hardware supports a finite number of actags per link (64 for + * POWER9). + * + * The device can carry multiple functions, and each function can have + * multiple AFUs. Each AFU advertises in its config space the number + * of desired actags. The host must configure in the config space of + * the AFU how many actags the AFU is really allowed to use (which can + * be less than what the AFU desires). + * + * When a PCI function is probed by the driver, it has no visibility + * about the other PCI functions and how many actags they'd like, + * which makes it impossible to distribute actags fairly among AFUs. + * + * Unfortunately, the only way to know how many actags a function + * desires is by looking at the data for each AFU in the config space + * and add them up. Similarly, the only way to know how many actags + * all the functions of the physical device desire is by adding the + * previously computed function counts. Then we can match that against + * what the hardware supports. + * + * To get a comprehensive view, we use a 'pci fixup': at the end of + * PCI enumeration, each function counts how many actags its AFUs + * desire and we save it in a 'npu_link' structure, shared between all + * the PCI functions of a same device. Therefore, when the first + * function is probed by the driver, we can get an idea of the total + * count of desired actags for the device, and assign the actags to + * the AFUs, by pro-rating if needed. + */ + +static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) +{ + int vsec = pos; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u8 idx; + + while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, + vsec))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + return 0; +} + +static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) +{ + int pos; + u32 val; + + pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0); + if (!pos) + return -ESRCH; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + if (val & AFU_PRESENT) + *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; + else + *afu_idx = -1; + return 0; +} + +static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) +{ + int pos; + u16 actag_sup; + + pos = find_dvsec_afu_ctrl(dev, afu_idx); + if (!pos) + return -ESRCH; + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, + &actag_sup); + *actag = actag_sup & ACTAG_MASK; + return 0; +} + +static struct npu_link *find_link(struct pci_dev *dev) +{ + struct npu_link *link; + + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + return link; + } + } + + /* link doesn't exist yet. Allocate one */ + link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); + if (!link) + return NULL; + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + list_add(&link->list, &links_list); + return link; +} + +static void pnv_ocxl_fixup_actag(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct npu_link *link; + int rc, afu_idx = -1, i, actag; + + if (!machine_is(powernv)) + return; + + if (phb->type != PNV_PHB_NPU_OCAPI) + return; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_warn(&dev->dev, "couldn't update actag information\n"); + mutex_unlock(&links_list_lock); + return; + } + + /* + * Check how many actags are desired for the AFUs under that + * function and add it to the count for the link + */ + rc = get_max_afu_index(dev, &afu_idx); + if (rc) { + /* Most likely an invalid config space */ + dev_dbg(&dev->dev, "couldn't find AFU information\n"); + afu_idx = -1; + } + + link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; + for (i = 0; i <= afu_idx; i++) { + /* + * AFU index 'holes' are allowed. So don't fail if we + * can't read the actag info for an index + */ + rc = get_actag_count(dev, i, &actag); + if (rc) + continue; + link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; + } + dev_dbg(&dev->dev, "total actags for function: %d\n", + link->fn_desired_actags[PCI_FUNC(dev->devfn)]); + + mutex_unlock(&links_list_lock); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); + +static u16 assign_fn_actags(u16 desired, u16 total) +{ + u16 count; + + if (total <= PNV_OCXL_ACTAG_MAX) + count = desired; + else + count = PNV_OCXL_ACTAG_MAX * desired / total; + + return count; +} + +static void assign_actags(struct npu_link *link) +{ + u16 actag_count, range_start = 0, total_desired = 0; + int i; + + for (i = 0; i < 8; i++) + total_desired += link->fn_desired_actags[i]; + + for (i = 0; i < 8; i++) { + if (link->fn_desired_actags[i]) { + actag_count = assign_fn_actags( + link->fn_desired_actags[i], + total_desired); + link->fn_actags[i].start = range_start; + link->fn_actags[i].count = actag_count; + range_start += actag_count; + WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); + } + pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", + link->domain, link->bus, link->dev, i, + link->fn_actags[i].start, link->fn_actags[i].count, + link->fn_desired_actags[i]); + } + link->assignment_done = true; +} + +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + struct npu_link *link; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + /* + * On p9, we only have 64 actags per link, so they must be + * shared by all the functions of the same adapter. We counted + * the desired actag counts during PCI enumeration, so that we + * can allocate a pro-rated number of actags to each function. + */ + if (!link->assignment_done) + assign_actags(link); + + *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; + *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; + *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; + + mutex_unlock(&links_list_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); + +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) +{ + struct npu_link *link; + int i, rc = -EINVAL; + + /* + * The number of PASIDs (process address space ID) which can + * be used by a function depends on how many functions exist + * on the device. The NPU needs to be configured to know how + * many bits are available to PASIDs and how many are to be + * used by the function BDF indentifier. + * + * We only support one AFU-carrying function for now. + */ + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + + for (i = 0; i < 8; i++) + if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { + *count = PNV_OCXL_PASID_MAX; + rc = 0; + break; + } + + mutex_unlock(&links_list_lock); + dev_dbg(&dev->dev, "%d PASIDs available for function\n", + rc ? 0 : *count); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); + +static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) +{ + int shift, idx; + + WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); + idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; + shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); + buf[idx] |= rate << shift; +} + +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, + char *rate_buf, int rate_buf_size) +{ + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + /* + * The TL capabilities are a characteristic of the NPU, so + * we go with hard-coded values. + * + * The receiving rate of each template is encoded on 4 bits. + * + * On P9: + * - templates 0 -> 3 are supported + * - templates 0, 1 and 3 have a 0 receiving rate + * - template 2 has receiving rate of 1 (extra cycle) + */ + memset(rate_buf, 0, rate_buf_size); + set_templ_rate(2, 1, rate_buf); + *cap = PNV_OCXL_TL_P9_RECV_CAP; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); + +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int rc; + + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + + rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, + rate_buf_phys, rate_buf_size); + if (rc) { + dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); + +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) +{ + int rc; + + rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); + if (rc) { + dev_err(&dev->dev, + "Can't get translation interrupt for device\n"); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); + +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle) +{ + iounmap(dsisr); + iounmap(dar); + iounmap(tfc); + iounmap(pe_handle); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); + +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle) +{ + u64 reg; + int i, j, rc = 0; + void __iomem *regs[4]; + + /* + * opal stores the mmio addresses of the DSISR, DAR, TFC and + * PE_HANDLE registers in a device tree property, in that + * order + */ + for (i = 0; i < 4; i++) { + rc = of_property_read_u64_index(dev->dev.of_node, + "ibm,opal-xsl-mmio", i, ®); + if (rc) + break; + regs[i] = ioremap(reg, 8); + if (!regs[i]) { + rc = -EINVAL; + break; + } + } + if (rc) { + dev_err(&dev->dev, "Can't map translation mmio registers\n"); + for (j = i - 1; j >= 0; j--) + iounmap(regs[j]); + } else { + *dsisr = regs[0]; + *dar = regs[1]; + *tfc = regs[2]; + *pe_handle = regs[3]; + } + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); + +struct spa_data { + u64 phb_opal_id; + u32 bdfn; +}; + +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, + void **platform_data) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct spa_data *data; + u32 bdfn; + int rc; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + bdfn = (dev->bus->number << 8) | dev->devfn; + rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), + PE_mask); + if (rc) { + dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); + kfree(data); + return rc; + } + data->phb_opal_id = phb->opal_id; + data->bdfn = bdfn; + *platform_data = (void *) data; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); + +void pnv_ocxl_spa_release(void *platform_data) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); + WARN_ON(rc); + kfree(data); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); + +int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe); + +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) +{ + __be64 flags, trigger_page; + s64 rc; + u32 hwirq; + + hwirq = xive_native_alloc_irq(); + if (!hwirq) + return -ENOENT; + + rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, + NULL); + if (rc || !trigger_page) { + xive_native_free_irq(hwirq); + return -ENOENT; + } + *irq = hwirq; + *trigger_addr = be64_to_cpu(trigger_page); + return 0; + +} +EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); + +void pnv_ocxl_free_xive_irq(u32 irq) +{ + xive_native_free_irq(irq); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 4c827826c05e..0dc8fa4e0af2 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj, * due to the dynamic size of the dump */ static struct dump_attribute id_attribute = - __ATTR(id, S_IRUGO, dump_id_show, NULL); + __ATTR(id, 0444, dump_id_show, NULL); static struct dump_attribute type_attribute = - __ATTR(type, S_IRUGO, dump_type_show, NULL); + __ATTR(type, 0444, dump_type_show, NULL); static struct dump_attribute ack_attribute = __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index ecd6d9177d13..ba6e437abb4b 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj, } static struct elog_attribute id_attribute = - __ATTR(id, S_IRUGO, elog_id_show, NULL); + __ATTR(id, 0444, elog_id_show, NULL); static struct elog_attribute type_attribute = - __ATTR(type, S_IRUGO, elog_type_show, NULL); + __ATTR(type, 0444, elog_type_show, NULL); static struct elog_attribute ack_attribute = __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 465ea105b771..dd4c9b8b8a81 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -21,6 +21,78 @@ #include <asm/io.h> #include <asm/imc-pmu.h> #include <asm/cputhreads.h> +#include <asm/debugfs.h> + +static struct dentry *imc_debugfs_parent; + +/* Helpers to export imc command and mode via debugfs */ +static int imc_mem_get(void *data, u64 *val) +{ + *val = cpu_to_be64(*(u64 *)data); + return 0; +} + +static int imc_mem_set(void *data, u64 val) +{ + *(u64 *)data = cpu_to_be64(val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); + +static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, + value, &fops_imc_x64); +} + +/* + * export_imc_mode_and_cmd: Create a debugfs interface + * for imc_cmd and imc_mode + * for each node in the system. + * imc_mode and imc_cmd can be changed by echo into + * this interface. + */ +static void export_imc_mode_and_cmd(struct device_node *node, + struct imc_pmu *pmu_ptr) +{ + static u64 loc, *imc_mode_addr, *imc_cmd_addr; + int chip = 0, nid; + char mode[16], cmd[16]; + u32 cb_offset; + + imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); + + /* + * Return here, either because 'imc' directory already exists, + * Or failed to create a new one. + */ + if (!imc_debugfs_parent) + return; + + if (of_property_read_u32(node, "cb_offset", &cb_offset)) + cb_offset = IMC_CNTL_BLK_OFFSET; + + for_each_node(nid) { + loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset; + imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); + sprintf(mode, "imc_mode_%d", nid); + if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr)) + goto err; + + imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); + sprintf(cmd, "imc_cmd_%d", nid); + if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr)) + goto err; + chip++; + } + return; + +err: + debugfs_remove_recursive(imc_debugfs_parent); +} /* * imc_get_mem_addr_nest: Function to get nest counter memory region @@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, } pmu_ptr->imc_counter_mmaped = true; + export_imc_mode_and_cmd(node, pmu_ptr); kfree(base_addr_arr); kfree(chipid_arr); return 0; @@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } } + /* If none of the nest units are registered, remove debugfs interface */ + if (pmu_count == 0) + debugfs_remove_recursive(imc_debugfs_parent); + return 0; } diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 23fb6647dced..6fd4092798d5 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -260,13 +260,13 @@ void __init opal_sys_param_init(void) /* If the parameter is read-only or read-write */ switch (perm[i] & 3) { case OPAL_SYSPARAM_READ: - attr[i].kobj_attr.attr.mode = S_IRUGO; + attr[i].kobj_attr.attr.mode = 0444; break; case OPAL_SYSPARAM_WRITE: - attr[i].kobj_attr.attr.mode = S_IWUSR; + attr[i].kobj_attr.attr.mode = 0200; break; case OPAL_SYSPARAM_RW: - attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; + attr[i].kobj_attr.attr.mode = 0644; break; default: break; diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6f4b00a2ac46..1b2936ba6040 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); +OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); +OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); +OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 041ddbd1fc57..c15182765ff5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPAL; - pr_info("OPAL detected !\n"); + pr_debug("OPAL detected !\n"); } else { panic("OPAL != V3 detected, no longer supported.\n"); } @@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb) { if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Invalid arguments, msg_type:%d\n", - __func__, msg_type); + pr_warn("%s: Invalid arguments, msg_type:%d\n", + __func__, msg_type); return -EINVAL; } @@ -281,8 +281,8 @@ static void opal_handle_message(void) /* check for errors. */ if (ret) { - pr_warning("%s: Failed to retrieve opal message, err=%lld\n", - __func__, ret); + pr_warn("%s: Failed to retrieve opal message, err=%lld\n", + __func__, ret); return; } @@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs, void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) { - /* - * This is mostly taken from kernel/panic.c, but tries to do - * relatively minimal work. Don't use delay functions (TB may - * be broken), don't crash dump (need to set a firmware log), - * don't run notifiers. We do want to get some information to - * Linux console. - */ - console_verbose(); - bust_spinlocks(1); + panic_flush_kmsg_start(); + pr_emerg("Hardware platform error: %s\n", msg); if (regs) show_regs(regs); smp_send_stop(); - printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - bust_spinlocks(0); - debug_locks_off(); - console_flush_on_panic(); + + panic_flush_kmsg_end(); /* * Don't bother to shut things down because this will diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9582aeb1fe4c..496e47696ed0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -54,7 +54,8 @@ #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5 -static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" }; +static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", + "NPU_OCAPI" }; static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -89,6 +90,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, } static bool pnv_iommu_bypass_disabled __read_mostly; +static bool pci_reset_phbs __read_mostly; static int __init iommu_setup(char *str) { @@ -110,6 +112,14 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); +static int __init pci_reset_phbs_setup(char *str) +{ + pci_reset_phbs = true; + return 0; +} + +early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); + static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { /* @@ -924,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) * Configure PELTV. NPUs don't have a PELTV table so skip * configuration on them. */ - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) pnv_ioda_set_peltv(phb, pe, true); /* Setup reverse map */ @@ -1059,8 +1069,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pe = pnv_ioda_alloc_pe(phb); if (!pe) { - pr_warning("%s: Not enough PE# available, disabling device\n", - pci_name(dev)); + pr_warn("%s: Not enough PE# available, disabling device\n", + pci_name(dev)); return NULL; } @@ -1072,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) * At some point we want to remove the PDN completely anyways */ pci_dev_get(dev); - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; @@ -1119,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) continue; pe->device_count++; - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_same_PE(dev->subordinate, pe); @@ -1164,7 +1172,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe = pnv_ioda_alloc_pe(phb); if (!pe) { - pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", + pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); return NULL; } @@ -1234,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; - npu_pdn->pcidev = npu_pdev; npu_pdn->pe_number = pe_num; phb->ioda.pe_rmap[rid] = pe->pe_number; @@ -1272,16 +1279,23 @@ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; struct pnv_phb *phb; + struct pci_bus *bus; + struct pci_dev *pdev; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type == PNV_PHB_NPU) { + if (phb->type == PNV_PHB_NPU_NVLINK) { /* PE#0 is needed for error reporting */ pnv_ioda_reserve_pe(phb, 0); pnv_ioda_setup_npu_PEs(hose->bus); if (phb->model == PNV_PHB_MODEL_NPU2) pnv_npu2_init(phb); } + if (phb->type == PNV_PHB_NPU_OCAPI) { + bus = hose->bus; + list_for_each_entry(pdev, &bus->devices, bus_list) + pnv_ioda_setup_dev_PE(pdev); + } } } @@ -1692,7 +1706,7 @@ m64_failed: return ret; } -int pcibios_sriov_disable(struct pci_dev *pdev) +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) { pnv_pci_sriov_disable(pdev); @@ -1701,7 +1715,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev) return 0; } -int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_dev_pci_data(pdev); @@ -2572,7 +2586,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, unsigned long direct_table_size; if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || - (window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) return 0; @@ -2640,7 +2653,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) hose = pci_bus_to_host(pdev->bus); phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK) return 0; *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; @@ -2724,7 +2737,7 @@ static void pnv_pci_ioda_setup_iommu_api(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK) continue; list_for_each_entry(pe, &phb->ioda.pe_list, list) { @@ -3293,7 +3306,7 @@ static void pnv_pci_ioda_create_dbgfs(void) sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); if (!phb->dbgfs) { - pr_warning("%s: Error on creating debugfs on PHB#%x\n", + pr_warn("%s: Error on creating debugfs on PHB#%x\n", __func__, hose->global_number); continue; } @@ -3774,6 +3787,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .shutdown = pnv_pci_ioda_shutdown, +}; + #ifdef CONFIG_CXL_BASE const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, @@ -4007,9 +4027,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - if (phb->type == PNV_PHB_NPU) { + switch (phb->type) { + case PNV_PHB_NPU_NVLINK: hose->controller_ops = pnv_npu_ioda_controller_ops; - } else { + break; + case PNV_PHB_NPU_OCAPI: + hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; + break; + default: phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; hose->controller_ops = pnv_pci_ioda_controller_ops; } @@ -4019,6 +4044,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; + ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; #endif pci_add_flags(PCI_REASSIGN_ALL_RSRC); @@ -4026,15 +4053,16 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Reset IODA tables to a clean state */ rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) - pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); + pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc); /* * If we're running in kdump kernel, the previous kernel never * shutdown PCI devices correctly. We already got IODA table * cleaned out. So we have to issue PHB reset to stop all PCI - * transactions from previous kernel. + * transactions from previous kernel. The ppc_pci_reset_phbs + * kernel parameter will force this reset too. */ - if (is_kdump_kernel()) { + if (is_kdump_kernel() || pci_reset_phbs) { pr_info(" Issue PHB reset ...\n"); pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); @@ -4052,8 +4080,26 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np) void __init pnv_pci_init_npu_phb(struct device_node *np) { - pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU); + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK); +} + +void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI); +} + +static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + if (!machine_is(powernv)) + return; + + if (phb->type == PNV_PHB_NPU_OCAPI) + dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE; } +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup); void __init pnv_pci_init_ioda_hub(struct device_node *np) { diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5422f4a6317c..69d102cbf48f 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") pnv_pci_init_npu_phb(np); + /* Look for NPU2 OpenCAPI PHBs */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_pci_init_npu2_opencapi_phb(np); + /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index b772d7473896..eada4b6068cb 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -12,9 +12,10 @@ struct pci_dn; #define NV_NMMU_ATSD_REGS 8 enum pnv_phb_type { - PNV_PHB_IODA1 = 0, - PNV_PHB_IODA2 = 1, - PNV_PHB_NPU = 2, + PNV_PHB_IODA1 = 0, + PNV_PHB_IODA2 = 1, + PNV_PHB_NPU_NVLINK = 2, + PNV_PHB_NPU_OCAPI = 3, }; /* Precise PHB model for error management */ @@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); +extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index ba030669eca1..9664c8461f03 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -37,6 +37,8 @@ #include <asm/kvm_ppc.h> #include <asm/ppc-opcode.h> #include <asm/cpuidle.h> +#include <asm/kexec.h> +#include <asm/reg.h> #include "powernv.h" @@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void) } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + } else if ((srr1 & wmask) == SRR1_WAKERESET) { + irq_set_pending_from_srr1(srr1); + /* Does not return */ } + smp_mb(); + /* + * For kdump kernels, we process the ipi and jump to + * crash_ipi_callback + */ + if (kdump_in_progress()) { + /* + * If we got to this point, we've not used + * NMI's, otherwise we would have gone + * via the SRR1_WAKERESET path. We are + * using regular IPI's for waking up offline + * threads. + */ + struct pt_regs regs; + + ppc_save_regs(®s); + crash_ipi_callback(®s); + /* Does not return */ + } + if (cpu_core_split_required()) continue; @@ -371,5 +396,8 @@ void __init pnv_smp_init(void) #ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = pnv_smp_cpu_kill_self; +#ifdef CONFIG_KEXEC_CORE + crash_wake_offline = 1; +#endif #endif } |