summaryrefslogtreecommitdiffstats
path: root/arch/ia64/sn/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/sn/kernel')
-rw-r--r--arch/ia64/sn/kernel/bte_error.c58
-rw-r--r--arch/ia64/sn/kernel/huberror.c9
-rw-r--r--arch/ia64/sn/kernel/io_init.c104
-rw-r--r--arch/ia64/sn/kernel/irq.c10
-rw-r--r--arch/ia64/sn/kernel/tiocx.c34
-rw-r--r--arch/ia64/sn/kernel/xpc.h1273
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c24
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c189
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c10
9 files changed, 283 insertions, 1428 deletions
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index fcbc748ae433..f1ec1370b3e3 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long);
* Wait until all BTE related CRBs are completed
* and then reset the interfaces.
*/
-void shub1_bte_error_handler(unsigned long _nodepda)
+int shub1_bte_error_handler(unsigned long _nodepda)
{
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
(err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
smp_processor_id()));
- return;
+ return 1;
}
/* Determine information about our hub */
@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
mod_timer(recovery_timer, HZ * 5);
BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
smp_processor_id()));
- return;
+ return 1;
}
if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
err_nodepda, smp_processor_id(),
i));
- return;
+ return 1;
}
}
}
@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda)
REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
del_timer(recovery_timer);
+ return 0;
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+int shub2_bte_error_handler(unsigned long _nodepda)
+{
+ struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+ struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+ struct bteinfo_s *bte;
+ nasid_t nasid;
+ u64 status;
+ int i;
+
+ nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+ /*
+ * Verify that all the BTEs are complete
+ */
+ for (i = 0; i < BTES_PER_NODE; i++) {
+ bte = &err_nodepda->bte_if[i];
+ status = BTE_LNSTAT_LOAD(bte);
+ if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
+ continue;
+ mod_timer(recovery_timer, HZ * 5);
+ BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+ smp_processor_id()));
+ return 1;
+ }
+ if (ia64_sn_bte_recovery(nasid))
+ panic("bte_error_handler(): Fatal BTE Error");
+
+ del_timer(recovery_timer);
+ return 0;
}
/*
@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda)
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
int i;
- nasid_t nasid;
unsigned long irq_flags;
volatile u64 *notify;
bte_result_t bh_error;
@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda)
}
if (is_shub1()) {
- shub1_bte_error_handler(_nodepda);
+ if (shub1_bte_error_handler(_nodepda)) {
+ spin_unlock_irqrestore(recovery_lock, irq_flags);
+ return;
+ }
} else {
- nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
-
- if (ia64_sn_bte_recovery(nasid))
- panic("bte_error_handler(): Fatal BTE Error");
+ if (shub2_bte_error_handler(_nodepda)) {
+ spin_unlock_irqrestore(recovery_lock, irq_flags);
+ return;
+ }
}
for (i = 0; i < BTES_PER_NODE; i++) {
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 5c5eb01c50f0..56ab6bae00ee 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
ret_stuff.v0 = 0;
hubdev_info = (struct hubdev_info *)arg;
nasid = hubdev_info->hdi_nasid;
- SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+
+ if (is_shub1()) {
+ SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
(u64) nasid, 0, 0, 0, 0, 0, 0);
- if ((int)ret_stuff.v0)
- panic("hubii_eint_handler(): Fatal TIO Error");
+ if ((int)ret_stuff.v0)
+ panic("hubii_eint_handler(): Fatal TIO Error");
- if (is_shub1()) {
if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
(void)hubiio_crb_error_handler(hubdev_info);
} else
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 318087e35b66..233d55115d33 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -76,11 +76,12 @@ static struct sn_pcibus_provider sn_pci_default_provider = {
};
/*
- * Retrieve the DMA Flush List given nasid. This list is needed
- * to implement the WAR - Flush DMA data on PIO Reads.
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
*/
-static inline uint64_t
-sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+ u64 address)
{
struct ia64_sal_retval ret_stuff;
@@ -88,17 +89,17 @@ sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
ret_stuff.v0 = 0;
SAL_CALL_NOLOCK(ret_stuff,
- (u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
- (u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0,
- 0);
- return ret_stuff.v0;
+ (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+ (u64) nasid, (u64) widget_num,
+ (u64) device_num, (u64) address, 0, 0, 0);
+ return ret_stuff.status;
}
/*
* Retrieve the hub device info structure for the given nasid.
*/
-static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
{
struct ia64_sal_retval ret_stuff;
@@ -114,7 +115,7 @@ static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
/*
* Retrieve the pci bus information given the bus number.
*/
-static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
{
struct ia64_sal_retval ret_stuff;
@@ -130,9 +131,9 @@ static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
/*
* Retrieve the pci device information given the bus and device|function number.
*/
-static inline uint64_t
-sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
- u64 sn_irq_info)
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+ u64 sn_irq_info)
{
struct ia64_sal_retval ret_stuff;
ret_stuff.status = 0;
@@ -140,7 +141,7 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
SAL_CALL_NOLOCK(ret_stuff,
(u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
- (u64) segment, (u64) bus_number, (u64) devfn,
+ (u64) segment, (u64) bus_number, (u64) devfn,
(u64) pci_dev,
sn_irq_info, 0, 0);
return ret_stuff.v0;
@@ -170,12 +171,12 @@ sn_pcidev_info_get(struct pci_dev *dev)
*/
static void sn_fixup_ionodes(void)
{
-
- struct sn_flush_device_list *sn_flush_device_list;
+ struct sn_flush_device_kernel *sn_flush_device_kernel;
+ struct sn_flush_device_kernel *dev_entry;
struct hubdev_info *hubdev;
- uint64_t status;
- uint64_t nasid;
- int i, widget;
+ u64 status;
+ u64 nasid;
+ int i, widget, device;
/*
* Get SGI Specific HUB chipset information.
@@ -186,7 +187,7 @@ static void sn_fixup_ionodes(void)
nasid = cnodeid_to_nasid(i);
hubdev->max_segment_number = 0xffffffff;
hubdev->max_pcibus_number = 0xff;
- status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev));
+ status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
if (status)
continue;
@@ -213,38 +214,49 @@ static void sn_fixup_ionodes(void)
hubdev->hdi_flush_nasid_list.widget_p =
kmalloc((HUB_WIDGET_ID_MAX + 1) *
- sizeof(struct sn_flush_device_list *), GFP_KERNEL);
-
+ sizeof(struct sn_flush_device_kernel *),
+ GFP_KERNEL);
memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
(HUB_WIDGET_ID_MAX + 1) *
- sizeof(struct sn_flush_device_list *));
+ sizeof(struct sn_flush_device_kernel *));
for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
- sn_flush_device_list = kmalloc(DEV_PER_WIDGET *
- sizeof(struct
- sn_flush_device_list),
- GFP_KERNEL);
- memset(sn_flush_device_list, 0x0,
+ sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET *
+ sizeof(struct
+ sn_flush_device_kernel),
+ GFP_KERNEL);
+ if (!sn_flush_device_kernel)
+ BUG();
+ memset(sn_flush_device_kernel, 0x0,
DEV_PER_WIDGET *
- sizeof(struct sn_flush_device_list));
-
- status =
- sal_get_widget_dmaflush_list(nasid, widget,
- (uint64_t)
- __pa
- (sn_flush_device_list));
- if (status) {
- kfree(sn_flush_device_list);
- continue;
+ sizeof(struct sn_flush_device_kernel));
+
+ dev_entry = sn_flush_device_kernel;
+ for (device = 0; device < DEV_PER_WIDGET;
+ device++,dev_entry++) {
+ dev_entry->common = kmalloc(sizeof(struct
+ sn_flush_device_common),
+ GFP_KERNEL);
+ if (!dev_entry->common)
+ BUG();
+ memset(dev_entry->common, 0x0, sizeof(struct
+ sn_flush_device_common));
+
+ status = sal_get_device_dmaflush_list(nasid,
+ widget,
+ device,
+ (u64)(dev_entry->common));
+ if (status)
+ BUG();
+
+ spin_lock_init(&dev_entry->sfdl_flush_lock);
}
- spin_lock_init(&sn_flush_device_list->sfdl_flush_lock);
- hubdev->hdi_flush_nasid_list.widget_p[widget] =
- sn_flush_device_list;
- }
-
+ if (sn_flush_device_kernel)
+ hubdev->hdi_flush_nasid_list.widget_p[widget] =
+ sn_flush_device_kernel;
+ }
}
-
}
/*
@@ -256,7 +268,7 @@ static void sn_fixup_ionodes(void)
*/
static void
sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
- int64_t * pci_addrs)
+ s64 * pci_addrs)
{
struct pci_controller *controller = PCI_CONTROLLER(dev->bus);
unsigned int i;
@@ -316,7 +328,7 @@ void sn_pci_fixup_slot(struct pci_dev *dev)
struct pci_bus *host_pci_bus;
struct pci_dev *host_pci_dev;
struct pcidev_info *pcidev_info;
- int64_t pci_addrs[PCI_ROM_RESOURCE + 1];
+ s64 pci_addrs[PCI_ROM_RESOURCE + 1];
struct sn_irq_info *sn_irq_info;
unsigned long size;
unsigned int bus_no, devfn;
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 01d18b7b5bb3..ec37084bdc17 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -28,7 +28,7 @@ extern int sn_ioif_inited;
static struct list_head **sn_irq_lh;
static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
-static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
+static inline u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
u64 sn_irq_info,
int req_irq, nasid_t req_nasid,
int req_slice)
@@ -123,7 +123,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
sn_irq_lh[irq], list) {
- uint64_t bridge;
+ u64 bridge;
int local_widget, status;
nasid_t local_nasid;
struct sn_irq_info *new_irq_info;
@@ -134,7 +134,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
break;
memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
- bridge = (uint64_t) new_irq_info->irq_bridge;
+ bridge = (u64) new_irq_info->irq_bridge;
if (!bridge) {
kfree(new_irq_info);
break; /* irq is not a device interrupt */
@@ -349,10 +349,10 @@ static void force_interrupt(int irq)
*/
static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
{
- uint64_t regval;
+ u64 regval;
int irr_reg_num;
int irr_bit;
- uint64_t irr_reg;
+ u64 irr_reg;
struct pcidev_info *pcidev_info;
struct pcibus_info *pcibus_info;
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 493fb3f38dc3..d263d3e8fbb9 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -77,12 +77,6 @@ static void tiocx_bus_release(struct device *dev)
kfree(to_cx_dev(dev));
}
-struct bus_type tiocx_bus_type = {
- .name = "tiocx",
- .match = tiocx_match,
- .uevent = tiocx_uevent,
-};
-
/**
* cx_device_match - Find cx_device in the id table.
* @ids: id table from driver
@@ -149,6 +143,14 @@ static int cx_driver_remove(struct device *dev)
return 0;
}
+struct bus_type tiocx_bus_type = {
+ .name = "tiocx",
+ .match = tiocx_match,
+ .uevent = tiocx_uevent,
+ .probe = cx_device_probe,
+ .remove = cx_driver_remove,
+};
+
/**
* cx_driver_register - Register the driver.
* @cx_driver: driver table (cx_drv struct) from driver
@@ -162,8 +164,6 @@ int cx_driver_register(struct cx_drv *cx_driver)
{
cx_driver->driver.name = cx_driver->name;
cx_driver->driver.bus = &tiocx_bus_type;
- cx_driver->driver.probe = cx_device_probe;
- cx_driver->driver.remove = cx_driver_remove;
return driver_register(&cx_driver->driver);
}
@@ -245,7 +245,7 @@ static int cx_device_reload(struct cx_dev *cx_dev)
cx_dev->bt);
}
-static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
+static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget,
u64 sn_irq_info,
int req_irq, nasid_t req_nasid,
int req_slice)
@@ -302,7 +302,7 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq,
void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
{
- uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
+ u64 bridge = (u64) sn_irq_info->irq_bridge;
nasid_t nasid = NASID_GET(bridge);
int widget;
@@ -313,12 +313,12 @@ void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
}
}
-uint64_t tiocx_dma_addr(uint64_t addr)
+u64 tiocx_dma_addr(u64 addr)
{
return PHYS_TO_TIODMA(addr);
}
-uint64_t tiocx_swin_base(int nasid)
+u64 tiocx_swin_base(int nasid)
{
return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
}
@@ -335,8 +335,8 @@ EXPORT_SYMBOL(tiocx_swin_base);
static void tio_conveyor_set(nasid_t nasid, int enable_flag)
{
- uint64_t ice_frz;
- uint64_t disable_cb = (1ull << 61);
+ u64 ice_frz;
+ u64 disable_cb = (1ull << 61);
if (!(nasid & 1))
return;
@@ -388,7 +388,7 @@ static int is_fpga_tio(int nasid, int *bt)
static int bitstream_loaded(nasid_t nasid)
{
- uint64_t cx_credits;
+ u64 cx_credits;
cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3);
cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK;
@@ -404,14 +404,14 @@ static int tiocx_reload(struct cx_dev *cx_dev)
nasid_t nasid = cx_dev->cx_id.nasid;
if (bitstream_loaded(nasid)) {
- uint64_t cx_id;
+ u64 cx_id;
int rv;
rv = ia64_sn_sysctl_tio_clock_reset(nasid);
if (rv) {
printk(KERN_ALERT "CX port JTAG reset failed.\n");
} else {
- cx_id = *(volatile uint64_t *)
+ cx_id = *(volatile u64 *)
(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
WIDGET_ID);
part_num = XWIDGET_PART_NUM(cx_id);
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
deleted file mode 100644
index 5483a9f227d4..000000000000
--- a/arch/ia64/sn/kernel/xpc.h
+++ /dev/null
@@ -1,1273 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
- */
-
-
-/*
- * Cross Partition Communication (XPC) structures and macros.
- */
-
-#ifndef _IA64_SN_KERNEL_XPC_H
-#define _IA64_SN_KERNEL_XPC_H
-
-
-#include <linux/config.h>
-#include <linux/interrupt.h>
-#include <linux/sysctl.h>
-#include <linux/device.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/sn/bte.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/mspec.h>
-#include <asm/sn/shub_mmr.h>
-#include <asm/sn/xp.h>
-
-
-/*
- * XPC Version numbers consist of a major and minor number. XPC can always
- * talk to versions with same major #, and never talk to versions with a
- * different major #.
- */
-#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
-#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
-#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
-
-
-/*
- * The next macros define word or bit representations for given
- * C-brick nasid in either the SAL provided bit array representing
- * nasids in the partition/machine or the AMO_t array used for
- * inter-partition initiation communications.
- *
- * For SN2 machines, C-Bricks are alway even numbered NASIDs. As
- * such, some space will be saved by insisting that nasid information
- * passed from SAL always be packed for C-Bricks and the
- * cross-partition interrupts use the same packing scheme.
- */
-#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
-#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
-#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
- (1UL << XPC_NASID_B_INDEX(_n)))
-#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
-
-#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
-#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
-
-/* define the process name of HB checker and the CPU it is pinned to */
-#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
-#define XPC_HB_CHECK_CPU 0
-
-/* define the process name of the discovery thread */
-#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
-
-
-/*
- * the reserved page
- *
- * SAL reserves one page of memory per partition for XPC. Though a full page
- * in length (16384 bytes), its starting address is not page aligned, but it
- * is cacheline aligned. The reserved page consists of the following:
- *
- * reserved page header
- *
- * The first cacheline of the reserved page contains the header
- * (struct xpc_rsvd_page). Before SAL initialization has completed,
- * SAL has set up the following fields of the reserved page header:
- * SAL_signature, SAL_version, partid, and nasids_size. The other
- * fields are set up by XPC. (xpc_rsvd_page points to the local
- * partition's reserved page.)
- *
- * part_nasids mask
- * mach_nasids mask
- *
- * SAL also sets up two bitmaps (or masks), one that reflects the actual
- * nasids in this partition (part_nasids), and the other that reflects
- * the actual nasids in the entire machine (mach_nasids). We're only
- * interested in the even numbered nasids (which contain the processors
- * and/or memory), so we only need half as many bits to represent the
- * nasids. The part_nasids mask is located starting at the first cacheline
- * following the reserved page header. The mach_nasids mask follows right
- * after the part_nasids mask. The size in bytes of each mask is reflected
- * by the reserved page header field 'nasids_size'. (Local partition's
- * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
- *
- * vars
- * vars part
- *
- * Immediately following the mach_nasids mask are the XPC variables
- * required by other partitions. First are those that are generic to all
- * partitions (vars), followed on the next available cacheline by those
- * which are partition specific (vars part). These are setup by XPC.
- * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
- *
- * Note: Until vars_pa is set, the partition XPC code has not been initialized.
- */
-struct xpc_rsvd_page {
- u64 SAL_signature; /* SAL: unique signature */
- u64 SAL_version; /* SAL: version */
- u8 partid; /* SAL: partition ID */
- u8 version;
- u8 pad1[6]; /* align to next u64 in cacheline */
- volatile u64 vars_pa;
- struct timespec stamp; /* time when reserved page was setup by XPC */
- u64 pad2[9]; /* align to last u64 in cacheline */
- u64 nasids_size; /* SAL: size of each nasid mask in bytes */
-};
-
-#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
-
-#define XPC_SUPPORTS_RP_STAMP(_version) \
- (_version >= _XPC_VERSION(1,1))
-
-/*
- * compare stamps - the return value is:
- *
- * < 0, if stamp1 < stamp2
- * = 0, if stamp1 == stamp2
- * > 0, if stamp1 > stamp2
- */
-static inline int
-xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
-{
- int ret;
-
-
- if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
- ret = stamp1->tv_nsec - stamp2->tv_nsec;
- }
- return ret;
-}
-
-
-/*
- * Define the structures by which XPC variables can be exported to other
- * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
- */
-
-/*
- * The following structure describes the partition generic variables
- * needed by other partitions in order to properly initialize.
- *
- * struct xpc_vars version number also applies to struct xpc_vars_part.
- * Changes to either structure and/or related functionality should be
- * reflected by incrementing either the major or minor version numbers
- * of struct xpc_vars.
- */
-struct xpc_vars {
- u8 version;
- u64 heartbeat;
- u64 heartbeating_to_mask;
- u64 heartbeat_offline; /* if 0, heartbeat should be changing */
- int act_nasid;
- int act_phys_cpuid;
- u64 vars_part_pa;
- u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
- AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
-};
-
-#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
-
-#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
- (_version >= _XPC_VERSION(3,1))
-
-
-static inline int
-xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
-{
- return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
-}
-
-static inline void
-xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
-{
- u64 old_mask, new_mask;
-
- do {
- old_mask = vars->heartbeating_to_mask;
- new_mask = (old_mask | (1UL << partid));
- } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
- old_mask);
-}
-
-static inline void
-xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
-{
- u64 old_mask, new_mask;
-
- do {
- old_mask = vars->heartbeating_to_mask;
- new_mask = (old_mask & ~(1UL << partid));
- } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
- old_mask);
-}
-
-
-/*
- * The AMOs page consists of a number of AMO variables which are divided into
- * four groups, The first two groups are used to identify an IRQ's sender.
- * These two groups consist of 64 and 128 AMO variables respectively. The last
- * two groups, consisting of just one AMO variable each, are used to identify
- * the remote partitions that are currently engaged (from the viewpoint of
- * the XPC running on the remote partition).
- */
-#define XPC_NOTIFY_IRQ_AMOS 0
-#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
-#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
-#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
-
-
-/*
- * The following structure describes the per partition specific variables.
- *
- * An array of these structures, one per partition, will be defined. As a
- * partition becomes active XPC will copy the array entry corresponding to
- * itself from that partition. It is desirable that the size of this
- * structure evenly divide into a cacheline, such that none of the entries
- * in this array crosses a cacheline boundary. As it is now, each entry
- * occupies half a cacheline.
- */
-struct xpc_vars_part {
- volatile u64 magic;
-
- u64 openclose_args_pa; /* physical address of open and close args */
- u64 GPs_pa; /* physical address of Get/Put values */
-
- u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
- int IPI_nasid; /* nasid of where to send IPIs */
- int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
-
- u8 nchannels; /* #of defined channels supported */
-
- u8 reserved[23]; /* pad to a full 64 bytes */
-};
-
-/*
- * The vars_part MAGIC numbers play a part in the first contact protocol.
- *
- * MAGIC1 indicates that the per partition specific variables for a remote
- * partition have been initialized by this partition.
- *
- * MAGIC2 indicates that this partition has pulled the remote partititions
- * per partition variables that pertain to this partition.
- */
-#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
-#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
-
-
-/* the reserved page sizes and offsets */
-
-#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
-#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
-
-#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE)
-#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS(_rp) ((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words)
-#define XPC_RP_VARS_PART(_rp) (struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(rp) + XPC_RP_VARS_SIZE)
-
-
-/*
- * Functions registered by add_timer() or called by kernel_thread() only
- * allow for a single 64-bit argument. The following macros can be used to
- * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
- * the passed argument.
- */
-#define XPC_PACK_ARGS(_arg1, _arg2) \
- ((((u64) _arg1) & 0xffffffff) | \
- ((((u64) _arg2) & 0xffffffff) << 32))
-
-#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
-#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
-
-
-
-/*
- * Define a Get/Put value pair (pointers) used with a message queue.
- */
-struct xpc_gp {
- volatile s64 get; /* Get value */
- volatile s64 put; /* Put value */
-};
-
-#define XPC_GP_SIZE \
- L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
-
-
-
-/*
- * Define a structure that contains arguments associated with opening and
- * closing a channel.
- */
-struct xpc_openclose_args {
- u16 reason; /* reason why channel is closing */
- u16 msg_size; /* sizeof each message entry */
- u16 remote_nentries; /* #of message entries in remote msg queue */
- u16 local_nentries; /* #of message entries in local msg queue */
- u64 local_msgqueue_pa; /* physical address of local message queue */
-};
-
-#define XPC_OPENCLOSE_ARGS_SIZE \
- L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
-
-
-
-/* struct xpc_msg flags */
-
-#define XPC_M_DONE 0x01 /* msg has been received/consumed */
-#define XPC_M_READY 0x02 /* msg is ready to be sent */
-#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
-
-
-#define XPC_MSG_ADDRESS(_payload) \
- ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
-
-
-
-/*
- * Defines notify entry.
- *
- * This is used to notify a message's sender that their message was received
- * and consumed by the intended recipient.
- */
-struct xpc_notify {
- struct semaphore sema; /* notify semaphore */
- volatile u8 type; /* type of notification */
-
- /* the following two fields are only used if type == XPC_N_CALL */
- xpc_notify_func func; /* user's notify function */
- void *key; /* pointer to user's key */
-};
-
-/* struct xpc_notify type of notification */
-
-#define XPC_N_CALL 0x01 /* notify function provided by user */
-
-
-
-/*
- * Define the structure that manages all the stuff required by a channel. In
- * particular, they are used to manage the messages sent across the channel.
- *
- * This structure is private to a partition, and is NOT shared across the
- * partition boundary.
- *
- * There is an array of these structures for each remote partition. It is
- * allocated at the time a partition becomes active. The array contains one
- * of these structures for each potential channel connection to that partition.
- *
- * Each of these structures manages two message queues (circular buffers).
- * They are allocated at the time a channel connection is made. One of
- * these message queues (local_msgqueue) holds the locally created messages
- * that are destined for the remote partition. The other of these message
- * queues (remote_msgqueue) is a locally cached copy of the remote partition's
- * own local_msgqueue.
- *
- * The following is a description of the Get/Put pointers used to manage these
- * two message queues. Consider the local_msgqueue to be on one partition
- * and the remote_msgqueue to be its cached copy on another partition. A
- * description of what each of the lettered areas contains is included.
- *
- *
- * local_msgqueue remote_msgqueue
- *
- * |/////////| |/////////|
- * w_remote_GP.get --> +---------+ |/////////|
- * | F | |/////////|
- * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
- * | | | |
- * | | | E |
- * | | | |
- * | | +---------+ <-- w_local_GP.get
- * | B | |/////////|
- * | | |////D////|
- * | | |/////////|
- * | | +---------+ <-- w_remote_GP.put
- * | | |////C////|
- * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
- * | | |/////////|
- * | A | |/////////|
- * | | |/////////|
- * w_local_GP.put --> +---------+ |/////////|
- * |/////////| |/////////|
- *
- *
- * ( remote_GP.[get|put] are cached copies of the remote
- * partition's local_GP->[get|put], and thus their values can
- * lag behind their counterparts on the remote partition. )
- *
- *
- * A - Messages that have been allocated, but have not yet been sent to the
- * remote partition.
- *
- * B - Messages that have been sent, but have not yet been acknowledged by the
- * remote partition as having been received.
- *
- * C - Area that needs to be prepared for the copying of sent messages, by
- * the clearing of the message flags of any previously received messages.
- *
- * D - Area into which sent messages are to be copied from the remote
- * partition's local_msgqueue and then delivered to their intended
- * recipients. [ To allow for a multi-message copy, another pointer
- * (next_msg_to_pull) has been added to keep track of the next message
- * number needing to be copied (pulled). It chases after w_remote_GP.put.
- * Any messages lying between w_local_GP.get and next_msg_to_pull have
- * been copied and are ready to be delivered. ]
- *
- * E - Messages that have been copied and delivered, but have not yet been
- * acknowledged by the recipient as having been received.
- *
- * F - Messages that have been acknowledged, but XPC has not yet notified the
- * sender that the message was received by its intended recipient.
- * This is also an area that needs to be prepared for the allocating of
- * new messages, by the clearing of the message flags of the acknowledged
- * messages.
- */
-struct xpc_channel {
- partid_t partid; /* ID of remote partition connected */
- spinlock_t lock; /* lock for updating this structure */
- u32 flags; /* general flags */
-
- enum xpc_retval reason; /* reason why channel is disconnect'g */
- int reason_line; /* line# disconnect initiated from */
-
- u16 number; /* channel # */
-
- u16 msg_size; /* sizeof each msg entry */
- u16 local_nentries; /* #of msg entries in local msg queue */
- u16 remote_nentries; /* #of msg entries in remote msg queue*/
-
- void *local_msgqueue_base; /* base address of kmalloc'd space */
- struct xpc_msg *local_msgqueue; /* local message queue */
- void *remote_msgqueue_base; /* base address of kmalloc'd space */
- struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */
- /* local message queue */
- u64 remote_msgqueue_pa; /* phys addr of remote partition's */
- /* local message queue */
-
- atomic_t references; /* #of external references to queues */
-
- atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
- wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
-
- u8 delayed_IPI_flags; /* IPI flags received, but delayed */
- /* action until channel disconnected */
-
- /* queue of msg senders who want to be notified when msg received */
-
- atomic_t n_to_notify; /* #of msg senders to notify */
- struct xpc_notify *notify_queue;/* notify queue for messages sent */
-
- xpc_channel_func func; /* user's channel function */
- void *key; /* pointer to user's key */
-
- struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
- struct semaphore wdisconnect_sema; /* wait for channel disconnect */
-
- struct xpc_openclose_args *local_openclose_args; /* args passed on */
- /* opening or closing of channel */
-
- /* various flavors of local and remote Get/Put values */
-
- struct xpc_gp *local_GP; /* local Get/Put values */
- struct xpc_gp remote_GP; /* remote Get/Put values */
- struct xpc_gp w_local_GP; /* working local Get/Put values */
- struct xpc_gp w_remote_GP; /* working remote Get/Put values */
- s64 next_msg_to_pull; /* Put value of next msg to pull */
-
- /* kthread management related fields */
-
-// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps
-// >>> allow the assigned limit be unbounded and let the idle limit be dynamic
-// >>> dependent on activity over the last interval of time
- atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
- u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
- atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
- u32 kthreads_idle_limit; /* limit on #of kthreads idle */
- atomic_t kthreads_active; /* #of kthreads actively working */
- // >>> following field is temporary
- u32 kthreads_created; /* total #of kthreads created */
-
- wait_queue_head_t idle_wq; /* idle kthread wait queue */
-
-} ____cacheline_aligned;
-
-
-/* struct xpc_channel flags */
-
-#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
-
-#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */
-#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */
-#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */
-#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */
-
-#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */
-#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */
-#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */
-#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */
-
-#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */
-#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */
-#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */
-#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */
-
-#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */
-#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */
-#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */
-#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */
-
-
-
-/*
- * Manages channels on a partition basis. There is one of these structures
- * for each partition (a partition will never utilize the structure that
- * represents itself).
- */
-struct xpc_partition {
-
- /* XPC HB infrastructure */
-
- u8 remote_rp_version; /* version# of partition's rsvd pg */
- struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
- u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
- u64 remote_vars_pa; /* phys addr of partition's vars */
- u64 remote_vars_part_pa; /* phys addr of partition's vars part */
- u64 last_heartbeat; /* HB at last read */
- u64 remote_amos_page_pa; /* phys addr of partition's amos page */
- int remote_act_nasid; /* active part's act/deact nasid */
- int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
- u32 act_IRQ_rcvd; /* IRQs since activation */
- spinlock_t act_lock; /* protect updating of act_state */
- u8 act_state; /* from XPC HB viewpoint */
- u8 remote_vars_version; /* version# of partition's vars */
- enum xpc_retval reason; /* reason partition is deactivating */
- int reason_line; /* line# deactivation initiated from */
- int reactivate_nasid; /* nasid in partition to reactivate */
-
- unsigned long disengage_request_timeout; /* timeout in jiffies */
- struct timer_list disengage_request_timer;
-
-
- /* XPC infrastructure referencing and teardown control */
-
- volatile u8 setup_state; /* infrastructure setup state */
- wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
- atomic_t references; /* #of references to infrastructure */
-
-
- /*
- * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
- * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
- * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
- * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
- */
-
-
- u8 nchannels; /* #of defined channels supported */
- atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
- atomic_t nchannels_engaged;/* #of channels engaged with remote part */
- struct xpc_channel *channels;/* array of channel structures */
-
- void *local_GPs_base; /* base address of kmalloc'd space */
- struct xpc_gp *local_GPs; /* local Get/Put values */
- void *remote_GPs_base; /* base address of kmalloc'd space */
- struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */
- /* values */
- u64 remote_GPs_pa; /* phys address of remote partition's local */
- /* Get/Put values */
-
-
- /* fields used to pass args when opening or closing a channel */
-
- void *local_openclose_args_base; /* base address of kmalloc'd space */
- struct xpc_openclose_args *local_openclose_args; /* local's args */
- void *remote_openclose_args_base; /* base address of kmalloc'd space */
- struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
- /* args */
- u64 remote_openclose_args_pa; /* phys addr of remote's args */
-
-
- /* IPI sending, receiving and handling related fields */
-
- int remote_IPI_nasid; /* nasid of where to send IPIs */
- int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
- AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
-
- AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
- u64 local_IPI_amo; /* IPI amo flags yet to be handled */
- char IPI_owner[8]; /* IPI owner's name */
- struct timer_list dropped_IPI_timer; /* dropped IPI timer */
-
- spinlock_t IPI_lock; /* IPI handler lock */
-
-
- /* channel manager related fields */
-
- atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
- wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
-
-} ____cacheline_aligned;
-
-
-/* struct xpc_partition act_state values (for XPC HB) */
-
-#define XPC_P_INACTIVE 0x00 /* partition is not active */
-#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
-#define XPC_P_ACTIVATING 0x02 /* activation thread started */
-#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
-#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
-
-
-#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
- xpc_deactivate_partition(__LINE__, (_p), (_reason))
-
-
-/* struct xpc_partition setup_state values */
-
-#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
-#define XPC_P_SETUP 0x01 /* infrastructure is setup */
-#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
-#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
-
-
-
-/*
- * struct xpc_partition IPI_timer #of seconds to wait before checking for
- * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
- * after the IPI was received.
- */
-#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
-
-
-/* number of seconds to wait for other partitions to disengage */
-#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
-
-/* interval in seconds to print 'waiting disengagement' messages */
-#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
-
-
-#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
-
-
-
-/* found in xp_main.c */
-extern struct xpc_registration xpc_registrations[];
-
-
-/* found in xpc_main.c */
-extern struct device *xpc_part;
-extern struct device *xpc_chan;
-extern int xpc_disengage_request_timelimit;
-extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
-extern void xpc_dropped_IPI_check(struct xpc_partition *);
-extern void xpc_activate_partition(struct xpc_partition *);
-extern void xpc_activate_kthreads(struct xpc_channel *, int);
-extern void xpc_create_kthreads(struct xpc_channel *, int);
-extern void xpc_disconnect_wait(int);
-
-
-/* found in xpc_partition.c */
-extern int xpc_exiting;
-extern struct xpc_vars *xpc_vars;
-extern struct xpc_rsvd_page *xpc_rsvd_page;
-extern struct xpc_vars_part *xpc_vars_part;
-extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char xpc_remote_copy_buffer[];
-extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
-extern void xpc_allow_IPI_ops(void);
-extern void xpc_restrict_IPI_ops(void);
-extern int xpc_identify_act_IRQ_sender(void);
-extern int xpc_partition_disengaged(struct xpc_partition *);
-extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
-extern void xpc_mark_partition_inactive(struct xpc_partition *);
-extern void xpc_discovery(void);
-extern void xpc_check_remote_hb(void);
-extern void xpc_deactivate_partition(const int, struct xpc_partition *,
- enum xpc_retval);
-extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *);
-
-
-/* found in xpc_channel.c */
-extern void xpc_initiate_connect(int);
-extern void xpc_initiate_disconnect(int);
-extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **);
-extern enum xpc_retval xpc_initiate_send(partid_t, int, void *);
-extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *,
- xpc_notify_func, void *);
-extern void xpc_initiate_received(partid_t, int, void *);
-extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *);
-extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *);
-extern void xpc_process_channel_activity(struct xpc_partition *);
-extern void xpc_connected_callout(struct xpc_channel *);
-extern void xpc_deliver_msg(struct xpc_channel *);
-extern void xpc_disconnect_channel(const int, struct xpc_channel *,
- enum xpc_retval, unsigned long *);
-extern void xpc_disconnecting_callout(struct xpc_channel *);
-extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
-extern void xpc_teardown_infrastructure(struct xpc_partition *);
-
-
-
-static inline void
-xpc_wakeup_channel_mgr(struct xpc_partition *part)
-{
- if (atomic_inc_return(&part->channel_mgr_requests) == 1) {
- wake_up(&part->channel_mgr_wq);
- }
-}
-
-
-
-/*
- * These next two inlines are used to keep us from tearing down a channel's
- * msg queues while a thread may be referencing them.
- */
-static inline void
-xpc_msgqueue_ref(struct xpc_channel *ch)
-{
- atomic_inc(&ch->references);
-}
-
-static inline void
-xpc_msgqueue_deref(struct xpc_channel *ch)
-{
- s32 refs = atomic_dec_return(&ch->references);
-
- DBUG_ON(refs < 0);
- if (refs == 0) {
- xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
- }
-}
-
-
-
-#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
- xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
-
-
-/*
- * These two inlines are used to keep us from tearing down a partition's
- * setup infrastructure while a thread may be referencing it.
- */
-static inline void
-xpc_part_deref(struct xpc_partition *part)
-{
- s32 refs = atomic_dec_return(&part->references);
-
-
- DBUG_ON(refs < 0);
- if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) {
- wake_up(&part->teardown_wq);
- }
-}
-
-static inline int
-xpc_part_ref(struct xpc_partition *part)
-{
- int setup;
-
-
- atomic_inc(&part->references);
- setup = (part->setup_state == XPC_P_SETUP);
- if (!setup) {
- xpc_part_deref(part);
- }
- return setup;
-}
-
-
-
-/*
- * The following macro is to be used for the setting of the reason and
- * reason_line fields in both the struct xpc_channel and struct xpc_partition
- * structures.
- */
-#define XPC_SET_REASON(_p, _reason, _line) \
- { \
- (_p)->reason = _reason; \
- (_p)->reason_line = _line; \
- }
-
-
-
-/*
- * This next set of inlines are used to keep track of when a partition is
- * potentially engaged in accessing memory belonging to another partition.
- */
-
-static inline void
-xpc_mark_partition_engaged(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
- (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
-
-
- local_irq_save(irq_flags);
-
- /* set bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
- (1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable), xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_mark_partition_disengaged(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
- (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
-
-
- local_irq_save(irq_flags);
-
- /* clear bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
- ~(1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable), xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_request_partition_disengage(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
- (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
-
- local_irq_save(irq_flags);
-
- /* set bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
- (1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable), xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline void
-xpc_cancel_partition_disengage_request(struct xpc_partition *part)
-{
- unsigned long irq_flags;
- AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
- (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
-
-
- local_irq_save(irq_flags);
-
- /* clear bit corresponding to our partid in remote partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
- ~(1UL << sn_partition_id));
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
- variable), xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-}
-
-static inline u64
-xpc_partition_engaged(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
-
-
- /* return our partition's AMO variable ANDed with partid_mask */
- return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
- partid_mask);
-}
-
-static inline u64
-xpc_partition_disengage_requested(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
-
- /* return our partition's AMO variable ANDed with partid_mask */
- return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
- partid_mask);
-}
-
-static inline void
-xpc_clear_partition_engaged(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
-
-
- /* clear bit(s) based on partid_mask in our partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
- ~partid_mask);
-}
-
-static inline void
-xpc_clear_partition_disengage_request(u64 partid_mask)
-{
- AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
-
-
- /* clear bit(s) based on partid_mask in our partition's AMO */
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
- ~partid_mask);
-}
-
-
-
-/*
- * The following set of macros and inlines are used for the sending and
- * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
- * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
- * the other that is associated with channel activity (SGI_XPC_NOTIFY).
- */
-
-static inline u64
-xpc_IPI_receive(AMO_t *amo)
-{
- return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR);
-}
-
-
-static inline enum xpc_retval
-xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
-{
- int ret = 0;
- unsigned long irq_flags;
-
-
- local_irq_save(irq_flags);
-
- FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag);
- sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
-
- /*
- * We must always use the nofault function regardless of whether we
- * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
- * didn't, we'd never know that the other partition is down and would
- * keep sending IPIs and AMOs to it until the heartbeat times out.
- */
- ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
- xp_nofault_PIOR_target));
-
- local_irq_restore(irq_flags);
-
- return ((ret == 0) ? xpcSuccess : xpcPioReadError);
-}
-
-
-/*
- * IPIs associated with SGI_XPC_ACTIVATE IRQ.
- */
-
-/*
- * Flag the appropriate AMO variable and send an IPI to the specified node.
- */
-static inline void
-xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
- int to_phys_cpuid)
-{
- int w_index = XPC_NASID_W_INDEX(from_nasid);
- int b_index = XPC_NASID_B_INDEX(from_nasid);
- AMO_t *amos = (AMO_t *) __va(amos_page_pa +
- (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
-
-
- (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
- to_phys_cpuid, SGI_XPC_ACTIVATE);
-}
-
-static inline void
-xpc_IPI_send_activate(struct xpc_vars *vars)
-{
- xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
- vars->act_nasid, vars->act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_activated(struct xpc_partition *part)
-{
- xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
- part->remote_act_nasid, part->remote_act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_reactivate(struct xpc_partition *part)
-{
- xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
- xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
-}
-
-static inline void
-xpc_IPI_send_disengage(struct xpc_partition *part)
-{
- xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
- part->remote_act_nasid, part->remote_act_phys_cpuid);
-}
-
-
-/*
- * IPIs associated with SGI_XPC_NOTIFY IRQ.
- */
-
-/*
- * Send an IPI to the remote partition that is associated with the
- * specified channel.
- */
-#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
- xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
-
-static inline void
-xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
- unsigned long *irq_flags)
-{
- struct xpc_partition *part = &xpc_partitions[ch->partid];
- enum xpc_retval ret;
-
-
- if (likely(part->act_state != XPC_P_DEACTIVATING)) {
- ret = xpc_IPI_send(part->remote_IPI_amo_va,
- (u64) ipi_flag << (ch->number * 8),
- part->remote_IPI_nasid,
- part->remote_IPI_phys_cpuid,
- SGI_XPC_NOTIFY);
- dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
- ipi_flag_string, ch->partid, ch->number, ret);
- if (unlikely(ret != xpcSuccess)) {
- if (irq_flags != NULL) {
- spin_unlock_irqrestore(&ch->lock, *irq_flags);
- }
- XPC_DEACTIVATE_PARTITION(part, ret);
- if (irq_flags != NULL) {
- spin_lock_irqsave(&ch->lock, *irq_flags);
- }
- }
- }
-}
-
-
-/*
- * Make it look like the remote partition, which is associated with the
- * specified channel, sent us an IPI. This faked IPI will be handled
- * by xpc_dropped_IPI_check().
- */
-#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
- xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
-
-static inline void
-xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
- char *ipi_flag_string)
-{
- struct xpc_partition *part = &xpc_partitions[ch->partid];
-
-
- FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable),
- FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8)));
- dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
- ipi_flag_string, ch->partid, ch->number);
-}
-
-
-/*
- * The sending and receiving of IPIs includes the setting of an AMO variable
- * to indicate the reason the IPI was sent. The 64-bit variable is divided
- * up into eight bytes, ordered from right to left. Byte zero pertains to
- * channel 0, byte one to channel 1, and so on. Each byte is described by
- * the following IPI flags.
- */
-
-#define XPC_IPI_CLOSEREQUEST 0x01
-#define XPC_IPI_CLOSEREPLY 0x02
-#define XPC_IPI_OPENREQUEST 0x04
-#define XPC_IPI_OPENREPLY 0x08
-#define XPC_IPI_MSGREQUEST 0x10
-
-
-/* given an AMO variable and a channel#, get its associated IPI flags */
-#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
-#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
-
-#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f)
-#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010)
-
-
-static inline void
-xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- struct xpc_openclose_args *args = ch->local_openclose_args;
-
-
- args->reason = ch->reason;
-
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- struct xpc_openclose_args *args = ch->local_openclose_args;
-
-
- args->msg_size = ch->msg_size;
- args->local_nentries = ch->local_nentries;
-
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
-{
- struct xpc_openclose_args *args = ch->local_openclose_args;
-
-
- args->remote_nentries = ch->remote_nentries;
- args->local_nentries = ch->local_nentries;
- args->local_msgqueue_pa = __pa(ch->local_msgqueue);
-
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
-}
-
-static inline void
-xpc_IPI_send_msgrequest(struct xpc_channel *ch)
-{
- XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
-}
-
-static inline void
-xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
-{
- XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
-}
-
-
-/*
- * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
- * pages are located in the lowest granule. The lowest granule uses 4k pages
- * for cached references and an alternate TLB handler to never provide a
- * cacheable mapping for the entire region. This will prevent speculative
- * reading of cached copies of our lines from being issued which will cause
- * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
- * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
- * activation and 2 AMO variables for partition deactivation.
- */
-static inline AMO_t *
-xpc_IPI_init(int index)
-{
- AMO_t *amo = xpc_vars->amos_page + index;
-
-
- (void) xpc_IPI_receive(amo); /* clear AMO variable */
- return amo;
-}
-
-
-
-static inline enum xpc_retval
-xpc_map_bte_errors(bte_result_t error)
-{
- switch (error) {
- case BTE_SUCCESS: return xpcSuccess;
- case BTEFAIL_DIR: return xpcBteDirectoryError;
- case BTEFAIL_POISON: return xpcBtePoisonError;
- case BTEFAIL_WERR: return xpcBteWriteError;
- case BTEFAIL_ACCESS: return xpcBteAccessError;
- case BTEFAIL_PWERR: return xpcBtePWriteError;
- case BTEFAIL_PRERR: return xpcBtePReadError;
- case BTEFAIL_TOUT: return xpcBteTimeOutError;
- case BTEFAIL_XTERR: return xpcBteXtalkError;
- case BTEFAIL_NOTAVAIL: return xpcBteNotAvailable;
- default: return xpcBteUnmappedError;
- }
-}
-
-
-
-static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
- /* see if kmalloc will give us cachline aligned memory by default */
- *base = kmalloc(size, flags);
- if (*base == NULL) {
- return NULL;
- }
- if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
- return *base;
- }
- kfree(*base);
-
- /* nope, we'll have to do it ourselves */
- *base = kmalloc(size + L1_CACHE_BYTES, flags);
- if (*base == NULL) {
- return NULL;
- }
- return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
-/*
- * Check to see if there is any channel activity to/from the specified
- * partition.
- */
-static inline void
-xpc_check_for_channel_activity(struct xpc_partition *part)
-{
- u64 IPI_amo;
- unsigned long irq_flags;
-
-
- IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
- if (IPI_amo == 0) {
- return;
- }
-
- spin_lock_irqsave(&part->IPI_lock, irq_flags);
- part->local_IPI_amo |= IPI_amo;
- spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
-
- dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
- XPC_PARTID(part), IPI_amo);
-
- xpc_wakeup_channel_mgr(part);
-}
-
-
-#endif /* _IA64_SN_KERNEL_XPC_H */
-
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index abf4fc2a87bb..0c0a68902409 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
@@ -24,7 +24,7 @@
#include <linux/slab.h>
#include <asm/sn/bte.h>
#include <asm/sn/sn_sal.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
/*
@@ -779,6 +779,12 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
/* both sides are disconnected now */
+ if (ch->flags & XPC_C_CONNECTCALLOUT) {
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ xpc_disconnect_callout(ch, xpcDisconnected);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+ }
+
/* it's now safe to free the channel's message queues */
xpc_free_msgqueues(ch);
@@ -1645,7 +1651,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
void
-xpc_disconnecting_callout(struct xpc_channel *ch)
+xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
{
/*
* Let the channel's registerer know that the channel is being
@@ -1654,15 +1660,13 @@ xpc_disconnecting_callout(struct xpc_channel *ch)
*/
if (ch->func != NULL) {
- dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
- " partid=%d, channel=%d\n", ch->partid, ch->number);
+ dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+ "channel=%d\n", reason, ch->partid, ch->number);
- ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
- ch->key);
+ ch->func(reason, ch->partid, ch->number, NULL, ch->key);
- dev_dbg(xpc_chan, "ch->func() returned, reason="
- "xpcDisconnecting, partid=%d, channel=%d\n",
- ch->partid, ch->number);
+ dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+ "channel=%d\n", reason, ch->partid, ch->number);
}
}
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index b617236524c6..8930586e0eb4 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
@@ -59,7 +59,7 @@
#include <asm/sn/sn_sal.h>
#include <asm/kdebug.h>
#include <asm/uaccess.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
/* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -82,6 +82,9 @@ struct device *xpc_part = &xpc_part_dbg_subname;
struct device *xpc_chan = &xpc_chan_dbg_subname;
+static int xpc_kdebug_ignore;
+
+
/* systune related variables for /proc/sys directories */
static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
@@ -162,6 +165,8 @@ static ctl_table xpc_sys_dir[] = {
};
static struct ctl_table_header *xpc_sysctl;
+/* non-zero if any remote partition disengage request was timed out */
+int xpc_disengage_request_timedout;
/* #of IRQs received */
static atomic_t xpc_act_IRQ_rcvd;
@@ -773,7 +778,7 @@ xpc_daemonize_kthread(void *args)
ch->flags |= XPC_C_DISCONNECTCALLOUT;
spin_unlock_irqrestore(&ch->lock, irq_flags);
- xpc_disconnecting_callout(ch);
+ xpc_disconnect_callout(ch, xpcDisconnecting);
} else {
spin_unlock_irqrestore(&ch->lock, irq_flags);
}
@@ -921,9 +926,9 @@ static void
xpc_do_exit(enum xpc_retval reason)
{
partid_t partid;
- int active_part_count;
+ int active_part_count, printed_waiting_msg = 0;
struct xpc_partition *part;
- unsigned long printmsg_time;
+ unsigned long printmsg_time, disengage_request_timeout = 0;
/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
@@ -953,7 +958,8 @@ xpc_do_exit(enum xpc_retval reason)
/* wait for all partitions to become inactive */
- printmsg_time = jiffies;
+ printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ xpc_disengage_request_timedout = 0;
do {
active_part_count = 0;
@@ -969,20 +975,39 @@ xpc_do_exit(enum xpc_retval reason)
active_part_count++;
XPC_DEACTIVATE_PARTITION(part, reason);
- }
- if (active_part_count == 0) {
- break;
+ if (part->disengage_request_timeout >
+ disengage_request_timeout) {
+ disengage_request_timeout =
+ part->disengage_request_timeout;
+ }
}
- if (jiffies >= printmsg_time) {
- dev_info(xpc_part, "waiting for partitions to "
- "deactivate/disengage, active count=%d, remote "
- "engaged=0x%lx\n", active_part_count,
- xpc_partition_engaged(1UL << partid));
-
- printmsg_time = jiffies +
+ if (xpc_partition_engaged(-1UL)) {
+ if (time_after(jiffies, printmsg_time)) {
+ dev_info(xpc_part, "waiting for remote "
+ "partitions to disengage, timeout in "
+ "%ld seconds\n",
+ (disengage_request_timeout - jiffies)
+ / HZ);
+ printmsg_time = jiffies +
(XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ printed_waiting_msg = 1;
+ }
+
+ } else if (active_part_count > 0) {
+ if (printed_waiting_msg) {
+ dev_info(xpc_part, "waiting for local partition"
+ " to disengage\n");
+ printed_waiting_msg = 0;
+ }
+
+ } else {
+ if (!xpc_disengage_request_timedout) {
+ dev_info(xpc_part, "all partitions have "
+ "disengaged\n");
+ }
+ break;
}
/* sleep for a 1/3 of a second or so */
@@ -1000,11 +1025,13 @@ xpc_do_exit(enum xpc_retval reason)
del_timer_sync(&xpc_hb_timer);
DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
- /* take ourselves off of the reboot_notifier_list */
- (void) unregister_reboot_notifier(&xpc_reboot_notifier);
+ if (reason == xpcUnloading) {
+ /* take ourselves off of the reboot_notifier_list */
+ (void) unregister_reboot_notifier(&xpc_reboot_notifier);
- /* take ourselves off of the die_notifier list */
- (void) unregister_die_notifier(&xpc_die_notifier);
+ /* take ourselves off of the die_notifier list */
+ (void) unregister_die_notifier(&xpc_die_notifier);
+ }
/* close down protections for IPI operations */
xpc_restrict_IPI_ops();
@@ -1020,7 +1047,35 @@ xpc_do_exit(enum xpc_retval reason)
/*
- * Called when the system is about to be either restarted or halted.
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+ enum xpc_retval reason;
+
+
+ switch (event) {
+ case SYS_RESTART:
+ reason = xpcSystemReboot;
+ break;
+ case SYS_HALT:
+ reason = xpcSystemHalt;
+ break;
+ case SYS_POWER_OFF:
+ reason = xpcSystemPoweroff;
+ break;
+ default:
+ reason = xpcSystemGoingDown;
+ }
+
+ xpc_do_exit(reason);
+ return NOTIFY_DONE;
+}
+
+
+/*
+ * Notify other partitions to disengage from all references to our memory.
*/
static void
xpc_die_disengage(void)
@@ -1028,7 +1083,7 @@ xpc_die_disengage(void)
struct xpc_partition *part;
partid_t partid;
unsigned long engaged;
- long time, print_time, disengage_request_timeout;
+ long time, printmsg_time, disengage_request_timeout;
/* keep xpc_hb_checker thread from doing anything (just in case) */
@@ -1055,57 +1110,53 @@ xpc_die_disengage(void)
}
}
- print_time = rtc_time();
- disengage_request_timeout = print_time +
+ time = rtc_time();
+ printmsg_time = time +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
+ disengage_request_timeout = time +
(xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
/* wait for all other partitions to disengage from us */
- while ((engaged = xpc_partition_engaged(-1UL)) &&
- (time = rtc_time()) < disengage_request_timeout) {
+ while (1) {
+ engaged = xpc_partition_engaged(-1UL);
+ if (!engaged) {
+ dev_info(xpc_part, "all partitions have disengaged\n");
+ break;
+ }
- if (time >= print_time) {
+ time = rtc_time();
+ if (time >= disengage_request_timeout) {
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ if (engaged & (1UL << partid)) {
+ dev_info(xpc_part, "disengage from "
+ "remote partition %d timed "
+ "out\n", partid);
+ }
+ }
+ break;
+ }
+
+ if (time >= printmsg_time) {
dev_info(xpc_part, "waiting for remote partitions to "
- "disengage, engaged=0x%lx\n", engaged);
- print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL *
+ "disengage, timeout in %ld seconds\n",
+ (disengage_request_timeout - time) /
+ sn_rtc_cycles_per_second);
+ printmsg_time = time +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL *
sn_rtc_cycles_per_second);
}
}
- dev_info(xpc_part, "finished waiting for remote partitions to "
- "disengage, engaged=0x%lx\n", engaged);
-}
-
-
-/*
- * This function is called when the system is being rebooted.
- */
-static int
-xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
-{
- enum xpc_retval reason;
-
-
- switch (event) {
- case SYS_RESTART:
- reason = xpcSystemReboot;
- break;
- case SYS_HALT:
- reason = xpcSystemHalt;
- break;
- case SYS_POWER_OFF:
- reason = xpcSystemPoweroff;
- break;
- default:
- reason = xpcSystemGoingDown;
- }
-
- xpc_do_exit(reason);
- return NOTIFY_DONE;
}
/*
- * This function is called when the system is being rebooted.
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
*/
static int
xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
@@ -1115,11 +1166,25 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
case DIE_MACHINE_HALT:
xpc_die_disengage();
break;
+
+ case DIE_KDEBUG_ENTER:
+ /* Should lack of heartbeat be ignored by other partitions? */
+ if (!xpc_kdebug_ignore) {
+ break;
+ }
+ /* fall through */
case DIE_MCA_MONARCH_ENTER:
case DIE_INIT_MONARCH_ENTER:
xpc_vars->heartbeat++;
xpc_vars->heartbeat_offline = 1;
break;
+
+ case DIE_KDEBUG_LEAVE:
+ /* Is lack of heartbeat being ignored by other partitions? */
+ if (!xpc_kdebug_ignore) {
+ break;
+ }
+ /* fall through */
case DIE_MCA_MONARCH_LEAVE:
case DIE_INIT_MONARCH_LEAVE:
xpc_vars->heartbeat++;
@@ -1344,3 +1409,7 @@ module_param(xpc_disengage_request_timelimit, int, 0);
MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
"for disengage request to complete.");
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+ "other partitions when dropping into kdebug.");
+
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index cdd6431853a1..88a730e6cfdb 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
@@ -28,7 +28,7 @@
#include <asm/sn/sn_sal.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
/* XPC is exiting flag */
@@ -771,7 +771,8 @@ xpc_identify_act_IRQ_req(int nasid)
}
}
- if (!xpc_partition_disengaged(part)) {
+ if (part->disengage_request_timeout > 0 &&
+ !xpc_partition_disengaged(part)) {
/* still waiting on other side to disengage from us */
return;
}
@@ -873,6 +874,9 @@ xpc_partition_disengaged(struct xpc_partition *part)
* request in a timely fashion, so assume it's dead.
*/
+ dev_info(xpc_part, "disengage from remote partition %d "
+ "timed out\n", partid);
+ xpc_disengage_request_timedout = 1;
xpc_clear_partition_engaged(1UL << partid);
disengaged = 1;
}