summaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel
diff options
context:
space:
mode:
authorDmitry Torokhov <dtor_core@ameritech.net>2006-04-02 07:08:05 +0200
committerDmitry Torokhov <dtor_core@ameritech.net>2006-04-02 07:08:05 +0200
commit95d465fd750897ab32462a6702fbfe1b122cbbc0 (patch)
tree65c38b2f11c51bb6932e44dd6c92f15b0091abfe /arch/ia64/kernel
parentInput: gameport - fix memory leak (diff)
parentMerge master.kernel.org:/home/rmk/linux-2.6-serial (diff)
downloadlinux-95d465fd750897ab32462a6702fbfe1b122cbbc0.tar.xz
linux-95d465fd750897ab32462a6702fbfe1b122cbbc0.zip
Manual merge with Linus.
Conflicts: arch/powerpc/kernel/setup-common.c drivers/input/keyboard/hil_kbd.c drivers/input/mouse/hil_ptr.c
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--arch/ia64/kernel/Makefile3
-rw-r--r--arch/ia64/kernel/acpi.c55
-rw-r--r--arch/ia64/kernel/efi.c62
-rw-r--r--arch/ia64/kernel/entry.S15
-rw-r--r--arch/ia64/kernel/gate.lds.S1
-rw-r--r--arch/ia64/kernel/iosapic.c271
-rw-r--r--arch/ia64/kernel/irq.c13
-rw-r--r--arch/ia64/kernel/ivt.S1
-rw-r--r--arch/ia64/kernel/kprobes.c51
-rw-r--r--arch/ia64/kernel/machvec.c19
-rw-r--r--arch/ia64/kernel/mca.c197
-rw-r--r--arch/ia64/kernel/mca_drv.c22
-rw-r--r--arch/ia64/kernel/mca_drv.h7
-rw-r--r--arch/ia64/kernel/mca_drv_asm.S13
-rw-r--r--arch/ia64/kernel/numa.c2
-rw-r--r--arch/ia64/kernel/palinfo.c8
-rw-r--r--arch/ia64/kernel/patch.c8
-rw-r--r--arch/ia64/kernel/perfmon.c5
-rw-r--r--arch/ia64/kernel/process.c8
-rw-r--r--arch/ia64/kernel/ptrace.c10
-rw-r--r--arch/ia64/kernel/setup.c71
-rw-r--r--arch/ia64/kernel/signal.c101
-rw-r--r--arch/ia64/kernel/smpboot.c221
-rw-r--r--arch/ia64/kernel/time.c11
-rw-r--r--arch/ia64/kernel/topology.c369
-rw-r--r--arch/ia64/kernel/traps.c6
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S54
27 files changed, 1065 insertions, 539 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 09a0dbc17fb6..59e871dae742 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
- unwind.o mca.o mca_asm.o topology.o
+ unwind.o mca.o mca_asm.o topology.o dmi_scan.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
@@ -30,6 +30,7 @@ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
+dmi_scan-y += ../../i386/kernel/dmi_scan.o
# The gate DSO image is built using a special linker script.
targets += gate.so gate-syms.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ecd44bdc8394..58c93a30348c 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
unsigned int can_cpei_retarget(void)
{
extern int cpe_vector;
+ extern unsigned int force_cpei_retarget;
/*
* Only if CPEI is supported and the override flag
* is present, otherwise return that its re-targettable
* if we are in polling mode.
*/
- if (cpe_vector > 0 && !acpi_cpei_override)
- return 0;
- else
- return 1;
+ if (cpe_vector > 0) {
+ if (acpi_cpei_override || force_cpei_retarget)
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
}
unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu)
{
acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
}
+#endif
unsigned int get_cpei_target_cpu(void)
{
@@ -414,6 +420,26 @@ int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
int __initdata nid_to_pxm_map[MAX_NUMNODES];
static struct acpi_table_slit __initdata *slit_table;
+static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
+{
+ int pxm;
+
+ pxm = pa->proximity_domain;
+ if (ia64_platform_is("sn2"))
+ pxm += pa->reserved[0] << 8;
+ return pxm;
+}
+
+static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma)
+{
+ int pxm;
+
+ pxm = ma->proximity_domain;
+ if (ia64_platform_is("sn2"))
+ pxm += ma->reserved1[0] << 8;
+ return pxm;
+}
+
/*
* ACPI 2.0 SLIT (System Locality Information Table)
* http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
@@ -437,13 +463,20 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
void __init
acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
{
+ int pxm;
+
+ if (!pa->flags.enabled)
+ return;
+
+ pxm = get_processor_proximity_domain(pa);
+
/* record this node in proximity bitmap */
- pxm_bit_set(pa->proximity_domain);
+ pxm_bit_set(pxm);
node_cpuid[srat_num_cpus].phys_id =
(pa->apic_id << 8) | (pa->lsapic_eid);
/* nid should be overridden as logical node id later */
- node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+ node_cpuid[srat_num_cpus].nid = pxm;
srat_num_cpus++;
}
@@ -451,10 +484,10 @@ void __init
acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
{
unsigned long paddr, size;
- u8 pxm;
+ int pxm;
struct node_memblk_s *p, *q, *pend;
- pxm = ma->proximity_domain;
+ pxm = get_memory_proximity_domain(ma);
/* fill node memory chunk structure */
paddr = ma->base_addr_hi;
@@ -618,9 +651,9 @@ unsigned long __init acpi_find_rsdp(void)
{
unsigned long rsdp_phys = 0;
- if (efi.acpi20)
- rsdp_phys = __pa(efi.acpi20);
- else if (efi.acpi)
+ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+ rsdp_phys = efi.acpi20;
+ else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
printk(KERN_WARNING PREFIX
"v1.0/r0.71 tables no longer supported\n");
return rsdp_phys;
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 9990320b6f9a..12cfedce73b1 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -458,24 +458,33 @@ efi_init (void)
printk(KERN_INFO "EFI v%u.%.02u by %s:",
efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
+ efi.mps = EFI_INVALID_TABLE_ADDR;
+ efi.acpi = EFI_INVALID_TABLE_ADDR;
+ efi.acpi20 = EFI_INVALID_TABLE_ADDR;
+ efi.smbios = EFI_INVALID_TABLE_ADDR;
+ efi.sal_systab = EFI_INVALID_TABLE_ADDR;
+ efi.boot_info = EFI_INVALID_TABLE_ADDR;
+ efi.hcdp = EFI_INVALID_TABLE_ADDR;
+ efi.uga = EFI_INVALID_TABLE_ADDR;
+
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = __va(config_tables[i].table);
+ efi.mps = config_tables[i].table;
printk(" MPS=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = __va(config_tables[i].table);
+ efi.acpi20 = config_tables[i].table;
printk(" ACPI 2.0=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = __va(config_tables[i].table);
+ efi.acpi = config_tables[i].table;
printk(" ACPI=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = __va(config_tables[i].table);
+ efi.smbios = config_tables[i].table;
printk(" SMBIOS=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
- efi.sal_systab = __va(config_tables[i].table);
+ efi.sal_systab = config_tables[i].table;
printk(" SALsystab=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = __va(config_tables[i].table);
+ efi.hcdp = config_tables[i].table;
printk(" HCDP=0x%lx", config_tables[i].table);
}
}
@@ -677,27 +686,34 @@ EXPORT_SYMBOL(efi_mem_attributes);
/*
* Determines whether the memory at phys_addr supports the desired
* attribute (WB, UC, etc). If this returns 1, the caller can safely
- * access *size bytes at phys_addr with the specified attribute.
+ * access size bytes at phys_addr with the specified attribute.
*/
-static int
-efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr)
+int
+efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr)
{
+ unsigned long end = phys_addr + size;
efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
- unsigned long md_end;
- if (!md || (md->attribute & attr) != attr)
+ /*
+ * Some firmware doesn't report MMIO regions in the EFI memory
+ * map. The Intel BigSur (a.k.a. HP i2000) has this problem.
+ * On those platforms, we have to assume UC is valid everywhere.
+ */
+ if (!md || (md->attribute & attr) != attr) {
+ if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio())
+ return 1;
return 0;
+ }
do {
- md_end = efi_md_end(md);
- if (phys_addr + *size <= md_end)
+ unsigned long md_end = efi_md_end(md);
+
+ if (end <= md_end)
return 1;
md = efi_memory_descriptor(md_end);
- if (!md || (md->attribute & attr) != attr) {
- *size = md_end - phys_addr;
- return 1;
- }
+ if (!md || (md->attribute & attr) != attr)
+ return 0;
} while (md);
return 0;
}
@@ -708,7 +724,7 @@ efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr)
* control access size.
*/
int
-valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
{
return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB);
}
@@ -723,7 +739,7 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
* because that doesn't appear in the boot-time EFI memory map.
*/
int
-valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size)
{
if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB))
return 1;
@@ -731,14 +747,6 @@ valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size)
if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC))
return 1;
- /*
- * Some firmware doesn't report MMIO regions in the EFI memory map.
- * The Intel BigSur (a.k.a. HP i2000) has this problem. In this
- * case, we can't use the EFI memory map to validate mmap requests.
- */
- if (!efi_memmap_has_mmio())
- return 1;
-
return 0;
}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 930fdfca6ddb..750e8e7fbdc3 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1102,9 +1102,6 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
-(p6) br.cond.sptk.few .sigdelayed
- ;;
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
@@ -1131,17 +1128,6 @@ skip_rbs_switch:
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered. Deliver it now. The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
- br.call.sptk.many rp=do_sigdelayed
- cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
@@ -1619,5 +1605,6 @@ sys_call_table:
data8 sys_ni_syscall // reserved for pselect
data8 sys_ni_syscall // 1295 reserved for ppoll
data8 sys_unshare
+ data8 sys_splice
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index e1e4aba9ecd0..7c99e6ec3daf 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -59,6 +59,7 @@ SECTIONS
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(__ex_table)
+ *(__mca_table)
}
}
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f343fa..7956eb9058fc 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -9,54 +9,65 @@
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
*
- * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
- * In particular, we now have separate handlers for edge
- * and level triggered interrupts.
- * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
- * PCI to vector mapping, shared PCI interrupts.
- * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
- * Clean up much of the old IOSAPIC cruft.
- * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
- * ACPI S5(SoftOff) support.
+ * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O
+ * APIC code. In particular, we now have separate
+ * handlers for edge and level triggered
+ * interrupts.
+ * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
+ * allocation PCI to vector mapping, shared PCI
+ * interrupts.
+ * 00/10/27 D. Mosberger Document things a bit more to make them more
+ * understandable. Clean up much of the old
+ * IOSAPIC cruft.
+ * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts
+ * and fixes for ACPI S5(SoftOff) support.
* 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
- * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
- * iosapic_set_affinity(), initializations for
- * /proc/irq/#/smp_affinity
+ * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt
+ * vectors in iosapic_set_affinity(),
+ * initializations for /proc/irq/#/smp_affinity
* 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
* 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
- * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
- * error
+ * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to
+ * IOSAPIC mapping error
* 02/07/29 T. Kochi Allocate interrupt vectors dynamically
- * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
- * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
+ * 02/08/04 T. Kochi Cleaned up terminology (irq, global system
+ * interrupt, vector, etc.)
+ * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's
+ * pci_irq code.
* 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
- * Remove iosapic_address & gsi_base from external interfaces.
- * Rationalize __init/__devinit attributes.
+ * Remove iosapic_address & gsi_base from
+ * external interfaces. Rationalize
+ * __init/__devinit attributes.
* 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
- * Updated to work with irq migration necessary for CPU Hotplug
+ * Updated to work with irq migration necessary
+ * for CPU Hotplug
*/
/*
- * Here is what the interrupt logic between a PCI device and the kernel looks like:
+ * Here is what the interrupt logic between a PCI device and the kernel looks
+ * like:
*
- * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
- * device is uniquely identified by its bus--, and slot-number (the function
- * number does not matter here because all functions share the same interrupt
- * lines).
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
+ * INTD). The device is uniquely identified by its bus-, and slot-number
+ * (the function number does not matter here because all functions share
+ * the same interrupt lines).
*
- * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller.
- * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
- * triggered and use the same polarity). Each interrupt line has a unique Global
- * System Interrupt (GSI) number which can be calculated as the sum of the controller's
- * base GSI number and the IOSAPIC pin number to which the line connects.
+ * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC
+ * controller. Multiple interrupt lines may have to share the same
+ * IOSAPIC pin (if they're level triggered and use the same polarity).
+ * Each interrupt line has a unique Global System Interrupt (GSI) number
+ * which can be calculated as the sum of the controller's base GSI number
+ * and the IOSAPIC pin number to which the line connects.
*
- * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin
- * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
+ * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the
+ * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then
+ * sent to the CPU.
*
- * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as
- * architecture-independent interrupt handling mechanism in Linux. As an
- * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number
- * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and
- * IRQ. A platform can implement platform_irq_to_vector(irq) and
+ * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is
+ * used as architecture-independent interrupt handling mechanism in Linux.
+ * As an IRQ is a number, we have to have
+ * IA-64 interrupt vector number <-> IRQ number mapping. On smaller
+ * systems, we use one-to-one mapping between IA-64 vector and IRQ. A
+ * platform can implement platform_irq_to_vector(irq) and
* platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
* Please see also include/asm-ia64/hw_irq.h for those APIs.
*
@@ -64,9 +75,9 @@
*
* PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
*
- * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts.
- * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this
- * source code.
+ * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
+ * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ
+ * (isa_irq) is the only exception in this source code.
*/
#include <linux/config.h>
@@ -90,7 +101,6 @@
#include <asm/ptrace.h>
#include <asm/system.h>
-
#undef DEBUG_INTERRUPT_ROUTING
#ifdef DEBUG_INTERRUPT_ROUTING
@@ -99,36 +109,46 @@
#define DBG(fmt...)
#endif
-#define NR_PREALLOCATE_RTE_ENTRIES (PAGE_SIZE / sizeof(struct iosapic_rte_info))
+#define NR_PREALLOCATE_RTE_ENTRIES \
+ (PAGE_SIZE / sizeof(struct iosapic_rte_info))
#define RTE_PREALLOCATED (1)
static DEFINE_SPINLOCK(iosapic_lock);
-/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
+/*
+ * These tables map IA-64 vectors to the IOSAPIC pin that generates this
+ * vector.
+ */
struct iosapic_rte_info {
- struct list_head rte_list; /* node in list of RTEs sharing the same vector */
+ struct list_head rte_list; /* node in list of RTEs sharing the
+ * same vector */
char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this
+ * IOSAPIC */
char rte_index; /* IOSAPIC RTE index */
int refcnt; /* reference counter */
unsigned int flags; /* flags */
} ____cacheline_aligned;
static struct iosapic_intr_info {
- struct list_head rtes; /* RTEs using this vector (empty => not an IOSAPIC interrupt) */
+ struct list_head rtes; /* RTEs using this vector (empty =>
+ * not an IOSAPIC interrupt) */
int count; /* # of RTEs that shares this vector */
- u32 low32; /* current value of low word of Redirection table entry */
+ u32 low32; /* current value of low word of
+ * Redirection table entry */
unsigned int dest; /* destination CPU physical ID */
unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
- unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
+ unsigned char polarity: 1; /* interrupt polarity
+ * (see iosapic.h) */
unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
} iosapic_intr_info[IA64_NUM_VECTORS];
static struct iosapic {
char __iomem *addr; /* base address of IOSAPIC */
- unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
- unsigned short num_rte; /* number of RTE in this IOSAPIC */
+ unsigned int gsi_base; /* first GSI assigned to this
+ * IOSAPIC */
+ unsigned short num_rte; /* # of RTEs on this IOSAPIC */
int rtes_inuse; /* # of RTEs in use on this IOSAPIC */
#ifdef CONFIG_NUMA
unsigned short node; /* numa node association via pxm */
@@ -149,7 +169,8 @@ find_iosapic (unsigned int gsi)
int i;
for (i = 0; i < NR_IOSAPICS; i++) {
- if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
+ if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
+ iosapic_lists[i].num_rte)
return i;
}
@@ -162,7 +183,8 @@ _gsi_to_vector (unsigned int gsi)
struct iosapic_intr_info *info;
struct iosapic_rte_info *rte;
- for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
+ for (info = iosapic_intr_info; info <
+ iosapic_intr_info + IA64_NUM_VECTORS; ++info)
list_for_each_entry(rte, &info->rtes, rte_list)
if (rte->gsi_base + rte->rte_index == gsi)
return info - iosapic_intr_info;
@@ -185,8 +207,8 @@ gsi_to_irq (unsigned int gsi)
unsigned long flags;
int irq;
/*
- * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq
- * numbers...
+ * XXX fix me: this assumes an identity mapping between IA-64 vector
+ * and Linux irq numbers...
*/
spin_lock_irqsave(&iosapic_lock, flags);
{
@@ -197,7 +219,8 @@ gsi_to_irq (unsigned int gsi)
return irq;
}
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, unsigned int vec)
+static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
+ unsigned int vec)
{
struct iosapic_rte_info *rte;
@@ -237,7 +260,9 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == vector) {
- set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
+ set_irq_affinity_info(irq,
+ (int)(dest & 0xffff),
+ redir);
break;
}
}
@@ -259,7 +284,7 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
}
static void
-nop (unsigned int vector)
+nop (unsigned int irq)
{
/* do nothing... */
}
@@ -281,7 +306,8 @@ mask_irq (unsigned int irq)
{
/* set only the mask bit */
low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
@@ -306,7 +332,8 @@ unmask_irq (unsigned int irq)
spin_lock_irqsave(&iosapic_lock, flags);
{
low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
@@ -346,21 +373,25 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
spin_lock_irqsave(&iosapic_lock, flags);
{
- low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
+ low32 = iosapic_intr_info[vec].low32 &
+ ~(7 << IOSAPIC_DELIVERY_SHIFT);
if (redir)
/* change delivery mode to lowest priority */
- low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+ low32 |= (IOSAPIC_LOWEST_PRIORITY <<
+ IOSAPIC_DELIVERY_SHIFT);
else
/* change delivery mode to fixed */
low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
iosapic_intr_info[vec].low32 = low32;
iosapic_intr_info[vec].dest = dest;
- list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
+ rte_list) {
addr = rte->addr;
rte_index = rte->rte_index;
- iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
+ iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
+ high32);
iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
}
}
@@ -433,7 +464,8 @@ iosapic_ack_edge_irq (unsigned int irq)
* interrupt for real. This prevents IRQ storms from unhandled
* devices.
*/
- if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
+ if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
+ (IRQ_PENDING|IRQ_DISABLED))
mask_irq(irq);
}
@@ -467,7 +499,8 @@ iosapic_version (char __iomem *addr)
return iosapic_read(addr, IOSAPIC_VERSION);
}
-static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long pol)
+static int iosapic_find_sharable_vector (unsigned long trigger,
+ unsigned long pol)
{
int i, vector = -1, min_count = -1;
struct iosapic_intr_info *info;
@@ -482,7 +515,8 @@ static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long po
for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
info = &iosapic_intr_info[i];
if (info->trigger == trigger && info->polarity == pol &&
- (info->dmode == IOSAPIC_FIXED || info->dmode == IOSAPIC_LOWEST_PRIORITY)) {
+ (info->dmode == IOSAPIC_FIXED || info->dmode ==
+ IOSAPIC_LOWEST_PRIORITY)) {
if (min_count == -1 || info->count < min_count) {
vector = i;
min_count = info->count;
@@ -506,12 +540,15 @@ iosapic_reassign_vector (int vector)
new_vector = assign_irq_vector(AUTO_ASSIGN);
if (new_vector < 0)
panic("%s: out of interrupt vectors!\n", __FUNCTION__);
- printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
+ printk(KERN_INFO "Reassigning vector %d to %d\n",
+ vector, new_vector);
memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
sizeof(struct iosapic_intr_info));
INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
- list_move(iosapic_intr_info[vector].rtes.next, &iosapic_intr_info[new_vector].rtes);
- memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+ list_move(iosapic_intr_info[vector].rtes.next,
+ &iosapic_intr_info[new_vector].rtes);
+ memset(&iosapic_intr_info[vector], 0,
+ sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
}
@@ -524,7 +561,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
int preallocated = 0;
if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
- rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES);
+ rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
+ NR_PREALLOCATE_RTE_ENTRIES);
if (!rte)
return NULL;
for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
@@ -532,7 +570,8 @@ static struct iosapic_rte_info *iosapic_alloc_rte (void)
}
if (!list_empty(&free_rte_list)) {
- rte = list_entry(free_rte_list.next, struct iosapic_rte_info, rte_list);
+ rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
+ rte_list);
list_del(&rte->rte_list);
preallocated++;
} else {
@@ -575,7 +614,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
index = find_iosapic(gsi);
if (index < 0) {
- printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
+ printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
+ __FUNCTION__, gsi);
return -ENODEV;
}
@@ -586,7 +626,8 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
if (!rte) {
rte = iosapic_alloc_rte();
if (!rte) {
- printk(KERN_WARNING "%s: cannot allocate memory\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: cannot allocate memory\n",
+ __FUNCTION__);
return -ENOMEM;
}
@@ -602,7 +643,9 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
else if (vector_is_shared(vector)) {
struct iosapic_intr_info *info = &iosapic_intr_info[vector];
if (info->trigger != trigger || info->polarity != polarity) {
- printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__);
+ printk (KERN_WARNING
+ "%s: cannot override the interrupt\n",
+ __FUNCTION__);
return -EINVAL;
}
}
@@ -619,8 +662,10 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
idesc = irq_descp(vector);
if (idesc->handler != irq_type) {
if (idesc->handler != &no_irq_type)
- printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
- __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
+ printk(KERN_WARNING
+ "%s: changing vector %d from %s to %s\n",
+ __FUNCTION__, vector,
+ idesc->handler->typename, irq_type->typename);
idesc->handler = irq_type;
}
return 0;
@@ -631,6 +676,7 @@ get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
+ extern int cpe_vector;
/*
* In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +699,11 @@ get_target_cpu (unsigned int gsi, int vector)
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
+#ifdef CONFIG_ACPI
+ if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+ return get_cpei_target_cpu();
+#endif
+
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
@@ -675,7 +726,7 @@ get_target_cpu (unsigned int gsi, int vector)
if (!num_cpus)
goto skip_numa_setup;
- /* Use vector assigment to distribute across cpus in node */
+ /* Use vector assignment to distribute across cpus in node */
cpu_index = vector % num_cpus;
for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
@@ -697,7 +748,7 @@ skip_numa_setup:
} while (!cpu_online(cpu));
return cpu_physical_id(cpu);
-#else
+#else /* CONFIG_SMP */
return cpu_physical_id(smp_processor_id());
#endif
}
@@ -749,7 +800,8 @@ again:
if (list_empty(&iosapic_intr_info[vector].rtes))
free_irq_vector(vector);
spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+ spin_unlock_irqrestore(&irq_descp(vector)->lock,
+ flags);
goto again;
}
@@ -758,7 +810,8 @@ again:
polarity, trigger);
if (err < 0) {
spin_unlock(&iosapic_lock);
- spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
+ spin_unlock_irqrestore(&irq_descp(vector)->lock,
+ flags);
return err;
}
@@ -800,7 +853,8 @@ iosapic_unregister_intr (unsigned int gsi)
*/
irq = gsi_to_irq(gsi);
if (irq < 0) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+ printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
+ gsi);
WARN_ON(1);
return;
}
@@ -811,7 +865,9 @@ iosapic_unregister_intr (unsigned int gsi)
spin_lock(&iosapic_lock);
{
if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
- printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
+ printk(KERN_ERR
+ "iosapic_unregister_intr(%u) unbalanced\n",
+ gsi);
WARN_ON(1);
goto out;
}
@@ -821,7 +877,8 @@ iosapic_unregister_intr (unsigned int gsi)
/* Mask the interrupt */
low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
- iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
+ iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
+ low32);
/* Remove the rte entry from the list */
list_del(&rte->rte_list);
@@ -834,7 +891,9 @@ iosapic_unregister_intr (unsigned int gsi)
trigger = iosapic_intr_info[vector].trigger;
polarity = iosapic_intr_info[vector].polarity;
dest = iosapic_intr_info[vector].dest;
- printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
+ printk(KERN_INFO
+ "GSI %u (%s, %s) -> CPU %d (0x%04x)"
+ " vector %d unregistered\n",
gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
cpu_logical_id(dest), dest, vector);
@@ -847,12 +906,15 @@ iosapic_unregister_intr (unsigned int gsi)
idesc->handler = &no_irq_type;
/* Clear the interrupt information */
- memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
+ memset(&iosapic_intr_info[vector], 0,
+ sizeof(struct iosapic_intr_info));
iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
if (idesc->action) {
- printk(KERN_ERR "interrupt handlers still exist on IRQ %u\n", irq);
+ printk(KERN_ERR
+ "interrupt handlers still exist on"
+ "IRQ %u\n", irq);
WARN_ON(1);
}
@@ -867,7 +929,6 @@ iosapic_unregister_intr (unsigned int gsi)
/*
* ACPI calls this when it finds an entry for a platform interrupt.
- * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
*/
int __init
iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
@@ -901,13 +962,16 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
mask = 1;
break;
default:
- printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type);
+ printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+ int_type);
return -1;
}
register_intr(gsi, vector, delivery, polarity, trigger);
- printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
+ printk(KERN_INFO
+ "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
+ " vector %d\n",
int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
@@ -917,10 +981,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
return vector;
}
-
/*
* ACPI calls this when it finds an entry for a legacy ISA IRQ override.
- * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
*/
void __init
iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
@@ -949,16 +1011,19 @@ iosapic_system_init (int system_pcat_compat)
for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
- INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); /* mark as unused */
+ /* mark as unused */
+ INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
}
pcat_compat = system_pcat_compat;
if (pcat_compat) {
/*
- * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
- * enabled.
+ * Disable the compatibility mode interrupts (8259 style),
+ * needs IN/OUT support enabled.
*/
- printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
+ printk(KERN_INFO
+ "%s: Disabling PC-AT compatible 8259 interrupts\n",
+ __FUNCTION__);
outb(0xff, 0xA1);
outb(0xff, 0x21);
}
@@ -998,10 +1063,7 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
base = iosapic_lists[index].gsi_base;
end = base + iosapic_lists[index].num_rte - 1;
- if (gsi_base < base && gsi_end < base)
- continue;/* OK */
-
- if (gsi_base > end && gsi_end > end)
+ if (gsi_end < base || end < gsi_base)
continue; /* OK */
return -EBUSY;
@@ -1047,12 +1109,14 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
if ((gsi_base == 0) && pcat_compat) {
/*
- * Map the legacy ISA devices into the IOSAPIC data. Some of these may
- * get reprogrammed later on with data from the ACPI Interrupt Source
- * Override table.
+ * Map the legacy ISA devices into the IOSAPIC data. Some of
+ * these may get reprogrammed later on with data from the ACPI
+ * Interrupt Source Override table.
*/
for (isa_irq = 0; isa_irq < 16; ++isa_irq)
- iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
+ iosapic_override_isa_irq(isa_irq, isa_irq,
+ IOSAPIC_POL_HIGH,
+ IOSAPIC_EDGE);
}
return 0;
}
@@ -1075,7 +1139,8 @@ iosapic_remove (unsigned int gsi_base)
if (iosapic_lists[index].rtes_inuse) {
err = -EBUSY;
- printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+ printk(KERN_WARNING
+ "%s: IOSAPIC for GSI base %u is busy\n",
__FUNCTION__, gsi_base);
goto out;
}
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c32759..5ce908ef9c95 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
+ extern void ia64_disable_timer(void);
+ extern volatile int time_keeper_id;
+
+ ia64_disable_timer();
+
+ /*
+ * Find a new timesync master
+ */
+ if (smp_processor_id() == time_keeper_id) {
+ time_keeper_id = first_cpu(cpu_online_map);
+ printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+ }
- ia64_set_itv(1<<16);
/*
* Phase 1: Locate irq's bound to this cpu and
* relocate them for cpu removal.
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index dcd906fe5749..829a43cab797 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -865,6 +865,7 @@ ENTRY(interrupt)
;;
SAVE_REST
;;
+ MCA_RECOVER_RANGE(interrupt)
alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
mov out0=cr.ivr // pass cr.ivr as first arg
add out1=16,sp // pass pointer to pt_regs as second arg
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 50ae8c7d453d..789881ca83d4 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -34,6 +34,7 @@
#include <asm/pgtable.h>
#include <asm/kdebug.h>
#include <asm/sections.h>
+#include <asm/uaccess.h>
extern void jprobe_inst_return(void);
@@ -722,13 +723,50 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
- if (kcb->kprobe_status & KPROBE_HIT_SS) {
- resume_execution(cur, regs);
- reset_current_kprobe();
+ switch(kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the instruction pointer points back to
+ * the probe address and allow the page fault handler
+ * to continue as a normal page fault.
+ */
+ regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL;
+ ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf;
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accouting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * Let ia64_do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
}
return 0;
@@ -740,6 +778,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
+ if (args->regs && user_mode(args->regs))
+ return ret;
+
switch(val) {
case DIE_BREAK:
/* err is break number from ia64_bad_break() */
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index c3a04ee7f4f6..4b0b71d5aef4 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -14,7 +14,15 @@
struct ia64_machine_vector ia64_mv;
EXPORT_SYMBOL(ia64_mv);
-static struct ia64_machine_vector *
+static __initdata const char *mvec_name;
+static __init int setup_mvec(char *s)
+{
+ mvec_name = s;
+ return 0;
+}
+early_param("machvec", setup_mvec);
+
+static struct ia64_machine_vector * __init
lookup_machvec (const char *name)
{
extern struct ia64_machine_vector machvec_start[];
@@ -33,10 +41,13 @@ machvec_init (const char *name)
{
struct ia64_machine_vector *mv;
+ if (!name)
+ name = mvec_name ? mvec_name : acpi_get_sysname();
mv = lookup_machvec(name);
- if (!mv) {
- panic("generic kernel failed to find machine vector for platform %s!", name);
- }
+ if (!mv)
+ panic("generic kernel failed to find machine vector for"
+ " platform %s!", name);
+
ia64_mv = *mv;
printk(KERN_INFO "booting generic kernel on platform %s\n", name);
}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9ee576..8963171788d5 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -69,6 +69,7 @@
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>
+#include <linux/cpumask.h>
#include <asm/delay.h>
#include <asm/kdebug.h>
@@ -83,6 +84,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include "mca_drv.h"
#include "entry.h"
#if defined(IA64_MCA_DEBUG_INFO)
@@ -133,7 +135,7 @@ static int cpe_poll_enabled = 1;
extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
-static int mca_init;
+static int mca_init __initdata;
static void inline
@@ -184,7 +186,7 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
* Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
* Outputs : None
*/
-static void
+static void __init
ia64_log_init(int sal_info_type)
{
u64 max_size = 0;
@@ -282,13 +284,53 @@ ia64_mca_log_sal_error_record(int sal_info_type)
}
/*
- * platform dependent error handling
+ * search_mca_table
+ * See if the MCA surfaced in an instruction range
+ * that has been tagged as recoverable.
+ *
+ * Inputs
+ * first First address range to check
+ * last Last address range to check
+ * ip Instruction pointer, address we are looking for
+ *
+ * Return value:
+ * 1 on Success (in the table)/ 0 on Failure (not in the table)
*/
-#ifndef PLATFORM_MCA_HANDLERS
+int
+search_mca_table (const struct mca_table_entry *first,
+ const struct mca_table_entry *last,
+ unsigned long ip)
+{
+ const struct mca_table_entry *curr;
+ u64 curr_start, curr_end;
+
+ curr = first;
+ while (curr <= last) {
+ curr_start = (u64) &curr->start_addr + curr->start_addr;
+ curr_end = (u64) &curr->end_addr + curr->end_addr;
+
+ if ((ip >= curr_start) && (ip <= curr_end)) {
+ return 1;
+ }
+ curr++;
+ }
+ return 0;
+}
+
+/* Given an address, look for it in the mca tables. */
+int mca_recover_range(unsigned long addr)
+{
+ extern struct mca_table_entry __start___mca_table[];
+ extern struct mca_table_entry __stop___mca_table[];
+
+ return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
+}
+EXPORT_SYMBOL_GPL(mca_recover_range);
#ifdef CONFIG_ACPI
int cpe_vector = -1;
+int ia64_cpe_irq = -1;
static irqreturn_t
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -359,7 +401,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
* Outputs
* None
*/
-static void
+static void __init
ia64_mca_register_cpev (int cpev)
{
/* Register the CPE interrupt vector with SAL */
@@ -377,8 +419,6 @@ ia64_mca_register_cpev (int cpev)
}
#endif /* CONFIG_ACPI */
-#endif /* PLATFORM_MCA_HANDLERS */
-
/*
* ia64_mca_cmc_vector_setup
*
@@ -392,7 +432,7 @@ ia64_mca_register_cpev (int cpev)
* Outputs
* None
*/
-void
+void __cpuinit
ia64_mca_cmc_vector_setup (void)
{
cmcv_reg_t cmcv;
@@ -630,6 +670,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
*tnat |= (nat << tslot);
}
+/* Change the comm field on the MCA/INT task to include the pid that
+ * was interrupted, it makes for easier debugging. If that pid was 0
+ * (swapper or nested MCA/INIT) then use the start of the previous comm
+ * field suffixed with its cpu.
+ */
+
+static void
+ia64_mca_modify_comm(const task_t *previous_current)
+{
+ char *p, comm[sizeof(current->comm)];
+ if (previous_current->pid)
+ snprintf(comm, sizeof(comm), "%s %d",
+ current->comm, previous_current->pid);
+ else {
+ int l;
+ if ((p = strchr(previous_current->comm, ' ')))
+ l = p - previous_current->comm;
+ else
+ l = strlen(previous_current->comm);
+ snprintf(comm, sizeof(comm), "%s %*s %d",
+ current->comm, l, previous_current->comm,
+ task_thread_info(previous_current)->cpu);
+ }
+ memcpy(current->comm, comm, sizeof(current->comm));
+}
+
/* On entry to this routine, we are running on the per cpu stack, see
* mca_asm.h. The original stack has not been touched by this event. Some of
* the original stack's registers will be in the RBS on this stack. This stack
@@ -648,7 +714,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
struct ia64_sal_os_state *sos,
const char *type)
{
- char *p, comm[sizeof(current->comm)];
+ char *p;
ia64_va va;
extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
const pal_min_state_area_t *ms = sos->pal_min_state;
@@ -721,54 +787,43 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
/* Verify the previous stack state before we change it */
if (user_mode(regs)) {
msg = "occurred in user space";
- goto no_mod;
- }
- if (r13 != sos->prev_IA64_KR_CURRENT) {
- msg = "inconsistent previous current and r13";
- goto no_mod;
- }
- if ((r12 - r13) >= KERNEL_STACK_SIZE) {
- msg = "inconsistent r12 and r13";
- goto no_mod;
- }
- if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
- msg = "inconsistent ar.bspstore and r13";
- goto no_mod;
- }
- va.p = old_bspstore;
- if (va.f.reg < 5) {
- msg = "old_bspstore is in the wrong region";
- goto no_mod;
- }
- if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
- msg = "inconsistent ar.bsp and r13";
- goto no_mod;
- }
- size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
- if (ar_bspstore + size > r12) {
- msg = "no room for blocked state";
+ /* previous_current is guaranteed to be valid when the task was
+ * in user space, so ...
+ */
+ ia64_mca_modify_comm(previous_current);
goto no_mod;
}
- /* Change the comm field on the MCA/INT task to include the pid that
- * was interrupted, it makes for easier debugging. If that pid was 0
- * (swapper or nested MCA/INIT) then use the start of the previous comm
- * field suffixed with its cpu.
- */
- if (previous_current->pid)
- snprintf(comm, sizeof(comm), "%s %d",
- current->comm, previous_current->pid);
- else {
- int l;
- if ((p = strchr(previous_current->comm, ' ')))
- l = p - previous_current->comm;
- else
- l = strlen(previous_current->comm);
- snprintf(comm, sizeof(comm), "%s %*s %d",
- current->comm, l, previous_current->comm,
- task_thread_info(previous_current)->cpu);
+ if (!mca_recover_range(ms->pmsa_iip)) {
+ if (r13 != sos->prev_IA64_KR_CURRENT) {
+ msg = "inconsistent previous current and r13";
+ goto no_mod;
+ }
+ if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+ msg = "inconsistent r12 and r13";
+ goto no_mod;
+ }
+ if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+ msg = "inconsistent ar.bspstore and r13";
+ goto no_mod;
+ }
+ va.p = old_bspstore;
+ if (va.f.reg < 5) {
+ msg = "old_bspstore is in the wrong region";
+ goto no_mod;
+ }
+ if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+ msg = "inconsistent ar.bsp and r13";
+ goto no_mod;
+ }
+ size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+ if (ar_bspstore + size > r12) {
+ msg = "no room for blocked state";
+ goto no_mod;
+ }
}
- memcpy(current->comm, comm, sizeof(current->comm));
+
+ ia64_mca_modify_comm(previous_current);
/* Make the original task look blocked. First stack a struct pt_regs,
* describing the state at the time of interrupt. mca_asm.S built a
@@ -908,7 +963,7 @@ no_mod:
static void
ia64_wait_for_slaves(int monarch)
{
- int c, wait = 0;
+ int c, wait = 0, missing = 0;
for_each_online_cpu(c) {
if (c == monarch)
continue;
@@ -919,15 +974,32 @@ ia64_wait_for_slaves(int monarch)
}
}
if (!wait)
- return;
+ goto all_in;
for_each_online_cpu(c) {
if (c == monarch)
continue;
if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ missing = 1;
break;
}
}
+ if (!missing)
+ goto all_in;
+ printk(KERN_INFO "OS MCA slave did not rendezvous on cpu");
+ for_each_online_cpu(c) {
+ if (c == monarch)
+ continue;
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ printk(" %d", c);
+ }
+ printk("\n");
+ return;
+
+all_in:
+ printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n");
+ return;
}
/*
@@ -953,6 +1025,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
task_t *previous_current;
oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
+ console_loglevel = 15; /* make sure printks make it to console */
+ printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
+ sos->proc_state_param, cpu, sos->monarch);
+
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
monarch_cpu = cpu;
if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
@@ -1416,7 +1492,7 @@ static struct irqaction mca_cpep_irqaction = {
* format most of the fields.
*/
-static void
+static void __cpuinit
format_mca_init_stack(void *mca_data, unsigned long offset,
const char *type, int cpu)
{
@@ -1430,7 +1506,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->cpu = cpu;
p->thread_info = ti;
p->state = TASK_UNINTERRUPTIBLE;
- __set_bit(cpu, &p->cpus_allowed);
+ cpu_set(cpu, p->cpus_allowed);
INIT_LIST_HEAD(&p->tasks);
p->parent = p->real_parent = p->group_leader = p;
INIT_LIST_HEAD(&p->children);
@@ -1440,15 +1516,17 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
/* Do per-CPU MCA-related initialization. */
-void __devinit
+void __cpuinit
ia64_mca_cpu_init(void *cpu_data)
{
void *pal_vaddr;
+ static int first_time = 1;
- if (smp_processor_id() == 0) {
+ if (first_time) {
void *mca_data;
int cpu;
+ first_time = 0;
mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
* NR_CPUS + KERNEL_STACK_SIZE);
mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1782,7 @@ ia64_mca_late_init(void)
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
setup_irq(irq, &mca_cpe_irqaction);
+ ia64_cpe_irq = irq;
}
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index e883d85906db..37c88eb55873 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -6,6 +6,7 @@
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
* Copyright (C) 2005 Silicon Graphics, Inc
* Copyright (C) 2005 Keith Owens <kaos@sgi.com>
+ * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
*/
#include <linux/config.h>
#include <linux/types.h>
@@ -121,11 +122,12 @@ mca_page_isolate(unsigned long paddr)
*/
void
-mca_handler_bh(unsigned long paddr)
+mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
{
- printk(KERN_ERR
- "OS_MCA: process [pid: %d](%s) encounters MCA (paddr=%lx)\n",
- current->pid, current->comm, paddr);
+ printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
+ "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
+ raw_smp_processor_id(), current->pid, current->uid,
+ iip, ipsr, paddr, current->comm);
spin_lock(&mca_bh_lock);
switch (mca_page_isolate(paddr)) {
@@ -442,21 +444,26 @@ recover_from_read_error(slidx_table_t *slidx,
if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
return 0;
psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+ psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
/*
* Check the privilege level of interrupted context.
* If it is user-mode, then terminate affected process.
*/
- if (psr1->cpl != 0) {
+
+ pmsa = sos->pal_min_state;
+ if (psr1->cpl != 0 ||
+ ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
smei = peidx_bus_check(peidx, 0);
if (smei->valid.target_identifier) {
/*
* setup for resume to bottom half of MCA,
* "mca_handler_bhhook"
*/
- pmsa = sos->pal_min_state;
- /* pass to bhhook as 1st argument (gr8) */
+ /* pass to bhhook as argument (gr8, ...) */
pmsa->pmsa_gr[8-1] = smei->target_identifier;
+ pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+ pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
/* set interrupted return address (but no use) */
pmsa->pmsa_br0 = pmsa->pmsa_iip;
/* change resume address to bottom half */
@@ -466,6 +473,7 @@ recover_from_read_error(slidx_table_t *slidx,
psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
psr2->cpl = 0;
psr2->ri = 0;
+ psr2->bn = 1;
psr2->i = 0;
return 1;
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index e2f6fa1e0ef6..31a2e52bb16f 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -111,3 +111,10 @@ typedef struct slidx_table {
slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
__count; })
+struct mca_table_entry {
+ int start_addr; /* location-relative starting address of MCA recoverable range */
+ int end_addr; /* location-relative ending address of MCA recoverable range */
+};
+
+extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
+extern int mca_recover_range(unsigned long);
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index 3f298ee4d00c..e6a580d354b9 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -14,15 +14,12 @@
GLOBAL_ENTRY(mca_handler_bhhook)
invala // clear RSE ?
- ;;
cover
;;
clrrrb
;;
- alloc r16=ar.pfs,0,2,1,0 // make a new frame
- ;;
+ alloc r16=ar.pfs,0,2,3,0 // make a new frame
mov ar.rsc=0
- ;;
mov r13=IA64_KR(CURRENT) // current task pointer
;;
mov r2=r13
@@ -30,7 +27,6 @@ GLOBAL_ENTRY(mca_handler_bhhook)
addl r22=IA64_RBS_OFFSET,r2
;;
mov ar.bspstore=r22
- ;;
addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
;;
adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
@@ -40,12 +36,12 @@ GLOBAL_ENTRY(mca_handler_bhhook)
movl loc1=mca_handler_bh // recovery C function
;;
mov out0=r8 // poisoned address
+ mov out1=r9 // iip
+ mov out2=r10 // psr
mov b6=loc1
;;
mov loc1=rp
- ;;
- ssm psr.i
- ;;
+ ssm psr.i | psr.ic
br.call.sptk.many rp=b6 // does not return ...
;;
mov ar.pfs=loc0
@@ -53,5 +49,4 @@ GLOBAL_ENTRY(mca_handler_bhhook)
;;
mov r8=r0
br.ret.sptk.many rp
- ;;
END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a68ce6678092..0766493d4d00 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -25,7 +25,7 @@
#include <asm/processor.h>
#include <asm/smp.h>
-u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_to_node_map);
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 89faa603c6be..6386f63c413e 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -240,7 +240,7 @@ cache_info(char *page)
}
p += sprintf(p,
"%s Cache level %lu:\n"
- "\tSize : %lu bytes\n"
+ "\tSize : %u bytes\n"
"\tAttributes : ",
cache_types[j+cci.pcci_unified], i+1,
cci.pcci_cache_size);
@@ -648,9 +648,9 @@ frequency_info(char *page)
if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0;
p += sprintf(p,
- "Processor/Clock ratio : %ld/%ld\n"
- "Bus/Clock ratio : %ld/%ld\n"
- "ITC/Clock ratio : %ld/%ld\n",
+ "Processor/Clock ratio : %d/%d\n"
+ "Bus/Clock ratio : %d/%d\n"
+ "ITC/Clock ratio : %d/%d\n",
proc.num, proc.den, bus.num, bus.den, itc.num, itc.den);
return p - page;
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 6a4ac7d70b35..bc11bb096f58 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -115,7 +115,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-void
+void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
static int first_time = 1;
@@ -149,7 +149,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-static void
+static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
extern unsigned long fsyscall_table[NR_syscalls];
@@ -166,7 +166,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-static void
+static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
extern char fsys_bubble_down[];
@@ -184,7 +184,7 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
ia64_srlz_i();
}
-void
+void __init
ia64_patch_gate (void)
{
# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b385da..077f21216b65 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
void
pfm_init_percpu (void)
{
+ static int first_time=1;
/*
* make sure no measurement is active
* (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
*/
pfm_unfreeze_pmu();
- if (smp_processor_id() == 0)
+ if (first_time) {
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+ first_time=0;
+ }
ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
ia64_srlz_d();
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 309d59658e5f..355d57970ba3 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -30,7 +30,6 @@
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/kprobes.h>
#include <asm/cpu.h>
#include <asm/delay.h>
@@ -738,13 +737,6 @@ void
exit_thread (void)
{
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(current);
-
ia64_drop_fpu(current);
#ifdef CONFIG_PERFMON
/* if needed, stop monitoring and flush state to perfmon context */
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index eaed14aac6aa..9887c8787e7a 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1656,8 +1656,14 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
long arg4, long arg5, long arg6, long arg7,
struct pt_regs regs)
{
- if (unlikely(current->audit_context))
- audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8);
+ if (unlikely(current->audit_context)) {
+ int success = AUDITSC_RESULT(regs.r10);
+ long result = regs.r8;
+
+ if (success != AUDITSC_SUCCESS)
+ result = -result;
+ audit_syscall_exit(current, success, result);
+ }
if (test_thread_flag(TIF_SYSCALL_TRACE)
&& (current->ptrace & PT_PTRACED))
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 3258e09278d0..e4dfda1eb7dd 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -37,11 +37,11 @@
#include <linux/string.h>
#include <linux/threads.h>
#include <linux/tty.h>
+#include <linux/dmi.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
-#include <linux/platform.h>
#include <linux/pm.h>
#include <linux/cpufreq.h>
@@ -131,8 +131,8 @@ EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
/*
* We use a special marker for the end of memory and it uses the extra (+1) slot
*/
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
-int num_rsvd_regions;
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
+int num_rsvd_regions __initdata;
/*
@@ -141,7 +141,7 @@ int num_rsvd_regions;
* caller-specified function is called with the memory ranges that remain after filtering.
* This routine does not assume the incoming segments are sorted.
*/
-int
+int __init
filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
@@ -177,7 +177,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
-static void
+static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
int j;
@@ -218,7 +218,7 @@ __initcall(register_memory);
* initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
* see include/asm-ia64/meminit.h if you need to define more.
*/
-void
+void __init
reserve_memory (void)
{
int n = 0;
@@ -270,7 +270,7 @@ reserve_memory (void)
* Grab the initrd start and end from the boot parameter struct given us by
* the boot loader.
*/
-void
+void __init
find_initrd (void)
{
#ifdef CONFIG_BLK_DEV_INITRD
@@ -362,7 +362,7 @@ mark_bsp_online (void)
}
#ifdef CONFIG_SMP
-static void
+static void __init
check_for_logical_procs (void)
{
pal_logical_to_physical_t info;
@@ -389,6 +389,14 @@ check_for_logical_procs (void)
}
#endif
+static __initdata int nomca;
+static __init int setup_nomca(char *s)
+{
+ nomca = 1;
+ return 0;
+}
+early_param("nomca", setup_nomca);
+
void __init
setup_arch (char **cmdline_p)
{
@@ -402,35 +410,15 @@ setup_arch (char **cmdline_p)
efi_init();
io_port_init();
+ parse_early_param();
+
#ifdef CONFIG_IA64_GENERIC
- {
- const char *mvec_name = strstr (*cmdline_p, "machvec=");
- char str[64];
-
- if (mvec_name) {
- const char *end;
- size_t len;
-
- mvec_name += 8;
- end = strchr (mvec_name, ' ');
- if (end)
- len = end - mvec_name;
- else
- len = strlen (mvec_name);
- len = min(len, sizeof (str) - 1);
- strncpy (str, mvec_name, len);
- str[len] = '\0';
- mvec_name = str;
- } else
- mvec_name = acpi_get_sysname();
- machvec_init(mvec_name);
- }
+ machvec_init(NULL);
#endif
if (early_console_setup(*cmdline_p) == 0)
mark_bsp_online();
- parse_early_param();
#ifdef CONFIG_ACPI
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
@@ -446,7 +434,7 @@ setup_arch (char **cmdline_p)
find_memory();
/* process SAL system table: */
- ia64_sal_init(efi.sal_systab);
+ ia64_sal_init(__va(efi.sal_systab));
ia64_setup_printk_clock();
@@ -493,7 +481,7 @@ setup_arch (char **cmdline_p)
#endif
/* enable IA-64 Machine Check Abort Handling unless disabled */
- if (!strstr(saved_command_line, "nomca"))
+ if (!nomca)
ia64_mca_init();
platform_setup(cmdline_p);
@@ -623,7 +611,7 @@ struct seq_operations cpuinfo_op = {
.show = show_cpuinfo
};
-void
+static void __cpuinit
identify_cpu (struct cpuinfo_ia64 *c)
{
union {
@@ -700,7 +688,7 @@ setup_per_cpu_areas (void)
* In addition, the minimum of the i-cache stride sizes is calculated for
* "flush_icache_range()".
*/
-static void
+static void __cpuinit
get_max_cacheline_size (void)
{
unsigned long line_size, max = 1;
@@ -763,10 +751,10 @@ get_max_cacheline_size (void)
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
*/
-void
+void __cpuinit
cpu_init (void)
{
- extern void __devinit ia64_mmu_init (void *);
+ extern void __cpuinit ia64_mmu_init (void *);
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
@@ -894,9 +882,16 @@ void sched_cacheflush(void)
ia64_sal_cache_flush(3);
}
-void
+void __init
check_bugs (void)
{
ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
(unsigned long) __end___mckinley_e9_bundles);
}
+
+static int __init run_dmi_scan(void)
+{
+ dmi_scan_machine();
+ return 0;
+}
+core_initcall(run_dmi_scan);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb44d07..1d7903ee2126 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
}
return 0;
}
-
-/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
- * could not be delivered. It is important that the target process is not
- * allowed to do any more work in user space. Possible cases for the target
- * process:
- *
- * - It is sleeping and will wake up soon. Store the data in the current task,
- * the signal will be sent when the current task returns from the next
- * interrupt.
- *
- * - It is running in user context. Store the data in the current task, the
- * signal will be sent when the current task returns from the next interrupt.
- *
- * - It is running in kernel context on this or another cpu and will return to
- * user context. Store the data in the target task, the signal will be sent
- * to itself when the target task returns to user space.
- *
- * - It is running in kernel context on this cpu and will sleep before
- * returning to user context. Because this is also the current task, the
- * signal will not get delivered and the task could sleep indefinitely.
- * Store the data in the idle task for this cpu, the signal will be sent
- * after the idle task processes its next interrupt.
- *
- * To cover all cases, store the data in the target task, the current task and
- * the idle task on this cpu. Whatever happens, the signal will be delivered
- * to the target task before it can do any useful user space work. Multiple
- * deliveries have no unwanted side effects.
- *
- * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
- * disabled. It must not take any locks nor use kernel structures or services
- * that require locks.
- */
-
-/* To ensure that we get the right pid, check its start time. To avoid extra
- * include files in thread_info.h, convert the task start_time to unsigned long,
- * giving us a cycle time of > 580 years.
- */
-static inline unsigned long
-start_time_ul(const struct task_struct *t)
-{
- return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
-}
-
-void
-set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
-{
- struct task_struct *t;
- unsigned long start_time = 0;
- int i;
-
- for (i = 1; i <= 3; ++i) {
- switch (i) {
- case 1:
- t = find_task_by_pid(pid);
- if (t)
- start_time = start_time_ul(t);
- break;
- case 2:
- t = current;
- break;
- default:
- t = idle_task(smp_processor_id());
- break;
- }
-
- if (!t)
- return;
- task_thread_info(t)->sigdelayed.signo = signo;
- task_thread_info(t)->sigdelayed.code = code;
- task_thread_info(t)->sigdelayed.addr = addr;
- task_thread_info(t)->sigdelayed.start_time = start_time;
- task_thread_info(t)->sigdelayed.pid = pid;
- wmb();
- set_tsk_thread_flag(t, TIF_SIGDELAYED);
- }
-}
-
-/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
- * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
- */
-
-void
-do_sigdelayed(void)
-{
- struct siginfo siginfo;
- pid_t pid;
- struct task_struct *t;
-
- clear_thread_flag(TIF_SIGDELAYED);
- memset(&siginfo, 0, sizeof(siginfo));
- siginfo.si_signo = current_thread_info()->sigdelayed.signo;
- siginfo.si_code = current_thread_info()->sigdelayed.code;
- siginfo.si_addr = current_thread_info()->sigdelayed.addr;
- pid = current_thread_info()->sigdelayed.pid;
- t = find_task_by_pid(pid);
- if (!t)
- return;
- if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
- return;
- force_sig_info(siginfo.si_signo, &siginfo, t);
-}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index b681ef34a86e..44e9547878ac 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
#endif
#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok 1
+#else
+#define bsp_remove_ok 0
+#endif
+
/*
* Store all idle threads, this can be reused instead of creating
* a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
/*
* ITC synchronization related stuff:
*/
-#define MASTER 0
+#define MASTER (0)
#define SLAVE (SMP_CACHE_BYTES/8)
#define NUM_ROUNDS 64 /* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT (1)
+#else
+#define CPEI_OVERRIDE_DEFAULT (0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+ int value=0;
+
+ get_option (&str, &value);
+ force_cpei_retarget = value;
+
+ return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
static int __init
nointroute (char *str)
{
@@ -161,6 +188,27 @@ nointroute (char *str)
__setup("nointroute", nointroute);
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ int cpuid;
+ static int fix_bsp_b0 = 1;
+
+ cpuid = smp_processor_id();
+
+ /*
+ * Cache the b0 value on the first AP that comes up
+ */
+ if (!(fix_bsp_b0 && cpuid))
+ return;
+
+ sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+ printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+ fix_bsp_b0 = 0;
+#endif
+}
+
void
sync_master (void *arg)
{
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
static void __devinit
smp_callin (void)
{
- int cpuid, phys_id;
+ int cpuid, phys_id, itc_master;
extern void ia64_init_itm(void);
+ extern volatile int time_keeper_id;
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
+ itc_master = time_keeper_id;
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
BUG();
}
+ fix_b0_for_bsp();
+
lock_ipi_calllock();
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
* calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
* local_bh_enable(), which bugs out if irqs are not enabled...
*/
- Dprintk("Going to syncup ITC with BP.\n");
- ia64_sync_itc(0);
+ Dprintk("Going to syncup ITC with ITC Master.\n");
+ ia64_sync_itc(itc_master);
}
/*
@@ -572,32 +624,8 @@ void __devinit smp_prepare_boot_cpu(void)
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
-/*
- * mt_info[] is a temporary store for all info returned by
- * PAL_LOGICAL_TO_PHYSICAL, to be copied into cpuinfo_ia64 when the
- * specific cpu comes.
- */
-static struct {
- __u32 socket_id;
- __u16 core_id;
- __u16 thread_id;
- __u16 proc_fixed_addr;
- __u8 valid;
-} mt_info[NR_CPUS] __devinitdata;
-
#ifdef CONFIG_HOTPLUG_CPU
static inline void
-remove_from_mtinfo(int cpu)
-{
- int i;
-
- for_each_cpu(i)
- if (mt_info[i].valid && mt_info[i].socket_id ==
- cpu_data(cpu)->socket_id)
- mt_info[i].valid = 0;
-}
-
-static inline void
clear_cpu_sibling_map(int cpu)
{
int i;
@@ -626,15 +654,50 @@ remove_siblinginfo(int cpu)
/* remove it from all sibling map's */
clear_cpu_sibling_map(cpu);
+}
+
+extern void fixup_irqs(void);
- /* if this cpu is the last in the core group, remove all its info
- * from mt_info structure
+int migrate_platform_irqs(unsigned int cpu)
+{
+ int new_cpei_cpu;
+ irq_desc_t *desc = NULL;
+ cpumask_t mask;
+ int retval = 0;
+
+ /*
+ * dont permit CPEI target to removed.
*/
- if (last)
- remove_from_mtinfo(cpu);
+ if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+ printk ("CPU (%d) is CPEI Target\n", cpu);
+ if (can_cpei_retarget()) {
+ /*
+ * Now re-target the CPEI to a different processor
+ */
+ new_cpei_cpu = any_online_cpu(cpu_online_map);
+ mask = cpumask_of_cpu(new_cpei_cpu);
+ set_cpei_target_cpu(new_cpei_cpu);
+ desc = irq_descp(ia64_cpe_irq);
+ /*
+ * Switch for now, immediatly, we need to do fake intr
+ * as other interrupts, but need to study CPEI behaviour with
+ * polling before making changes.
+ */
+ if (desc) {
+ desc->handler->disable(ia64_cpe_irq);
+ desc->handler->set_affinity(ia64_cpe_irq, mask);
+ desc->handler->enable(ia64_cpe_irq);
+ printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+ }
+ }
+ if (!desc) {
+ printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+ retval = -EBUSY;
+ }
+ }
+ return retval;
}
-extern void fixup_irqs(void);
/* must be called with cpucontrol mutex held */
int __cpu_disable(void)
{
@@ -643,8 +706,17 @@ int __cpu_disable(void)
/*
* dont permit boot processor for now
*/
- if (cpu == 0)
- return -EBUSY;
+ if (cpu == 0 && !bsp_remove_ok) {
+ printk ("Your platform does not support removal of BSP\n");
+ return (-EBUSY);
+ }
+
+ cpu_clear(cpu, cpu_online_map);
+
+ if (migrate_platform_irqs(cpu)) {
+ cpu_set(cpu, cpu_online_map);
+ return (-EBUSY);
+ }
remove_siblinginfo(cpu);
cpu_clear(cpu, cpu_online_map);
@@ -776,40 +848,6 @@ init_smp_config(void)
ia64_sal_strerror(sal_ret));
}
-static inline int __devinit
-check_for_mtinfo_index(void)
-{
- int i;
-
- for_each_cpu(i)
- if (!mt_info[i].valid)
- return i;
-
- return -1;
-}
-
-/*
- * Search the mt_info to find out if this socket's cid/tid information is
- * cached or not. If the socket exists, fill in the core_id and thread_id
- * in cpuinfo
- */
-static int __devinit
-check_for_new_socket(__u16 logical_address, struct cpuinfo_ia64 *c)
-{
- int i;
- __u32 sid = c->socket_id;
-
- for_each_cpu(i) {
- if (mt_info[i].valid && mt_info[i].proc_fixed_addr == logical_address
- && mt_info[i].socket_id == sid) {
- c->core_id = mt_info[i].core_id;
- c->thread_id = mt_info[i].thread_id;
- return 1; /* not a new socket */
- }
- }
- return 0;
-}
-
/*
* identify_siblings(cpu) gets called from identify_cpu. This populates the
* information related to logical execution units in per_cpu_data structure.
@@ -819,14 +857,12 @@ identify_siblings(struct cpuinfo_ia64 *c)
{
s64 status;
u16 pltid;
- u64 proc_fixed_addr;
- int count, i;
pal_logical_to_physical_t info;
if (smp_num_cpucores == 1 && smp_num_siblings == 1)
return;
- if ((status = ia64_pal_logical_to_phys(0, &info)) != PAL_STATUS_SUCCESS) {
+ if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
status);
return;
@@ -835,47 +871,12 @@ identify_siblings(struct cpuinfo_ia64 *c)
printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
return;
}
- if ((status = ia64_pal_fixed_addr(&proc_fixed_addr)) != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_pal_fixed_addr failed with %ld\n", status);
- return;
- }
c->socket_id = (pltid << 8) | info.overview_ppid;
c->cores_per_socket = info.overview_cpp;
c->threads_per_core = info.overview_tpc;
- count = c->num_log = info.overview_num_log;
-
- /* If the thread and core id information is already cached, then
- * we will simply update cpu_info and return. Otherwise, we will
- * do the PAL calls and cache core and thread id's of all the siblings.
- */
- if (check_for_new_socket(proc_fixed_addr, c))
- return;
-
- for (i = 0; i < count; i++) {
- int index;
-
- if (i && (status = ia64_pal_logical_to_phys(i, &info))
- != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_pal_logical_to_phys failed"
- " with %ld\n", status);
- return;
- }
- if (info.log2_la == proc_fixed_addr) {
- c->core_id = info.log1_cid;
- c->thread_id = info.log1_tid;
- }
+ c->num_log = info.overview_num_log;
- index = check_for_mtinfo_index();
- /* We will not do the mt_info caching optimization in this case.
- */
- if (index < 0)
- continue;
-
- mt_info[index].valid = 1;
- mt_info[index].socket_id = c->socket_id;
- mt_info[index].core_id = info.log1_cid;
- mt_info[index].thread_id = info.log1_tid;
- mt_info[index].proc_fixed_addr = info.log2_la;
- }
+ c->core_id = info.log1_cid;
+ c->thread_id = info.log1_tid;
}
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 307d01e15b2e..49958904045b 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
extern unsigned long wall_jiffies;
-#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == TIME_KEEPER_ID) {
+ if (smp_processor_id() == time_keeper_id) {
/*
* Here we are in the timer irq handler. We have irqs locally
* disabled, but we don't know if the timer_bh is running on
@@ -188,7 +188,7 @@ ia64_init_itm (void)
itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
- printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, "
+ printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
"ITC freq=%lu.%03luMHz", smp_processor_id(),
platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
+void __devinit ia64_disable_timer(void)
+{
+ ia64_set_itv(1 << 16);
+}
+
void __init
time_init (void)
{
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 6e5eea19fa67..b47476d655f1 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -9,6 +9,8 @@
* 2002/08/07 Erich Focht <efocht@ess.nec.de>
* Populate cpu entries in sysfs for non-numa systems as well
* Intel Corporation - Ashok Raj
+ * 02/27/2006 Zhang, Yanmin
+ * Populate cpu cache entries in sysfs for cpu cache info
*/
#include <linux/config.h>
@@ -19,6 +21,7 @@
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/nodemask.h>
+#include <linux/notifier.h>
#include <asm/mmzone.h>
#include <asm/numa.h>
#include <asm/cpu.h>
@@ -36,7 +39,7 @@ int arch_register_cpu(int num)
parent = &sysfs_nodes[cpu_to_node(num)];
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_ACPI
+#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
/*
* If CPEI cannot be re-targetted, and this is
* CPEI target, then dont create the control file
@@ -101,3 +104,367 @@ out:
}
subsys_initcall(topology_init);
+
+
+/*
+ * Export cpu cache information through sysfs
+ */
+
+/*
+ * A bunch of string array to get pretty printing
+ */
+static const char *cache_types[] = {
+ "", /* not used */
+ "Instruction",
+ "Data",
+ "Unified" /* unified */
+};
+
+static const char *cache_mattrib[]={
+ "WriteThrough",
+ "WriteBack",
+ "", /* reserved */
+ "" /* reserved */
+};
+
+struct cache_info {
+ pal_cache_config_info_t cci;
+ cpumask_t shared_cpu_map;
+ int level;
+ int type;
+ struct kobject kobj;
+};
+
+struct cpu_cache_info {
+ struct cache_info *cache_leaves;
+ int num_cache_leaves;
+ struct kobject kobj;
+};
+
+static struct cpu_cache_info all_cpu_cache_info[NR_CPUS];
+#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y])
+
+#ifdef CONFIG_SMP
+static void cache_shared_cpu_map_setup( unsigned int cpu,
+ struct cache_info * this_leaf)
+{
+ pal_cache_shared_info_t csi;
+ int num_shared, i = 0;
+ unsigned int j;
+
+ if (cpu_data(cpu)->threads_per_core <= 1 &&
+ cpu_data(cpu)->cores_per_socket <= 1) {
+ cpu_set(cpu, this_leaf->shared_cpu_map);
+ return;
+ }
+
+ if (ia64_pal_cache_shared_info(this_leaf->level,
+ this_leaf->type,
+ 0,
+ &csi) != PAL_STATUS_SUCCESS)
+ return;
+
+ num_shared = (int) csi.num_shared;
+ do {
+ for_each_cpu(j)
+ if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id
+ && cpu_data(j)->core_id == csi.log1_cid
+ && cpu_data(j)->thread_id == csi.log1_tid)
+ cpu_set(j, this_leaf->shared_cpu_map);
+
+ i++;
+ } while (i < num_shared &&
+ ia64_pal_cache_shared_info(this_leaf->level,
+ this_leaf->type,
+ i,
+ &csi) == PAL_STATUS_SUCCESS);
+}
+#else
+static void cache_shared_cpu_map_setup(unsigned int cpu,
+ struct cache_info * this_leaf)
+{
+ cpu_set(cpu, this_leaf->shared_cpu_map);
+ return;
+}
+#endif
+
+static ssize_t show_coherency_line_size(struct cache_info *this_leaf,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size);
+}
+
+static ssize_t show_ways_of_associativity(struct cache_info *this_leaf,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc);
+}
+
+static ssize_t show_attributes(struct cache_info *this_leaf, char *buf)
+{
+ return sprintf(buf,
+ "%s\n",
+ cache_mattrib[this_leaf->cci.pcci_cache_attr]);
+}
+
+static ssize_t show_size(struct cache_info *this_leaf, char *buf)
+{
+ return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024);
+}
+
+static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf)
+{
+ unsigned number_of_sets = this_leaf->cci.pcci_cache_size;
+ number_of_sets /= this_leaf->cci.pcci_assoc;
+ number_of_sets /= 1 << this_leaf->cci.pcci_line_size;
+
+ return sprintf(buf, "%u\n", number_of_sets);
+}
+
+static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
+{
+ ssize_t len;
+ cpumask_t shared_cpu_map;
+
+ cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
+ len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+ len += sprintf(buf+len, "\n");
+ return len;
+}
+
+static ssize_t show_type(struct cache_info *this_leaf, char *buf)
+{
+ int type = this_leaf->type + this_leaf->cci.pcci_unified;
+ return sprintf(buf, "%s\n", cache_types[type]);
+}
+
+static ssize_t show_level(struct cache_info *this_leaf, char *buf)
+{
+ return sprintf(buf, "%u\n", this_leaf->level);
+}
+
+struct cache_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct cache_info *, char *);
+ ssize_t (*store)(struct cache_info *, const char *, size_t count);
+};
+
+#ifdef define_one_ro
+ #undef define_one_ro
+#endif
+#define define_one_ro(_name) \
+ static struct cache_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(ways_of_associativity);
+define_one_ro(size);
+define_one_ro(number_of_sets);
+define_one_ro(shared_cpu_map);
+define_one_ro(attributes);
+
+static struct attribute * cache_default_attrs[] = {
+ &type.attr,
+ &level.attr,
+ &coherency_line_size.attr,
+ &ways_of_associativity.attr,
+ &attributes.attr,
+ &size.attr,
+ &number_of_sets.attr,
+ &shared_cpu_map.attr,
+ NULL
+};
+
+#define to_object(k) container_of(k, struct cache_info, kobj)
+#define to_attr(a) container_of(a, struct cache_attr, attr)
+
+static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
+{
+ struct cache_attr *fattr = to_attr(attr);
+ struct cache_info *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->show ? fattr->show(this_leaf, buf) : 0;
+ return ret;
+}
+
+static struct sysfs_ops cache_sysfs_ops = {
+ .show = cache_show
+};
+
+static struct kobj_type cache_ktype = {
+ .sysfs_ops = &cache_sysfs_ops,
+ .default_attrs = cache_default_attrs,
+};
+
+static struct kobj_type cache_ktype_percpu_entry = {
+ .sysfs_ops = &cache_sysfs_ops,
+};
+
+static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
+{
+ if (all_cpu_cache_info[cpu].cache_leaves) {
+ kfree(all_cpu_cache_info[cpu].cache_leaves);
+ all_cpu_cache_info[cpu].cache_leaves = NULL;
+ }
+ all_cpu_cache_info[cpu].num_cache_leaves = 0;
+ memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
+
+ return;
+}
+
+static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
+{
+ u64 i, levels, unique_caches;
+ pal_cache_config_info_t cci;
+ int j;
+ s64 status;
+ struct cache_info *this_cache;
+ int num_cache_leaves = 0;
+
+ if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
+ printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
+ return -1;
+ }
+
+ this_cache=kzalloc(sizeof(struct cache_info)*unique_caches,
+ GFP_KERNEL);
+ if (this_cache == NULL)
+ return -ENOMEM;
+
+ for (i=0; i < levels; i++) {
+ for (j=2; j >0 ; j--) {
+ if ((status=ia64_pal_cache_config_info(i,j, &cci)) !=
+ PAL_STATUS_SUCCESS)
+ continue;
+
+ this_cache[num_cache_leaves].cci = cci;
+ this_cache[num_cache_leaves].level = i + 1;
+ this_cache[num_cache_leaves].type = j;
+
+ cache_shared_cpu_map_setup(cpu,
+ &this_cache[num_cache_leaves]);
+ num_cache_leaves ++;
+ }
+ }
+
+ all_cpu_cache_info[cpu].cache_leaves = this_cache;
+ all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves;
+
+ memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject));
+
+ return 0;
+}
+
+/* Add cache interface for CPU device */
+static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long i, j;
+ struct cache_info *this_object;
+ int retval = 0;
+ cpumask_t oldmask;
+
+ if (all_cpu_cache_info[cpu].kobj.parent)
+ return 0;
+
+ oldmask = current->cpus_allowed;
+ retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ if (unlikely(retval))
+ return retval;
+
+ retval = cpu_cache_sysfs_init(cpu);
+ set_cpus_allowed(current, oldmask);
+ if (unlikely(retval < 0))
+ return retval;
+
+ all_cpu_cache_info[cpu].kobj.parent = &sys_dev->kobj;
+ kobject_set_name(&all_cpu_cache_info[cpu].kobj, "%s", "cache");
+ all_cpu_cache_info[cpu].kobj.ktype = &cache_ktype_percpu_entry;
+ retval = kobject_register(&all_cpu_cache_info[cpu].kobj);
+
+ for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
+ this_object = LEAF_KOBJECT_PTR(cpu,i);
+ this_object->kobj.parent = &all_cpu_cache_info[cpu].kobj;
+ kobject_set_name(&(this_object->kobj), "index%1lu", i);
+ this_object->kobj.ktype = &cache_ktype;
+ retval = kobject_register(&(this_object->kobj));
+ if (unlikely(retval)) {
+ for (j = 0; j < i; j++) {
+ kobject_unregister(
+ &(LEAF_KOBJECT_PTR(cpu,j)->kobj));
+ }
+ kobject_unregister(&all_cpu_cache_info[cpu].kobj);
+ cpu_cache_sysfs_exit(cpu);
+ break;
+ }
+ }
+ return retval;
+}
+
+/* Remove cache interface for CPU device */
+static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ unsigned long i;
+
+ for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++)
+ kobject_unregister(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
+
+ if (all_cpu_cache_info[cpu].kobj.parent) {
+ kobject_unregister(&all_cpu_cache_info[cpu].kobj);
+ memset(&all_cpu_cache_info[cpu].kobj,
+ 0,
+ sizeof(struct kobject));
+ }
+
+ cpu_cache_sysfs_exit(cpu);
+
+ return 0;
+}
+
+/*
+ * When a cpu is hot-plugged, do a check and initiate
+ * cache kobject if necessary
+ */
+static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct sys_device *sys_dev;
+
+ sys_dev = get_cpu_sysdev(cpu);
+ switch (action) {
+ case CPU_ONLINE:
+ cache_add_dev(sys_dev);
+ break;
+ case CPU_DEAD:
+ cache_remove_dev(sys_dev);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cache_cpu_notifier =
+{
+ .notifier_call = cache_cpu_callback
+};
+
+static int __cpuinit cache_sysfs_init(void)
+{
+ int i;
+
+ for_each_online_cpu(i) {
+ cache_cpu_callback(&cache_cpu_notifier, CPU_ONLINE,
+ (void *)(long)i);
+ }
+
+ register_cpu_notifier(&cache_cpu_notifier);
+
+ return 0;
+}
+
+device_initcall(cache_sysfs_init);
+
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index dabd6c32641e..7c1ddc8ac443 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -30,19 +30,19 @@ extern spinlock_t timerlist_lock;
fpswa_interface_t *fpswa_interface;
EXPORT_SYMBOL(fpswa_interface);
-struct notifier_block *ia64die_chain;
+ATOMIC_NOTIFIER_HEAD(ia64die_chain);
int
register_die_notifier(struct notifier_block *nb)
{
- return notifier_chain_register(&ia64die_chain, nb);
+ return atomic_notifier_chain_register(&ia64die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
int
unregister_die_notifier(struct notifier_block *nb)
{
- return notifier_chain_unregister(&ia64die_chain, nb);
+ return atomic_notifier_chain_unregister(&ia64die_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_die_notifier);
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 73af6267d2ef..783600fe52b2 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -70,34 +70,18 @@ SECTIONS
__stop___ex_table = .;
}
- .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
- {
- __start___vtop_patchlist = .;
- *(.data.patch.vtop)
- __end___vtop_patchlist = .;
- }
-
- .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+ /* MCA table */
+ . = ALIGN(16);
+ __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
{
- __start___mckinley_e9_bundles = .;
- *(.data.patch.mckinley_e9)
- __end___mckinley_e9_bundles = .;
+ __start___mca_table = .;
+ *(__mca_table)
+ __stop___mca_table = .;
}
/* Global data */
_data = .;
-#if defined(CONFIG_IA64_GENERIC)
- /* Machine Vector */
- . = ALIGN(16);
- .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
- {
- machvec_start = .;
- *(.machvec)
- machvec_end = .;
- }
-#endif
-
/* Unwind info & table: */
. = ALIGN(8);
.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
@@ -154,6 +138,32 @@ SECTIONS
*(.initcall7.init)
__initcall_end = .;
}
+
+ .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
+ {
+ __start___vtop_patchlist = .;
+ *(.data.patch.vtop)
+ __end___vtop_patchlist = .;
+ }
+
+ .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+ {
+ __start___mckinley_e9_bundles = .;
+ *(.data.patch.mckinley_e9)
+ __end___mckinley_e9_bundles = .;
+ }
+
+#if defined(CONFIG_IA64_GENERIC)
+ /* Machine Vector */
+ . = ALIGN(16);
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
+ {
+ machvec_start = .;
+ *(.machvec)
+ machvec_end = .;
+ }
+#endif
+
__con_initcall_start = .;
.con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
{ *(.con_initcall.init) }