summaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--arch/ia64/kernel/Makefile2
-rw-r--r--arch/ia64/kernel/acpi.c88
-rw-r--r--arch/ia64/kernel/domain.c80
-rw-r--r--arch/ia64/kernel/entry.S122
-rw-r--r--arch/ia64/kernel/fsys.S147
-rw-r--r--arch/ia64/kernel/gate.S62
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c3
-rw-r--r--arch/ia64/kernel/iosapic.c147
-rw-r--r--arch/ia64/kernel/irq_ia64.c15
-rw-r--r--arch/ia64/kernel/ivt.S198
-rw-r--r--arch/ia64/kernel/jprobes.S61
-rw-r--r--arch/ia64/kernel/kprobes.c721
-rw-r--r--arch/ia64/kernel/mca.c2
-rw-r--r--arch/ia64/kernel/numa.c57
-rw-r--r--arch/ia64/kernel/perfmon.c2
-rw-r--r--arch/ia64/kernel/process.c24
-rw-r--r--arch/ia64/kernel/ptrace.c22
-rw-r--r--arch/ia64/kernel/salinfo.c3
-rw-r--r--arch/ia64/kernel/setup.c52
-rw-r--r--arch/ia64/kernel/signal.c17
-rw-r--r--arch/ia64/kernel/smp.c3
-rw-r--r--arch/ia64/kernel/smpboot.c45
-rw-r--r--arch/ia64/kernel/topology.c9
-rw-r--r--arch/ia64/kernel/traps.c39
-rw-r--r--arch/ia64/kernel/unwind.c12
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S7
26 files changed, 1559 insertions, 381 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 4c73d8ba2e3d..e1fb68ddec26 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -17,9 +17,11 @@ obj-$(CONFIG_IA64_PALINFO) += palinfo.o
obj-$(CONFIG_IOSAPIC) += iosapic.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
+obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 72dfd9e7de0f..9609f243e5d0 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -11,6 +11,7 @@
* Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
* Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
* Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
@@ -67,6 +68,11 @@ EXPORT_SYMBOL(pm_power_off);
unsigned char acpi_kbd_controller_present = 1;
unsigned char acpi_legacy_devices;
+static unsigned int __initdata acpi_madt_rev;
+
+unsigned int acpi_cpei_override;
+unsigned int acpi_cpei_phys_cpuid;
+
#define MAX_SAPICS 256
u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
{ [0 ... MAX_SAPICS - 1] = -1 };
@@ -236,9 +242,7 @@ acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
if (BAD_MADT_ENTRY(iosapic, end))
return -EINVAL;
- iosapic_init(iosapic->address, iosapic->global_irq_base);
-
- return 0;
+ return iosapic_init(iosapic->address, iosapic->global_irq_base);
}
@@ -267,10 +271,56 @@ acpi_parse_plat_int_src (
(plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
platform_intr_list[plintsrc->type] = vector;
+ if (acpi_madt_rev > 1) {
+ acpi_cpei_override = plintsrc->plint_flags.cpei_override_flag;
+ }
+
+ /*
+ * Save the physical id, so we can check when its being removed
+ */
+ acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff;
+
return 0;
}
+unsigned int can_cpei_retarget(void)
+{
+ extern int cpe_vector;
+
+ /*
+ * Only if CPEI is supported and the override flag
+ * is present, otherwise return that its re-targettable
+ * if we are in polling mode.
+ */
+ if (cpe_vector > 0 && !acpi_cpei_override)
+ return 0;
+ else
+ return 1;
+}
+
+unsigned int is_cpu_cpei_target(unsigned int cpu)
+{
+ unsigned int logical_id;
+
+ logical_id = cpu_logical_id(acpi_cpei_phys_cpuid);
+
+ if (logical_id == cpu)
+ return 1;
+ else
+ return 0;
+}
+
+void set_cpei_target_cpu(unsigned int cpu)
+{
+ acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
+}
+
+unsigned int get_cpei_target_cpu(void)
+{
+ return acpi_cpei_phys_cpuid;
+}
+
static int __init
acpi_parse_int_src_ovr (
acpi_table_entry_header *header, const unsigned long end)
@@ -328,6 +378,8 @@ acpi_parse_madt (unsigned long phys_addr, unsigned long size)
acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+ acpi_madt_rev = acpi_madt->header.revision;
+
/* remember the value for reference after free_initmem() */
#ifdef CONFIG_ITANIUM
has_8259 = 1; /* Firmware on old Itanium systems is broken */
@@ -642,9 +694,11 @@ acpi_boot_init (void)
if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
}
- build_cpu_to_node_map();
# endif
#endif
+#ifdef CONFIG_ACPI_NUMA
+ build_cpu_to_node_map();
+#endif
/* Make boot-up look pretty */
printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
return 0;
@@ -772,7 +826,7 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);
#ifdef CONFIG_ACPI_NUMA
-acpi_status __init
+acpi_status __devinit
acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
{
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
@@ -825,4 +879,28 @@ acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
return AE_OK;
}
#endif /* CONFIG_NUMA */
+
+int
+acpi_register_ioapic (acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+ int err;
+
+ if ((err = iosapic_init(phys_addr, gsi_base)))
+ return err;
+
+#if CONFIG_ACPI_NUMA
+ acpi_map_iosapic(handle, 0, NULL, NULL);
+#endif /* CONFIG_ACPI_NUMA */
+
+ return 0;
+}
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int
+acpi_unregister_ioapic (acpi_handle handle, u32 gsi_base)
+{
+ return iosapic_remove(gsi_base);
+}
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
#endif /* CONFIG_ACPI_BOOT */
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
index fe532c970438..bbb8efe126b7 100644
--- a/arch/ia64/kernel/domain.c
+++ b/arch/ia64/kernel/domain.c
@@ -14,7 +14,7 @@
#include <linux/topology.h>
#include <linux/nodemask.h>
-#define SD_NODES_PER_DOMAIN 6
+#define SD_NODES_PER_DOMAIN 16
#ifdef CONFIG_NUMA
/**
@@ -27,7 +27,7 @@
*
* Should use nodemask_t.
*/
-static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
+static int find_next_best_node(int node, unsigned long *used_nodes)
{
int i, n, val, min_val, best_node = 0;
@@ -66,7 +66,7 @@ static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
* should be one that prevents unnecessary balancing, but also spreads tasks
* out optimally.
*/
-static cpumask_t __devinit sched_domain_node_span(int node)
+static cpumask_t sched_domain_node_span(int node)
{
int i;
cpumask_t span, nodemask;
@@ -96,7 +96,7 @@ static cpumask_t __devinit sched_domain_node_span(int node)
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
-static int __devinit cpu_to_cpu_group(int cpu)
+static int cpu_to_cpu_group(int cpu)
{
return cpu;
}
@@ -104,7 +104,7 @@ static int __devinit cpu_to_cpu_group(int cpu)
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
-static int __devinit cpu_to_phys_group(int cpu)
+static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
return first_cpu(cpu_sibling_map[cpu]);
@@ -125,44 +125,36 @@ static struct sched_group *sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];
-static int __devinit cpu_to_allnodes_group(int cpu)
+static int cpu_to_allnodes_group(int cpu)
{
return cpu_to_node(cpu);
}
#endif
/*
- * Set up scheduler domains and groups. Callers must hold the hotplug lock.
+ * Build sched domains for a given set of cpus and attach the sched domains
+ * to the individual cpus
*/
-void __devinit arch_init_sched_domains(void)
+void build_sched_domains(const cpumask_t *cpu_map)
{
int i;
- cpumask_t cpu_default_map;
/*
- * Setup mask for cpus without special case scheduling requirements.
- * For now this just excludes isolated cpus, but could be used to
- * exclude other special cases in the future.
+ * Set up domains for cpus specified by the cpu_map.
*/
- cpus_complement(cpu_default_map, cpu_isolated_map);
- cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
-
- /*
- * Set up domains. Isolated domains just stay on the dummy domain.
- */
- for_each_cpu_mask(i, cpu_default_map) {
+ for_each_cpu_mask(i, *cpu_map) {
int group;
struct sched_domain *sd = NULL, *p;
cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
- cpus_and(nodemask, nodemask, cpu_default_map);
+ cpus_and(nodemask, nodemask, *cpu_map);
#ifdef CONFIG_NUMA
if (num_online_cpus()
> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT;
- sd->span = cpu_default_map;
+ sd->span = *cpu_map;
group = cpu_to_allnodes_group(i);
sd->groups = &sched_group_allnodes[group];
p = sd;
@@ -173,7 +165,7 @@ void __devinit arch_init_sched_domains(void)
*sd = SD_NODE_INIT;
sd->span = sched_domain_node_span(cpu_to_node(i));
sd->parent = p;
- cpus_and(sd->span, sd->span, cpu_default_map);
+ cpus_and(sd->span, sd->span, *cpu_map);
#endif
p = sd;
@@ -190,7 +182,7 @@ void __devinit arch_init_sched_domains(void)
group = cpu_to_cpu_group(i);
*sd = SD_SIBLING_INIT;
sd->span = cpu_sibling_map[i];
- cpus_and(sd->span, sd->span, cpu_default_map);
+ cpus_and(sd->span, sd->span, *cpu_map);
sd->parent = p;
sd->groups = &sched_group_cpus[group];
#endif
@@ -198,9 +190,9 @@ void __devinit arch_init_sched_domains(void)
#ifdef CONFIG_SCHED_SMT
/* Set up CPU (sibling) groups */
- for_each_cpu_mask(i, cpu_default_map) {
+ for_each_cpu_mask(i, *cpu_map) {
cpumask_t this_sibling_map = cpu_sibling_map[i];
- cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
+ cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
if (i != first_cpu(this_sibling_map))
continue;
@@ -213,7 +205,7 @@ void __devinit arch_init_sched_domains(void)
for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i);
- cpus_and(nodemask, nodemask, cpu_default_map);
+ cpus_and(nodemask, nodemask, *cpu_map);
if (cpus_empty(nodemask))
continue;
@@ -222,7 +214,7 @@ void __devinit arch_init_sched_domains(void)
}
#ifdef CONFIG_NUMA
- init_sched_build_groups(sched_group_allnodes, cpu_default_map,
+ init_sched_build_groups(sched_group_allnodes, *cpu_map,
&cpu_to_allnodes_group);
for (i = 0; i < MAX_NUMNODES; i++) {
@@ -233,12 +225,12 @@ void __devinit arch_init_sched_domains(void)
cpumask_t covered = CPU_MASK_NONE;
int j;
- cpus_and(nodemask, nodemask, cpu_default_map);
+ cpus_and(nodemask, nodemask, *cpu_map);
if (cpus_empty(nodemask))
continue;
domainspan = sched_domain_node_span(i);
- cpus_and(domainspan, domainspan, cpu_default_map);
+ cpus_and(domainspan, domainspan, *cpu_map);
sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
sched_group_nodes[i] = sg;
@@ -266,7 +258,7 @@ void __devinit arch_init_sched_domains(void)
int n = (i + j) % MAX_NUMNODES;
cpus_complement(notcovered, covered);
- cpus_and(tmp, notcovered, cpu_default_map);
+ cpus_and(tmp, notcovered, *cpu_map);
cpus_and(tmp, tmp, domainspan);
if (cpus_empty(tmp))
break;
@@ -293,7 +285,7 @@ void __devinit arch_init_sched_domains(void)
#endif
/* Calculate CPU power for physical packages and nodes */
- for_each_cpu_mask(i, cpu_default_map) {
+ for_each_cpu_mask(i, *cpu_map) {
int power;
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
@@ -349,7 +341,7 @@ next_sg:
#endif
/* Attach the domains */
- for_each_online_cpu(i) {
+ for_each_cpu_mask(i, *cpu_map) {
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
@@ -359,13 +351,35 @@ next_sg:
cpu_attach_domain(sd, i);
}
}
+/*
+ * Set up scheduler domains and groups. Callers must hold the hotplug lock.
+ */
+void arch_init_sched_domains(const cpumask_t *cpu_map)
+{
+ cpumask_t cpu_default_map;
+
+ /*
+ * Setup mask for cpus without special case scheduling requirements.
+ * For now this just excludes isolated cpus, but could be used to
+ * exclude other special cases in the future.
+ */
+ cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
+
+ build_sched_domains(&cpu_default_map);
+}
-void __devinit arch_destroy_sched_domains(void)
+void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
int i;
for (i = 0; i < MAX_NUMNODES; i++) {
+ cpumask_t nodemask = node_to_cpumask(i);
struct sched_group *oldsg, *sg = sched_group_nodes[i];
+
+ cpus_and(nodemask, nodemask, *cpu_map);
+ if (cpus_empty(nodemask))
+ continue;
+
if (sg == NULL)
continue;
sg = sg->next;
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index b1d5d3d5276c..9be53e1ea404 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -470,18 +470,6 @@ ENTRY(load_switch_stack)
br.cond.sptk.many b7
END(load_switch_stack)
-GLOBAL_ENTRY(__ia64_syscall)
- .regstk 6,0,0,0
- mov r15=in5 // put syscall number in place
- break __BREAK_SYSCALL
- movl r2=errno
- cmp.eq p6,p7=-1,r10
- ;;
-(p6) st4 [r2]=r8
-(p6) mov r8=-1
- br.ret.sptk.many rp
-END(__ia64_syscall)
-
GLOBAL_ENTRY(execve)
mov r15=__NR_execve // put syscall number in place
break __BREAK_SYSCALL
@@ -637,7 +625,7 @@ END(ia64_ret_from_syscall)
* r8-r11: restored (syscall return value(s))
* r12: restored (user-level stack pointer)
* r13: restored (user-level thread pointer)
- * r14: cleared
+ * r14: set to __kernel_syscall_via_epc
* r15: restored (syscall #)
* r16-r17: cleared
* r18: user-level b6
@@ -658,7 +646,7 @@ END(ia64_ret_from_syscall)
* pr: restored (user-level pr)
* b0: restored (user-level rp)
* b6: restored
- * b7: cleared
+ * b7: set to __kernel_syscall_via_epc
* ar.unat: restored (user-level ar.unat)
* ar.pfs: restored (user-level ar.pfs)
* ar.rsc: restored (user-level ar.rsc)
@@ -704,72 +692,79 @@ ENTRY(ia64_leave_syscall)
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- mov b7=r0 // clear b7
+ nop.i 0
;;
- ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
+ mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
;;
- mov r16=ar.bsp // M2 get existing backing store pointer
+ ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
(p6) br.cond.spnt .work_pending_syscall
;;
// start restoring the state saved on the kernel stack (struct pt_regs):
ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
ld8 r11=[r3],PT(CR_IIP)-PT(R11)
- mov f6=f0 // clear f6
+(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
;;
invala // M0|1 invalidate ALAT
- rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
- mov f9=f0 // clear f9
+ rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
+ cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs
- ld8 r29=[r2],16 // load cr.ipsr
- ld8 r28=[r3],16 // load cr.iip
- mov f8=f0 // clear f8
+ ld8 r29=[r2],16 // M0|1 load cr.ipsr
+ ld8 r28=[r3],16 // M0|1 load cr.iip
+ mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
- cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
- mov f10=f0 // clear f10
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+ nop 0
;;
- ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
- ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
- mov f11=f0 // clear f11
+ ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+ ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc
+ mov f6=f0 // F clear f6
;;
- ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
- ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+ ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage)
+ ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates
+ mov f7=f0 // F clear f7
;;
- ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
- ld8.fill r1=[r3],16 // load r1
-(pUStk) mov r17=1
+ ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr
+ ld8.fill r1=[r3],16 // M0|1 load r1
+(pUStk) mov r17=1 // A
;;
- srlz.d // M0 ensure interruption collection is off
- ld8.fill r13=[r3],16
- mov f7=f0 // clear f7
+(pUStk) st1 [r14]=r17 // M2|3
+ ld8.fill r13=[r3],16 // M0|1
+ mov f8=f0 // F clear f8
;;
- ld8.fill r12=[r2] // restore r12 (sp)
- mov.m ar.ssd=r0 // M2 clear ar.ssd
- mov r22=r0 // clear r22
+ ld8.fill r12=[r2] // M0|1 restore r12 (sp)
+ ld8.fill r15=[r3] // M0|1 restore r15
+ mov b6=r18 // I0 restore b6
+
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+ mov f9=f0 // F clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch // B
- ld8.fill r15=[r3] // restore r15
-(pUStk) st1 [r14]=r17
- addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
+ srlz.d // M0 ensure interruption collection is off (for cover)
+ shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
+ cover // B add current frame into dirty partition & set cr.ifs
;;
-(pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8
- mov.m ar.csd=r0 // M2 clear ar.csd
- mov b6=r18 // I0 restore b6
+(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
+ mov r19=ar.bsp // M2 get new backing store pointer
+ mov f10=f0 // F clear f10
+
+ nop.m 0
+ movl r14=__kernel_syscall_via_epc // X
;;
- mov r14=r0 // clear r14
- shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
+ mov.m ar.csd=r0 // M2 clear ar.csd
+ mov.m ar.ccv=r0 // M2 clear ar.ccv
+ mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
- mov.m ar.ccv=r0 // clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
- br.cond.sptk.many rbs_switch
+ mov.m ar.ssd=r0 // M2 clear ar.ssd
+ mov f11=f0 // F clear f11
+ br.cond.sptk.many rbs_switch // B
END(ia64_leave_syscall)
#ifdef CONFIG_IA32_SUPPORT
@@ -885,7 +880,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
ldf.fill f7=[r2],PT(F11)-PT(F7)
ldf.fill f8=[r3],32
;;
- srlz.i // ensure interruption collection is off
+ srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned)
mov ar.ccv=r15
;;
ldf.fill f11=[r2]
@@ -945,11 +940,10 @@ GLOBAL_ENTRY(ia64_leave_kernel)
* NOTE: alloc, loadrs, and cover can't be predicated.
*/
(pNonSys) br.cond.dpnt dont_preserve_current_frame
-
-rbs_switch:
cover // add current frame into dirty partition and set cr.ifs
;;
mov r19=ar.bsp // get new backing store pointer
+rbs_switch:
sub r16=r16,r18 // krbs = old bsp - size of dirty partition
cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
;;
@@ -1024,14 +1018,14 @@ rse_clear_invalid:
mov loc5=0
mov loc6=0
mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
;;
mov loc8=0
mov loc9=0
cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
mov loc10=0
mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
#endif /* !CONFIG_ITANIUM */
# undef pRecurse
# undef pReturn
@@ -1255,7 +1249,7 @@ ENTRY(sys_rt_sigreturn)
stf.spill [r17]=f11
adds out0=16,sp // out0 = &sigscratch
br.call.sptk.many rp=ia64_rt_sigreturn
-.ret19: .restore sp 0
+.ret19: .restore sp,0
adds sp=16,sp
;;
ld8 r9=[sp] // load new ar.unat
@@ -1577,11 +1571,11 @@ sys_call_table:
data8 sys_add_key
data8 sys_request_key
data8 sys_keyctl
+ data8 sys_ioprio_set
+ data8 sys_ioprio_get // 1275
data8 sys_ni_syscall
- data8 sys_ni_syscall // 1275
- data8 sys_set_zone_reclaim
- data8 sys_ni_syscall
- data8 sys_ni_syscall
- data8 sys_ni_syscall
+ data8 sys_inotify_init
+ data8 sys_inotify_add_watch
+ data8 sys_inotify_rm_watch
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 962b6c4e32b5..7d7684a369d3 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -531,93 +531,114 @@ GLOBAL_ENTRY(fsys_bubble_down)
.altrp b6
.body
/*
- * We get here for syscalls that don't have a lightweight handler. For those, we
- * need to bubble down into the kernel and that requires setting up a minimal
- * pt_regs structure, and initializing the CPU state more or less as if an
- * interruption had occurred. To make syscall-restarts work, we setup pt_regs
- * such that cr_iip points to the second instruction in syscall_via_break.
- * Decrementing the IP hence will restart the syscall via break and not
- * decrementing IP will return us to the caller, as usual. Note that we preserve
- * the value of psr.pp rather than initializing it from dcr.pp. This makes it
- * possible to distinguish fsyscall execution from other privileged execution.
+ * We get here for syscalls that don't have a lightweight
+ * handler. For those, we need to bubble down into the kernel
+ * and that requires setting up a minimal pt_regs structure,
+ * and initializing the CPU state more or less as if an
+ * interruption had occurred. To make syscall-restarts work,
+ * we setup pt_regs such that cr_iip points to the second
+ * instruction in syscall_via_break. Decrementing the IP
+ * hence will restart the syscall via break and not
+ * decrementing IP will return us to the caller, as usual.
+ * Note that we preserve the value of psr.pp rather than
+ * initializing it from dcr.pp. This makes it possible to
+ * distinguish fsyscall execution from other privileged
+ * execution.
*
* On entry:
- * - normal fsyscall handler register usage, except that we also have:
+ * - normal fsyscall handler register usage, except
+ * that we also have:
* - r18: address of syscall entry point
* - r21: ar.fpsr
* - r26: ar.pfs
* - r27: ar.rsc
* - r29: psr
+ *
+ * We used to clear some PSR bits here but that requires slow
+ * serialization. Fortuntely, that isn't really necessary.
+ * The rationale is as follows: we used to clear bits
+ * ~PSR_PRESERVED_BITS in PSR.L. Since
+ * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+ * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+ * However,
+ *
+ * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+ * PSR.AC : don't care (kernel normally turns PSR.AC on)
+ * PSR.I : already turned off by the time fsys_bubble_down gets
+ * invoked
+ * PSR.DFL: always 0 (kernel never turns it on)
+ * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+ * initiative
+ * PSR.DI : always 0 (kernel never turns it on)
+ * PSR.SI : always 0 (kernel never turns it on)
+ * PSR.DB : don't care --- kernel never enables kernel-level
+ * breakpoints
+ * PSR.TB : must be 0 already; if it wasn't zero on entry to
+ * __kernel_syscall_via_epc, the branch to fsys_bubble_down
+ * will trigger a taken branch; the taken-trap-handler then
+ * converts the syscall into a break-based system-call.
*/
-# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
- | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
- | IA64_PSR_IC)
/*
- * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
- * to synthesize.
+ * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+ * The rest we have to synthesize.
*/
-# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
+# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
+ | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN | IA64_PSR_I)
- invala
- movl r8=PSR_ONE_BITS
+ invala // M0|1
+ movl r14=ia64_ret_from_syscall // X
- mov r25=ar.unat // save ar.unat (5 cyc)
- movl r9=PSR_PRESERVED_BITS
+ nop.m 0
+ movl r28=__kernel_syscall_via_break // X create cr.iip
+ ;;
- mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
- movl r28=__kernel_syscall_via_break
+ mov r2=r16 // A get task addr to addl-addressable register
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+ mov r31=pr // I0 save pr (2 cyc)
;;
- mov r23=ar.bspstore // save ar.bspstore (12 cyc)
- mov r31=pr // save pr (2 cyc)
- mov r20=r1 // save caller's gp in r20
+ st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
+ addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
+ add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
;;
- mov r2=r16 // copy current task addr to addl-addressable register
- and r9=r9,r29
- mov r19=b6 // save b6 (2 cyc)
+ ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
+ lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
+ nop.i 0
;;
- mov psr.l=r9 // slam the door (17 cyc to srlz.i)
- or r29=r8,r29 // construct cr.ipsr value to save
- addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
+ mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
+ nop.m 0
+ nop.i 0
;;
- // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
- // we may be reading ar.itc after writing to psr.l. Avoid that message with
- // this directive:
- dv_serialize_data
- mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
- lfetch.fault.excl.nt1 [r22]
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
-
- // ensure previous insn group is issued before we stall for srlz.i:
+ mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
+ mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
+ nop.i 0
;;
- srlz.i // ensure new psr.l has been established
- /////////////////////////////////////////////////////////////////////////////
- ////////// from this point on, execution is not interruptible anymore
- /////////////////////////////////////////////////////////////////////////////
- addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
- cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
+ mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
+ movl r8=PSR_ONE_BITS // X
;;
- st1 [r16]=r0 // clear current->thread.on_ustack flag
- mov ar.bspstore=r22 // switch to kernel RBS
- mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
- add r3=TI_FLAGS+IA64_TASK_SIZE,r2
+ mov r25=ar.unat // M2 (5 cyc) save ar.unat
+ mov r19=b6 // I0 save b6 (2 cyc)
+ mov r20=r1 // A save caller's gp in r20
;;
- ld4 r3=[r3] // r2 = current_thread_info()->flags
- mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
- mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
- br.call.sptk.many b7=ia64_syscall_setup
- ;;
- ssm psr.i
- movl r2=ia64_ret_from_syscall
+ or r29=r8,r29 // A construct cr.ipsr value to save
+ mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
+ addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
+
+ mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
+ cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
+ br.call.sptk.many b7=ia64_syscall_setup // B
;;
- mov rp=r2 // set the real return addr
- and r3=_TIF_SYSCALL_TRACEAUDIT,r3
+ mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
+ mov rp=r14 // I0 set the real return addr
+ and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
;;
- cmp.eq p8,p0=r3,r0
+ ssm psr.i // M2 we're on kernel stacks now, reenable irqs
+ cmp.eq p8,p0=r3,r0 // A
+(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
-(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
-(p8) br.call.sptk.many b6=b6 // ignore this return addr
- br.cond.sptk ia64_trace_syscall
+ nop.m 0
+(p8) br.call.sptk.many b6=b6 // B (ignore return address)
+ br.cond.spnt ia64_trace_syscall // B
END(fsys_bubble_down)
.rodata
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index facf75acdc85..86948ce63e43 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -72,38 +72,40 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
* bundle get executed. The remaining code must be safe even if
* they do not get executed.
*/
- adds r17=-1024,r15
- mov r10=0 // default to successful syscall execution
- epc
+ adds r17=-1024,r15 // A
+ mov r10=0 // A default to successful syscall execution
+ epc // B causes split-issue
}
;;
- rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
- LOAD_FSYSCALL_TABLE(r14)
-
- mov r16=IA64_KR(CURRENT) // 12 cycle read latency
- tnat.nz p10,p9=r15
- mov r19=NR_syscalls-1
+ rsm psr.be | psr.i // M2 (5 cyc to srlz.d)
+ LOAD_FSYSCALL_TABLE(r14) // X
;;
- shladd r18=r17,3,r14
-
- srlz.d
- cmp.ne p8,p0=r0,r0 // p8 <- FALSE
- /* Note: if r17 is a NaT, p6 will be set to zero. */
- cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
- ;;
-(p6) ld8 r18=[r18]
- mov r21=ar.fpsr
- add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
- ;;
-(p6) mov b7=r18
-(p6) tbit.z p8,p0=r18,0
-(p8) br.dptk.many b7
-
-(p6) rsm psr.i
- mov r27=ar.rsc
- mov r26=ar.pfs
+ mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
+ shladd r18=r17,3,r14 // A
+ mov r19=NR_syscalls-1 // A
+ ;;
+ lfetch [r18] // M0|1
+ mov r29=psr // M2 (12 cyc)
+ // If r17 is a NaT, p6 will be zero
+ cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+ ;;
+ mov r21=ar.fpsr // M2 (12 cyc)
+ tnat.nz p10,p9=r15 // I0
+ mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
+ ;;
+ srlz.d // M0 (forces split-issue) ensure PSR.BE==0
+(p6) ld8 r18=[r18] // M0|1
+ nop.i 0
+ ;;
+ nop.m 0
+(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
+ nop.i 0
;;
- mov r29=psr // read psr (12 cyc load latency)
+(p8) ssm psr.i
+(p6) mov b7=r18 // I0
+(p8) br.dptk.many b7 // B
+
+ mov r27=ar.rsc // M2 (12 cyc)
/*
* brl.cond doesn't work as intended because the linker would convert this branch
* into a branch to a PLT. Perhaps there will be a way to avoid this with some
@@ -111,6 +113,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
* instead.
*/
#ifdef CONFIG_ITANIUM
+(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
+ ;;
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
@@ -118,7 +122,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc)
#else
BRL_COND_FSYS_BUBBLE_DOWN(p6)
#endif
-
+ ssm psr.i
mov r10=-1
(p10) mov r8=EINVAL
(p9) mov r8=ENOSYS
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 7bbf019c9867..01572814abe4 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -58,9 +58,6 @@ EXPORT_SYMBOL(__strlen_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
-#include <asm/unistd.h>
-EXPORT_SYMBOL(__ia64_syscall);
-
/* from arch/ia64/lib */
extern void __divsi3(void);
extern void __udivsi3(void);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 88b014381df5..7936b62f7a2e 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -129,14 +129,13 @@ static struct iosapic {
char __iomem *addr; /* base address of IOSAPIC */
unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
unsigned short num_rte; /* number of RTE in this IOSAPIC */
+ int rtes_inuse; /* # of RTEs in use on this IOSAPIC */
#ifdef CONFIG_NUMA
unsigned short node; /* numa node association via pxm */
#endif
} iosapic_lists[NR_IOSAPICS];
-static int num_iosapic;
-
-static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */
+static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
static int iosapic_kmalloc_ok;
static LIST_HEAD(free_rte_list);
@@ -149,7 +148,7 @@ find_iosapic (unsigned int gsi)
{
int i;
- for (i = 0; i < num_iosapic; i++) {
+ for (i = 0; i < NR_IOSAPICS; i++) {
if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
return i;
}
@@ -490,8 +489,6 @@ static int iosapic_find_sharable_vector (unsigned long trigger, unsigned long po
}
}
}
- if (vector < 0)
- panic("%s: out of interrupt vectors!\n", __FUNCTION__);
return vector;
}
@@ -507,6 +504,8 @@ iosapic_reassign_vector (int vector)
if (!list_empty(&iosapic_intr_info[vector].rtes)) {
new_vector = assign_irq_vector(AUTO_ASSIGN);
+ if (new_vector < 0)
+ panic("%s: out of interrupt vectors!\n", __FUNCTION__);
printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
sizeof(struct iosapic_intr_info));
@@ -598,6 +597,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
rte->refcnt++;
list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
iosapic_intr_info[vector].count++;
+ iosapic_lists[index].rtes_inuse++;
}
else if (vector_is_shared(vector)) {
struct iosapic_intr_info *info = &iosapic_intr_info[vector];
@@ -734,9 +734,12 @@ again:
spin_unlock_irqrestore(&iosapic_lock, flags);
/* If vector is running out, we try to find a sharable vector */
- vector = assign_irq_vector_nopanic(AUTO_ASSIGN);
- if (vector < 0)
+ vector = assign_irq_vector(AUTO_ASSIGN);
+ if (vector < 0) {
vector = iosapic_find_sharable_vector(trigger, polarity);
+ if (vector < 0)
+ panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+ }
spin_lock_irqsave(&irq_descp(vector)->lock, flags);
spin_lock(&iosapic_lock);
@@ -778,7 +781,7 @@ void
iosapic_unregister_intr (unsigned int gsi)
{
unsigned long flags;
- int irq, vector;
+ int irq, vector, index;
irq_desc_t *idesc;
u32 low32;
unsigned long trigger, polarity;
@@ -819,6 +822,9 @@ iosapic_unregister_intr (unsigned int gsi)
list_del(&rte->rte_list);
iosapic_intr_info[vector].count--;
iosapic_free_rte(rte);
+ index = find_iosapic(gsi);
+ iosapic_lists[index].rtes_inuse--;
+ WARN_ON(iosapic_lists[index].rtes_inuse < 0);
trigger = iosapic_intr_info[vector].trigger;
polarity = iosapic_intr_info[vector].polarity;
@@ -881,6 +887,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
break;
case ACPI_INTERRUPT_INIT:
vector = assign_irq_vector(AUTO_ASSIGN);
+ if (vector < 0)
+ panic("%s: out of interrupt vectors!\n", __FUNCTION__);
delivery = IOSAPIC_INIT;
break;
case ACPI_INTERRUPT_CPEI:
@@ -952,30 +960,86 @@ iosapic_system_init (int system_pcat_compat)
}
}
-void __init
+static inline int
+iosapic_alloc (void)
+{
+ int index;
+
+ for (index = 0; index < NR_IOSAPICS; index++)
+ if (!iosapic_lists[index].addr)
+ return index;
+
+ printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
+ return -1;
+}
+
+static inline void
+iosapic_free (int index)
+{
+ memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
+}
+
+static inline int
+iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
+{
+ int index;
+ unsigned int gsi_end, base, end;
+
+ /* check gsi range */
+ gsi_end = gsi_base + ((ver >> 16) & 0xff);
+ for (index = 0; index < NR_IOSAPICS; index++) {
+ if (!iosapic_lists[index].addr)
+ continue;
+
+ base = iosapic_lists[index].gsi_base;
+ end = base + iosapic_lists[index].num_rte - 1;
+
+ if (gsi_base < base && gsi_end < base)
+ continue;/* OK */
+
+ if (gsi_base > end && gsi_end > end)
+ continue; /* OK */
+
+ return -EBUSY;
+ }
+ return 0;
+}
+
+int __devinit
iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
{
- int num_rte;
+ int num_rte, err, index;
unsigned int isa_irq, ver;
char __iomem *addr;
+ unsigned long flags;
- addr = ioremap(phys_addr, 0);
- ver = iosapic_version(addr);
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ addr = ioremap(phys_addr, 0);
+ ver = iosapic_version(addr);
- /*
- * The MAX_REDIR register holds the highest input pin
- * number (starting from 0).
- * We add 1 so that we can use it for number of pins (= RTEs)
- */
- num_rte = ((ver >> 16) & 0xff) + 1;
+ if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
+ iounmap(addr);
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+ return err;
+ }
+
+ /*
+ * The MAX_REDIR register holds the highest input pin
+ * number (starting from 0).
+ * We add 1 so that we can use it for number of pins (= RTEs)
+ */
+ num_rte = ((ver >> 16) & 0xff) + 1;
- iosapic_lists[num_iosapic].addr = addr;
- iosapic_lists[num_iosapic].gsi_base = gsi_base;
- iosapic_lists[num_iosapic].num_rte = num_rte;
+ index = iosapic_alloc();
+ iosapic_lists[index].addr = addr;
+ iosapic_lists[index].gsi_base = gsi_base;
+ iosapic_lists[index].num_rte = num_rte;
#ifdef CONFIG_NUMA
- iosapic_lists[num_iosapic].node = MAX_NUMNODES;
+ iosapic_lists[index].node = MAX_NUMNODES;
#endif
- num_iosapic++;
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
if ((gsi_base == 0) && pcat_compat) {
/*
@@ -986,10 +1050,43 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
for (isa_irq = 0; isa_irq < 16; ++isa_irq)
iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
}
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG
+int
+iosapic_remove (unsigned int gsi_base)
+{
+ int index, err = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ index = find_iosapic(gsi_base);
+ if (index < 0) {
+ printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
+ __FUNCTION__, gsi_base);
+ goto out;
+ }
+
+ if (iosapic_lists[index].rtes_inuse) {
+ err = -EBUSY;
+ printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
+ __FUNCTION__, gsi_base);
+ goto out;
+ }
+
+ iounmap(iosapic_lists[index].addr);
+ iosapic_free(index);
+ }
+ out:
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+ return err;
}
+#endif /* CONFIG_HOTPLUG */
#ifdef CONFIG_NUMA
-void __init
+void __devinit
map_iosapic_to_node(unsigned int gsi_base, int node)
{
int index;
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 4fe60c7a2e90..6c4d59fd0364 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -63,30 +63,19 @@ EXPORT_SYMBOL(isa_irq_to_vector_map);
static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
int
-assign_irq_vector_nopanic (int irq)
+assign_irq_vector (int irq)
{
int pos, vector;
again:
pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
vector = IA64_FIRST_DEVICE_VECTOR + pos;
if (vector > IA64_LAST_DEVICE_VECTOR)
- return -1;
+ return -ENOSPC;
if (test_and_set_bit(pos, ia64_vector_mask))
goto again;
return vector;
}
-int
-assign_irq_vector (int irq)
-{
- int vector = assign_irq_vector_nopanic(irq);
-
- if (vector < 0)
- panic("assign_irq_vector: out of interrupt vectors!");
-
- return vector;
-}
-
void
free_irq_vector (int vector)
{
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 2bc085a73e30..3bb3a13c4047 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -1,7 +1,7 @@
/*
* arch/ia64/kernel/ivt.S
*
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger <davidm@hpl.hp.com>
* Copyright (C) 2000, 2002-2003 Intel Co
@@ -692,82 +692,118 @@ ENTRY(break_fault)
* to prevent leaking bits from kernel to user level.
*/
DBG_FAULT(11)
- mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
- mov r17=cr.iim
- mov r18=__IA64_BREAK_SYSCALL
- mov r21=ar.fpsr
- mov r29=cr.ipsr
- mov r19=b6
- mov r25=ar.unat
- mov r27=ar.rsc
- mov r26=ar.pfs
- mov r28=cr.iip
- mov r31=pr // prepare to save predicates
- mov r20=r1
- ;;
+ mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc)
+ mov r29=cr.ipsr // M2 (12 cyc)
+ mov r31=pr // I0 (2 cyc)
+
+ mov r17=cr.iim // M2 (2 cyc)
+ mov.m r27=ar.rsc // M2 (12 cyc)
+ mov r18=__IA64_BREAK_SYSCALL // A
+
+ mov.m ar.rsc=0 // M2
+ mov.m r21=ar.fpsr // M2 (12 cyc)
+ mov r19=b6 // I0 (2 cyc)
+ ;;
+ mov.m r23=ar.bspstore // M2 (12 cyc)
+ mov.m r24=ar.rnat // M2 (5 cyc)
+ mov.i r26=ar.pfs // I0 (2 cyc)
+
+ invala // M0|1
+ nop.m 0 // M
+ mov r20=r1 // A save r1
+
+ nop.m 0
+ movl r30=sys_call_table // X
+
+ mov r28=cr.iip // M2 (2 cyc)
+ cmp.eq p0,p7=r18,r17 // I0 is this a system call?
+(p7) br.cond.spnt non_syscall // B no ->
+ //
+ // From this point on, we are definitely on the syscall-path
+ // and we can use (non-banked) scratch registers.
+ //
+///////////////////////////////////////////////////////////////////////
+ mov r1=r16 // A move task-pointer to "addl"-addressable reg
+ mov r2=r16 // A setup r2 for ia64_syscall_setup
+ add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = &current_thread_info()->flags
+
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
- cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so)
-(p7) br.cond.spnt non_syscall
+ adds r15=-1024,r15 // A subtract 1024 from syscall number
+ mov r3=NR_syscalls - 1
;;
- ld1 r17=[r16] // load current->thread.on_ustack flag
- st1 [r16]=r0 // clear current->thread.on_ustack flag
- add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
+ ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag
+ ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags
+ extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr
+
+ shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024)
+ addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS
+ cmp.leu p6,p7=r15,r3 // A syscall number in range?
;;
- invala
- /* adjust return address so we skip over the break instruction: */
+ lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS
+(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point
+ tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT?
- extr.u r8=r29,41,2 // extract ei field from cr.ipsr
- ;;
- cmp.eq p6,p7=2,r8 // isr.ei==2?
- mov r2=r1 // setup r2 for ia64_syscall_setup
- ;;
-(p6) mov r8=0 // clear ei to 0
-(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
-(p7) adds r8=1,r8 // increment ei to next slot
- ;;
- cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already?
- dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
+ mov.m ar.bspstore=r22 // M2 switch to kernel RBS
+ cmp.eq p8,p9=2,r8 // A isr.ei==2?
;;
- // switch from user to kernel RBS:
- MINSTATE_START_SAVE_MIN_VIRT
- br.call.sptk.many b7=ia64_syscall_setup
- ;;
- MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- mov r3=NR_syscalls - 1
- ;;
-(p15) ssm psr.i // restore psr.i
- // p10==true means out registers are more than 8 or r15's Nat is true
-(p10) br.cond.spnt.many ia64_ret_from_syscall
- ;;
- movl r16=sys_call_table
+(p8) mov r8=0 // A clear ei to 0
+(p7) movl r30=sys_ni_syscall // X
- adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
- movl r2=ia64_ret_from_syscall
- ;;
- shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
- cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
- mov rp=r2 // set the real return addr
+(p8) adds r28=16,r28 // A switch cr.iip to next bundle
+(p9) adds r8=1,r8 // A increment ei to next slot
+ nop.i 0
;;
-(p6) ld8 r20=[r20] // load address of syscall entry point
-(p7) movl r20=sys_ni_syscall
- add r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- ld4 r2=[r2] // r2 = current_thread_info()->flags
- ;;
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ mov.m r25=ar.unat // M2 (5 cyc)
+ dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
+ adds r15=1024,r15 // A restore original syscall number
+ //
+ // If any of the above loads miss in L1D, we'll stall here until
+ // the data arrives.
+ //
+///////////////////////////////////////////////////////////////////////
+ st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
+ mov b6=r30 // I0 setup syscall handler branch reg early
+ cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
+
+ and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
+ mov r18=ar.bsp // M2 (12 cyc)
+(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS
+ ;;
+.back_from_break_fixup:
+(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack
+ cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
+ br.call.sptk.many b7=ia64_syscall_setup // B
+1:
+ mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
+ nop 0
+ bsw.1 // B (6 cyc) regs are saved, switch to bank 1
;;
- cmp.eq p8,p0=r2,r0
- mov b6=r20
+
+ ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection
+ movl r3=ia64_ret_from_syscall // X
;;
-(p8) br.call.sptk.many b6=b6 // ignore this return addr
- br.cond.sptk ia64_trace_syscall
+
+ srlz.i // M0 ensure interruption collection is on
+ mov rp=r3 // I0 set the real return addr
+(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
+
+(p15) ssm psr.i // M2 restore psr.i
+(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr)
+ br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic
// NOT REACHED
+///////////////////////////////////////////////////////////////////////
+ // On entry, we optimistically assumed that we're coming from user-space.
+ // For the rare cases where a system-call is done from within the kernel,
+ // we fix things up at this point:
+.break_fixup:
+ add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure
+ mov ar.rnat=r24 // M2 restore kernel's AR.RNAT
+ ;;
+ mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE
+ br.cond.sptk .back_from_break_fixup
END(break_fault)
.org ia64_ivt+0x3000
@@ -842,8 +878,6 @@ END(interrupt)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
- * - executing on bank 1 registers
- * - psr.ic enabled, interrupts restored
* - p10: TRUE if syscall is invoked with more than 8 out
* registers or r15's Nat is true
* - r1: kernel's gp
@@ -851,8 +885,11 @@ END(interrupt)
* - r8: -EINVAL if p10 is true
* - r12: points to kernel stack
* - r13: points to current task
+ * - r14: preserved (same as on entry)
+ * - p13: preserved
* - p15: TRUE if interrupts need to be re-enabled
* - ar.fpsr: set to kernel settings
+ * - b6: preserved (same as on entry)
*/
GLOBAL_ENTRY(ia64_syscall_setup)
#if PT(B6) != 0
@@ -920,10 +957,10 @@ GLOBAL_ENTRY(ia64_syscall_setup)
(p13) mov in5=-1
;;
st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
- tnat.nz p14,p0=in6
+ tnat.nz p13,p0=in6
cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
;;
- stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+ mov r8=1
(p9) tnat.nz p10,p0=r15
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
@@ -934,9 +971,9 @@ GLOBAL_ENTRY(ia64_syscall_setup)
mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer
;;
-(p14) mov in6=-1
+ st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p13) mov in6=-1
(p8) mov in7=-1
- nop.i 0
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
movl r17=FPSR_DEFAULT
@@ -1007,6 +1044,8 @@ END(dispatch_illegal_op_fault)
FAULT(17)
ENTRY(non_syscall)
+ mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER
+ ;;
SAVE_MIN_WITH_COVER
// There is no particular reason for this code to be here, other than that
@@ -1204,6 +1243,25 @@ END(disabled_fp_reg)
// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
ENTRY(nat_consumption)
DBG_FAULT(26)
+
+ mov r16=cr.ipsr
+ mov r17=cr.isr
+ mov r31=pr // save PR
+ ;;
+ and r18=0xf,r17 // r18 = cr.ipsr.code{3:0}
+ tbit.z p6,p0=r17,IA64_ISR_NA_BIT
+ ;;
+ cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18
+ dep r16=-1,r16,IA64_PSR_ED_BIT,1
+(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
+ ;;
+ mov cr.ipsr=r16 // set cr.ipsr.na
+ mov pr=r31,-1
+ ;;
+ rfi
+
+1: mov pr=r31,-1
+ ;;
FAULT(26)
END(nat_consumption)
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
new file mode 100644
index 000000000000..b7fa3ccd2b0f
--- /dev/null
+++ b/arch/ia64/kernel/jprobes.S
@@ -0,0 +1,61 @@
+/*
+ * Jprobe specific operations
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-May Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ * <anil.s.keshavamurthy@intel.com> initial implementation
+ *
+ * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a
+ * probe to be inserted into the beginning of a function call. The fundamental
+ * difference between a jprobe and a kprobe is the jprobe handler is executed
+ * in the same context as the target function, while the kprobe handlers
+ * are executed in interrupt context.
+ *
+ * For jprobes we initially gain control by placing a break point in the
+ * first instruction of the targeted function. When we catch that specific
+ * break, we:
+ * * set the return address to our jprobe_inst_return() function
+ * * jump to the jprobe handler function
+ *
+ * Since we fixed up the return address, the jprobe handler will return to our
+ * jprobe_inst_return() function, giving us control again. At this point we
+ * are back in the parents frame marker, so we do yet another call to our
+ * jprobe_break() function to fix up the frame marker as it would normally
+ * exist in the target function.
+ *
+ * Our jprobe_return function then transfers control back to kprobes.c by
+ * executing a break instruction using one of our reserved numbers. When we
+ * catch that break in kprobes.c, we continue like we do for a normal kprobe
+ * by single stepping the emulated instruction, and then returning execution
+ * to the correct location.
+ */
+#include <asm/asmmacro.h>
+
+ /*
+ * void jprobe_break(void)
+ */
+ENTRY(jprobe_break)
+ break.m 0x80300
+END(jprobe_break)
+
+ /*
+ * void jprobe_inst_return(void)
+ */
+GLOBAL_ENTRY(jprobe_inst_return)
+ br.call.sptk.many b0=jprobe_break
+END(jprobe_inst_return)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
new file mode 100644
index 000000000000..884f5cd27d8a
--- /dev/null
+++ b/arch/ia64/kernel/kprobes.c
@@ -0,0 +1,721 @@
+/*
+ * Kernel Probes (KProbes)
+ * arch/ia64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Intel Corporation, 2005
+ *
+ * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy
+ * <anil.s.keshavamurthy@intel.com> adapted from i386
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/preempt.h>
+#include <linux/moduleloader.h>
+
+#include <asm/pgtable.h>
+#include <asm/kdebug.h>
+#include <asm/sections.h>
+
+extern void jprobe_inst_return(void);
+
+/* kprobe_status settings */
+#define KPROBE_HIT_ACTIVE 0x00000001
+#define KPROBE_HIT_SS 0x00000002
+
+static struct kprobe *current_kprobe, *kprobe_prev;
+static unsigned long kprobe_status, kprobe_status_prev;
+static struct pt_regs jprobe_saved_regs;
+
+enum instruction_type {A, I, M, F, B, L, X, u};
+static enum instruction_type bundle_encoding[32][3] = {
+ { M, I, I }, /* 00 */
+ { M, I, I }, /* 01 */
+ { M, I, I }, /* 02 */
+ { M, I, I }, /* 03 */
+ { M, L, X }, /* 04 */
+ { M, L, X }, /* 05 */
+ { u, u, u }, /* 06 */
+ { u, u, u }, /* 07 */
+ { M, M, I }, /* 08 */
+ { M, M, I }, /* 09 */
+ { M, M, I }, /* 0A */
+ { M, M, I }, /* 0B */
+ { M, F, I }, /* 0C */
+ { M, F, I }, /* 0D */
+ { M, M, F }, /* 0E */
+ { M, M, F }, /* 0F */
+ { M, I, B }, /* 10 */
+ { M, I, B }, /* 11 */
+ { M, B, B }, /* 12 */
+ { M, B, B }, /* 13 */
+ { u, u, u }, /* 14 */
+ { u, u, u }, /* 15 */
+ { B, B, B }, /* 16 */
+ { B, B, B }, /* 17 */
+ { M, M, B }, /* 18 */
+ { M, M, B }, /* 19 */
+ { u, u, u }, /* 1A */
+ { u, u, u }, /* 1B */
+ { M, F, B }, /* 1C */
+ { M, F, B }, /* 1D */
+ { u, u, u }, /* 1E */
+ { u, u, u }, /* 1F */
+};
+
+/*
+ * In this function we check to see if the instruction
+ * is IP relative instruction and update the kprobe
+ * inst flag accordingly
+ */
+static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode,
+ unsigned long kprobe_inst, struct kprobe *p)
+{
+ p->ainsn.inst_flag = 0;
+ p->ainsn.target_br_reg = 0;
+
+ if (bundle_encoding[template][slot] == B) {
+ switch (major_opcode) {
+ case INDIRECT_CALL_OPCODE:
+ p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+ p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ break;
+ case IP_RELATIVE_PREDICT_OPCODE:
+ case IP_RELATIVE_BRANCH_OPCODE:
+ p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+ break;
+ case IP_RELATIVE_CALL_OPCODE:
+ p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+ p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+ p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ break;
+ }
+ } else if (bundle_encoding[template][slot] == X) {
+ switch (major_opcode) {
+ case LONG_CALL_OPCODE:
+ p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+ p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+ break;
+ }
+ }
+ return;
+}
+
+/*
+ * In this function we check to see if the instruction
+ * on which we are inserting kprobe is supported.
+ * Returns 0 if supported
+ * Returns -EINVAL if unsupported
+ */
+static int unsupported_inst(uint template, uint slot, uint major_opcode,
+ unsigned long kprobe_inst, struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr;
+
+ if (bundle_encoding[template][slot] == I) {
+ switch (major_opcode) {
+ case 0x0: //I_UNIT_MISC_OPCODE:
+ /*
+ * Check for Integer speculation instruction
+ * - Bit 33-35 to be equal to 0x1
+ */
+ if (((kprobe_inst >> 33) & 0x7) == 1) {
+ printk(KERN_WARNING
+ "Kprobes on speculation inst at <0x%lx> not supported\n",
+ addr);
+ return -EINVAL;
+ }
+
+ /*
+ * IP relative mov instruction
+ * - Bit 27-35 to be equal to 0x30
+ */
+ if (((kprobe_inst >> 27) & 0x1FF) == 0x30) {
+ printk(KERN_WARNING
+ "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n",
+ addr);
+ return -EINVAL;
+
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * In this function we check to see if the instruction
+ * (qp) cmpx.crel.ctype p1,p2=r2,r3
+ * on which we are inserting kprobe is cmp instruction
+ * with ctype as unc.
+ */
+static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode,
+unsigned long kprobe_inst)
+{
+ cmp_inst_t cmp_inst;
+ uint ctype_unc = 0;
+
+ if (!((bundle_encoding[template][slot] == I) ||
+ (bundle_encoding[template][slot] == M)))
+ goto out;
+
+ if (!((major_opcode == 0xC) || (major_opcode == 0xD) ||
+ (major_opcode == 0xE)))
+ goto out;
+
+ cmp_inst.l = kprobe_inst;
+ if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) {
+ /* Integere compare - Register Register (A6 type)*/
+ if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0)
+ &&(cmp_inst.f.c == 1))
+ ctype_unc = 1;
+ } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) {
+ /* Integere compare - Immediate Register (A8 type)*/
+ if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1))
+ ctype_unc = 1;
+ }
+out:
+ return ctype_unc;
+}
+
+/*
+ * In this function we override the bundle with
+ * the break instruction at the given slot.
+ */
+static void prepare_break_inst(uint template, uint slot, uint major_opcode,
+ unsigned long kprobe_inst, struct kprobe *p)
+{
+ unsigned long break_inst = BREAK_INST;
+ bundle_t *bundle = &p->ainsn.insn.bundle;
+
+ /*
+ * Copy the original kprobe_inst qualifying predicate(qp)
+ * to the break instruction iff !is_cmp_ctype_unc_inst
+ * because for cmp instruction with ctype equal to unc,
+ * which is a special instruction always needs to be
+ * executed regradless of qp
+ */
+ if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst))
+ break_inst |= (0x3f & kprobe_inst);
+
+ switch (slot) {
+ case 0:
+ bundle->quad0.slot0 = break_inst;
+ break;
+ case 1:
+ bundle->quad0.slot1_p0 = break_inst;
+ bundle->quad1.slot1_p1 = break_inst >> (64-46);
+ break;
+ case 2:
+ bundle->quad1.slot2 = break_inst;
+ break;
+ }
+
+ /*
+ * Update the instruction flag, so that we can
+ * emulate the instruction properly after we
+ * single step on original instruction
+ */
+ update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p);
+}
+
+static inline void get_kprobe_inst(bundle_t *bundle, uint slot,
+ unsigned long *kprobe_inst, uint *major_opcode)
+{
+ unsigned long kprobe_inst_p0, kprobe_inst_p1;
+ unsigned int template;
+
+ template = bundle->quad0.template;
+
+ switch (slot) {
+ case 0:
+ *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
+ *kprobe_inst = bundle->quad0.slot0;
+ break;
+ case 1:
+ *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
+ kprobe_inst_p0 = bundle->quad0.slot1_p0;
+ kprobe_inst_p1 = bundle->quad1.slot1_p1;
+ *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
+ break;
+ case 2:
+ *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
+ *kprobe_inst = bundle->quad1.slot2;
+ break;
+ }
+}
+
+/* Returns non-zero if the addr is in the Interrupt Vector Table */
+static inline int in_ivt_functions(unsigned long addr)
+{
+ return (addr >= (unsigned long)__start_ivt_text
+ && addr < (unsigned long)__end_ivt_text);
+}
+
+static int valid_kprobe_addr(int template, int slot, unsigned long addr)
+{
+ if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
+ printk(KERN_WARNING "Attempting to insert unaligned kprobe "
+ "at 0x%lx\n", addr);
+ return -EINVAL;
+ }
+
+ if (in_ivt_functions(addr)) {
+ printk(KERN_WARNING "Kprobes can't be inserted inside "
+ "IVT functions at 0x%lx\n", addr);
+ return -EINVAL;
+ }
+
+ if (slot == 1 && bundle_encoding[template][1] != L) {
+ printk(KERN_WARNING "Inserting kprobes on slot #1 "
+ "is not supported\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline void save_previous_kprobe(void)
+{
+ kprobe_prev = current_kprobe;
+ kprobe_status_prev = kprobe_status;
+}
+
+static inline void restore_previous_kprobe(void)
+{
+ current_kprobe = kprobe_prev;
+ kprobe_status = kprobe_status_prev;
+}
+
+static inline void set_current_kprobe(struct kprobe *p)
+{
+ current_kprobe = p;
+}
+
+static void kretprobe_trampoline(void)
+{
+}
+
+/*
+ * At this point the target function has been tricked into
+ * returning into our trampoline. Lookup the associated instance
+ * and then:
+ * - call the handler function
+ * - cleanup by marking the instance as unused
+ * - long jump back to the original return address
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
+ unsigned long orig_ret_address = 0;
+ unsigned long trampoline_address =
+ ((struct fnptr *)kretprobe_trampoline)->ip;
+
+ head = kretprobe_inst_table_head(current);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ regs->cr_iip = orig_ret_address;
+
+ unlock_kprobes();
+ preempt_enable_no_resched();
+
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we have handled unlocking
+ * and re-enabling preemption.
+ */
+ return 1;
+}
+
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+
+ if ((ri = get_free_rp_inst(rp)) != NULL) {
+ ri->rp = rp;
+ ri->task = current;
+ ri->ret_addr = (kprobe_opcode_t *)regs->b0;
+
+ /* Replace the return addr with trampoline addr */
+ regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+
+ add_rp_inst(ri);
+ } else {
+ rp->nmissed++;
+ }
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long) p->addr;
+ unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
+ unsigned long kprobe_inst=0;
+ unsigned int slot = addr & 0xf, template, major_opcode = 0;
+ bundle_t *bundle = &p->ainsn.insn.bundle;
+
+ memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
+ memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+
+ template = bundle->quad0.template;
+
+ if(valid_kprobe_addr(template, slot, addr))
+ return -EINVAL;
+
+ /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot++;
+
+ /* Get kprobe_inst and major_opcode from the bundle */
+ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+ if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p))
+ return -EINVAL;
+
+ prepare_break_inst(template, slot, major_opcode, kprobe_inst, p);
+
+ return 0;
+}
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr;
+ unsigned long arm_addr = addr & ~0xFULL;
+
+ memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
+ flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+}
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr;
+ unsigned long arm_addr = addr & ~0xFULL;
+
+ /* p->opcode contains the original unaltered bundle */
+ memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t));
+ flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
+}
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+/*
+ * We are resuming execution after a single step fault, so the pt_regs
+ * structure reflects the register state after we executed the instruction
+ * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
+ * the ip to point back to the original stack address. To set the IP address
+ * to original stack address, handle the case where we need to fixup the
+ * relative IP address and/or fixup branch register.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+ unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL;
+ unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
+ unsigned long template;
+ int slot = ((unsigned long)p->addr & 0xf);
+
+ template = p->opcode.bundle.quad0.template;
+
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot = 2;
+
+ if (p->ainsn.inst_flag) {
+
+ if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
+ /* Fix relative IP address */
+ regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr;
+ }
+
+ if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) {
+ /*
+ * Fix target branch register, software convention is
+ * to use either b0 or b6 or b7, so just checking
+ * only those registers
+ */
+ switch (p->ainsn.target_br_reg) {
+ case 0:
+ if ((regs->b0 == bundle_addr) ||
+ (regs->b0 == bundle_addr + 0x10)) {
+ regs->b0 = (regs->b0 - bundle_addr) +
+ resume_addr;
+ }
+ break;
+ case 6:
+ if ((regs->b6 == bundle_addr) ||
+ (regs->b6 == bundle_addr + 0x10)) {
+ regs->b6 = (regs->b6 - bundle_addr) +
+ resume_addr;
+ }
+ break;
+ case 7:
+ if ((regs->b7 == bundle_addr) ||
+ (regs->b7 == bundle_addr + 0x10)) {
+ regs->b7 = (regs->b7 - bundle_addr) +
+ resume_addr;
+ }
+ break;
+ } /* end switch */
+ }
+ goto turn_ss_off;
+ }
+
+ if (slot == 2) {
+ if (regs->cr_iip == bundle_addr + 0x10) {
+ regs->cr_iip = resume_addr + 0x10;
+ }
+ } else {
+ if (regs->cr_iip == bundle_addr) {
+ regs->cr_iip = resume_addr;
+ }
+ }
+
+turn_ss_off:
+ /* Turn off Single Step bit */
+ ia64_psr(regs)->ss = 0;
+}
+
+static void prepare_ss(struct kprobe *p, struct pt_regs *regs)
+{
+ unsigned long bundle_addr = (unsigned long) &p->opcode.bundle;
+ unsigned long slot = (unsigned long)p->addr & 0xf;
+
+ /* Update instruction pointer (IIP) and slot number (IPSR.ri) */
+ regs->cr_iip = bundle_addr & ~0xFULL;
+
+ if (slot > 2)
+ slot = 0;
+
+ ia64_psr(regs)->ri = slot;
+
+ /* turn on single stepping */
+ ia64_psr(regs)->ss = 1;
+}
+
+static int pre_kprobes_handler(struct die_args *args)
+{
+ struct kprobe *p;
+ int ret = 0;
+ struct pt_regs *regs = args->regs;
+ kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
+
+ preempt_disable();
+
+ /* Handle recursion cases */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ if (kprobe_status == KPROBE_HIT_SS) {
+ unlock_kprobes();
+ goto no_kprobe;
+ }
+ /* We have reentered the pre_kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe();
+ set_current_kprobe(p);
+ p->nmissed++;
+ prepare_ss(p, regs);
+ kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else if (args->err == __IA64_BREAK_JPROBE) {
+ /*
+ * jprobe instrumented function just completed
+ */
+ p = current_kprobe;
+ if (p->break_handler && p->break_handler(p, regs)) {
+ goto ss_probe;
+ }
+ } else {
+ /* Not our break */
+ goto no_kprobe;
+ }
+ }
+
+ lock_kprobes();
+ p = get_kprobe(addr);
+ if (!p) {
+ unlock_kprobes();
+ goto no_kprobe;
+ }
+
+ kprobe_status = KPROBE_HIT_ACTIVE;
+ set_current_kprobe(p);
+
+ if (p->pre_handler && p->pre_handler(p, regs))
+ /*
+ * Our pre-handler is specifically requesting that we just
+ * do a return. This is used for both the jprobe pre-handler
+ * and the kretprobe trampoline
+ */
+ return 1;
+
+ss_probe:
+ prepare_ss(p, regs);
+ kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+static int post_kprobes_handler(struct pt_regs *regs)
+{
+ if (!kprobe_running())
+ return 0;
+
+ if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
+ kprobe_status = KPROBE_HIT_SSDONE;
+ current_kprobe->post_handler(current_kprobe, regs, 0);
+ }
+
+ resume_execution(current_kprobe, regs);
+
+ /*Restore back the original saved kprobes variables and continue. */
+ if (kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe();
+ goto out;
+ }
+
+ unlock_kprobes();
+
+out:
+ preempt_enable_no_resched();
+ return 1;
+}
+
+static int kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ if (!kprobe_running())
+ return 0;
+
+ if (current_kprobe->fault_handler &&
+ current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+ return 1;
+
+ if (kprobe_status & KPROBE_HIT_SS) {
+ resume_execution(current_kprobe, regs);
+ unlock_kprobes();
+ preempt_enable_no_resched();
+ }
+
+ return 0;
+}
+
+int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ switch(val) {
+ case DIE_BREAK:
+ if (pre_kprobes_handler(args))
+ return NOTIFY_STOP;
+ break;
+ case DIE_SS:
+ if (post_kprobes_handler(args->regs))
+ return NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ if (kprobes_fault_handler(args->regs, args->trapnr))
+ return NOTIFY_STOP;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+
+ /* save architectural state */
+ jprobe_saved_regs = *regs;
+
+ /* after rfi, execute the jprobe instrumented function */
+ regs->cr_iip = addr & ~0xFULL;
+ ia64_psr(regs)->ri = addr & 0xf;
+ regs->r1 = ((struct fnptr *)(jp->entry))->gp;
+
+ /*
+ * fix the return address to our jprobe_inst_return() function
+ * in the jprobes.S file
+ */
+ regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
+
+ return 1;
+}
+
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ *regs = jprobe_saved_regs;
+ return 1;
+}
+
+static struct kprobe trampoline_p = {
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ trampoline_p.addr =
+ (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 736e328b5e61..4ebbf3974381 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -271,7 +271,7 @@ ia64_mca_log_sal_error_record(int sal_info_type)
#ifdef CONFIG_ACPI
-static int cpe_vector = -1;
+int cpe_vector = -1;
static irqreturn_t
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
new file mode 100644
index 000000000000..a68ce6678092
--- /dev/null
+++ b/arch/ia64/kernel/numa.c
@@ -0,0 +1,57 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ia64 kernel NUMA specific stuff
+ *
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ * Jesse Barnes <jbarnes@sgi.com>
+ */
+#include <linux/config.h>
+#include <linux/topology.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI.
+ */
+void __init build_cpu_to_node_map(void)
+{
+ int cpu, i, node;
+
+ for(node=0; node < MAX_NUMNODES; node++)
+ cpus_clear(node_to_cpu_mask[node]);
+
+ for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+ node = -1;
+ for (i = 0; i < NR_CPUS; ++i)
+ if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+ node = node_cpuid[i].nid;
+ break;
+ }
+ cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
+ if (node >= 0)
+ cpu_set(cpu, node_to_cpu_mask[node]);
+ }
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 6407bff6bfd7..f1201ac8a116 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -37,7 +37,6 @@
#include <linux/vfs.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
-#include <linux/version.h>
#include <linux/bitops.h>
#include <asm/errno.h>
@@ -4313,6 +4312,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
thread->pfm_context, ctx));
+ ret = -EBUSY;
old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
if (old != NULL) {
DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index ebb71f3d6d19..051e050359e4 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -27,6 +27,7 @@
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include <linux/kprobes.h>
#include <asm/cpu.h>
#include <asm/delay.h>
@@ -178,7 +179,7 @@ static int can_do_pal_halt = 1;
static int __init nohalt_setup(char * str)
{
- pal_halt = 0;
+ pal_halt = can_do_pal_halt = 0;
return 1;
}
__setup("nohalt", nohalt_setup);
@@ -195,6 +196,7 @@ update_pal_halt_status(int status)
void
default_idle (void)
{
+ local_irq_enable();
while (!need_resched())
if (can_do_pal_halt)
safe_halt();
@@ -707,6 +709,13 @@ kernel_thread_helper (int (*fn)(void *), void *arg)
void
flush_thread (void)
{
+ /*
+ * Remove function-return probe instances associated with this task
+ * and put them back on the free list. Do not insert an exit probe for
+ * this function, it will be disabled by kprobe_flush_task if you do.
+ */
+ kprobe_flush_task(current);
+
/* drop floating-point and debug-register state if it exists: */
current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
ia64_drop_fpu(current);
@@ -721,6 +730,14 @@ flush_thread (void)
void
exit_thread (void)
{
+
+ /*
+ * Remove function-return probe instances associated with this task
+ * and put them back on the free list. Do not insert an exit probe for
+ * this function, it will be disabled by kprobe_flush_task if you do.
+ */
+ kprobe_flush_task(current);
+
ia64_drop_fpu(current);
#ifdef CONFIG_PERFMON
/* if needed, stop monitoring and flush state to perfmon context */
@@ -790,16 +807,12 @@ machine_restart (char *restart_cmd)
(*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
}
-EXPORT_SYMBOL(machine_restart);
-
void
machine_halt (void)
{
cpu_halt();
}
-EXPORT_SYMBOL(machine_halt);
-
void
machine_power_off (void)
{
@@ -808,4 +821,3 @@ machine_power_off (void)
machine_halt();
}
-EXPORT_SYMBOL(machine_power_off);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 6d57aebad485..bbb8bc7c0552 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -725,12 +725,32 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt,
break;
}
+ /*
+ * Note: at the time of this call, the target task is blocked
+ * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL
+ * (aka, "pLvSys") we redirect execution from
+ * .work_pending_syscall_end to .work_processed_kernel.
+ */
unw_get_pr(&prev_info, &pr);
- pr &= ~(1UL << PRED_SYSCALL);
+ pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL));
pr |= (1UL << PRED_NON_SYSCALL);
unw_set_pr(&prev_info, pr);
pt->cr_ifs = (1UL << 63) | cfm;
+ /*
+ * Clear the memory that is NOT written on syscall-entry to
+ * ensure we do not leak kernel-state to user when execution
+ * resumes.
+ */
+ pt->r2 = 0;
+ pt->r3 = 0;
+ pt->r14 = 0;
+ memset(&pt->r16, 0, 16*8); /* clear r16-r31 */
+ memset(&pt->f6, 0, 6*16); /* clear f6-f11 */
+ pt->b7 = 0;
+ pt->ar_ccv = 0;
+ pt->ar_csd = 0;
+ pt->ar_ssd = 0;
}
static int
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index d227fabecd02..6f0cc7a6634e 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -143,7 +143,8 @@ struct salinfo_data {
static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
-static spinlock_t data_lock, data_saved_lock;
+static DEFINE_SPINLOCK(data_lock);
+static DEFINE_SPINLOCK(data_saved_lock);
/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
* record.
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index d14692e0920a..84f89da7c640 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -20,6 +20,7 @@
* 02/01/00 R.Seth fixed get_cpuinfo for SMP
* 01/07/99 S.Eranian added the support for command line argument
* 06/24/99 W.Drummond added boot_cpu_data.
+ * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()"
*/
#include <linux/config.h>
#include <linux/module.h>
@@ -40,6 +41,8 @@
#include <linux/serial_core.h>
#include <linux/efi.h>
#include <linux/initrd.h>
+#include <linux/platform.h>
+#include <linux/pm.h>
#include <asm/ia32.h>
#include <asm/machvec.h>
@@ -72,6 +75,8 @@ DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
struct screen_info screen_info;
+unsigned long vga_console_iobase;
+unsigned long vga_console_membase;
unsigned long ia64_max_cacheline_size;
unsigned long ia64_iobase; /* virtual address for I/O accesses */
@@ -81,6 +86,13 @@ EXPORT_SYMBOL(io_space);
unsigned int num_io_spaces;
/*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ */
+#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */
+unsigned long ia64_i_cache_stride_shift = ~0;
+
+/*
* The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
* mask specifies a mask of address bits that must be 0 in order for two buffers to be
* mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
@@ -273,23 +285,25 @@ io_port_init (void)
static inline int __init
early_console_setup (char *cmdline)
{
+ int earlycons = 0;
+
#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
{
extern int sn_serial_console_early_setup(void);
if (!sn_serial_console_early_setup())
- return 0;
+ earlycons++;
}
#endif
#ifdef CONFIG_EFI_PCDP
if (!efi_setup_pcdp_console(cmdline))
- return 0;
+ earlycons++;
#endif
#ifdef CONFIG_SERIAL_8250_CONSOLE
if (!early_serial_console_init(cmdline))
- return 0;
+ earlycons++;
#endif
- return -1;
+ return (earlycons) ? 0 : -1;
}
static inline void
@@ -622,6 +636,12 @@ setup_per_cpu_areas (void)
/* start_kernel() requires this... */
}
+/*
+ * Calculate the max. cache line size.
+ *
+ * In addition, the minimum of the i-cache stride sizes is calculated for
+ * "flush_icache_range()".
+ */
static void
get_max_cacheline_size (void)
{
@@ -635,6 +655,8 @@ get_max_cacheline_size (void)
printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
__FUNCTION__, status);
max = SMP_CACHE_BYTES;
+ /* Safest setup for "flush_icache_range()" */
+ ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
goto out;
}
@@ -643,14 +665,31 @@ get_max_cacheline_size (void)
&cci);
if (status != 0) {
printk(KERN_ERR
- "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
+ "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
__FUNCTION__, l, status);
max = SMP_CACHE_BYTES;
+ /* The safest setup for "flush_icache_range()" */
+ cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+ cci.pcci_unified = 1;
}
line_size = 1 << cci.pcci_line_size;
if (line_size > max)
max = line_size;
- }
+ if (!cci.pcci_unified) {
+ status = ia64_pal_cache_config_info(l,
+ /* cache_type (instruction)= */ 1,
+ &cci);
+ if (status != 0) {
+ printk(KERN_ERR
+ "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
+ __FUNCTION__, l, status);
+ /* The safest setup for "flush_icache_range()" */
+ cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+ }
+ }
+ if (cci.pcci_stride < ia64_i_cache_stride_shift)
+ ia64_i_cache_stride_shift = cci.pcci_stride;
+ }
out:
if (max > ia64_max_cacheline_size)
ia64_max_cacheline_size = max;
@@ -779,6 +818,7 @@ cpu_init (void)
/* size of physical stacked register partition plus 8 bytes: */
__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
platform_cpu_init();
+ pm_idle = default_idle;
}
void
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index edd9f07860b2..774f34b675cf 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -143,6 +143,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
__copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
psr->mfh = 0; /* drop signal handler's fph contents... */
+ preempt_disable();
if (psr->dfh)
ia64_drop_fpu(current);
else {
@@ -150,6 +151,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
__ia64_load_fpu(current->thread.fph);
ia64_set_local_fpu_owner(current);
}
+ preempt_enable();
}
return err;
}
@@ -465,15 +467,12 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
if (!setup_frame(sig, ka, info, oldset, scr))
return 0;
- if (!(ka->sa.sa_flags & SA_NODEFER)) {
- spin_lock_irq(&current->sighand->siglock);
- {
- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
- sigaddset(&current->blocked, sig);
- recalc_sigpending();
- }
- spin_unlock_irq(&current->sighand->siglock);
- }
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
return 1;
}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index b49d4ddaab93..0166a9847095 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -231,13 +231,16 @@ smp_flush_tlb_all (void)
void
smp_flush_tlb_mm (struct mm_struct *mm)
{
+ preempt_disable();
/* this happens for the common case of a single-threaded fork(): */
if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
{
local_finish_flush_tlb_mm(mm);
+ preempt_enable();
return;
}
+ preempt_enable();
/*
* We could optimize this further by using mm->cpu_vm_mask to track which CPUs
* have been running in the address space. It's not clear that this is worth the
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 3865f088ffa2..7d72c0d872b3 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -346,6 +346,7 @@ smp_callin (void)
lock_ipi_calllock();
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
+ per_cpu(cpu_state, cpuid) = CPU_ONLINE;
smp_setup_percpu_timer();
@@ -524,47 +525,6 @@ smp_build_cpu_map (void)
}
}
-#ifdef CONFIG_NUMA
-
-/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
-u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_to_node_map);
-/* which logical CPUs are on which nodes */
-cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
-
-/*
- * Build cpu to node mapping and initialize the per node cpu masks.
- */
-void __init
-build_cpu_to_node_map (void)
-{
- int cpu, i, node;
-
- for(node=0; node<MAX_NUMNODES; node++)
- cpus_clear(node_to_cpu_mask[node]);
- for(cpu = 0; cpu < NR_CPUS; ++cpu) {
- /*
- * All Itanium NUMA platforms I know use ACPI, so maybe we
- * can drop this ifdef completely. [EF]
- */
-#ifdef CONFIG_ACPI_NUMA
- node = -1;
- for (i = 0; i < NR_CPUS; ++i)
- if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
- node = node_cpuid[i].nid;
- break;
- }
-#else
-# error Fixme: Dunno how to build CPU-to-node map.
-#endif
- cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
- if (node >= 0)
- cpu_set(cpu, node_to_cpu_mask[node]);
- }
-}
-
-#endif /* CONFIG_NUMA */
-
/*
* Cycle through the APs sending Wakeup IPIs to boot each.
*/
@@ -611,6 +571,7 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callin_map);
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
/*
@@ -688,6 +649,7 @@ int __cpu_disable(void)
return -EBUSY;
remove_siblinginfo(cpu);
+ cpu_clear(cpu, cpu_online_map);
fixup_irqs();
local_flush_tlb_all();
cpu_clear(cpu, cpu_callin_map);
@@ -774,6 +736,7 @@ __cpu_up (unsigned int cpu)
if (cpu_isset(cpu, cpu_callin_map))
return -EINVAL;
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Processor goes to start_secondary(), sets online flag */
ret = do_boot_cpu(sapicid, cpu);
if (ret < 0)
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index f1aafd4c05f9..92ff46ad21e2 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,6 +36,15 @@ int arch_register_cpu(int num)
parent = &sysfs_nodes[cpu_to_node(num)];
#endif /* CONFIG_NUMA */
+#ifdef CONFIG_ACPI_BOOT
+ /*
+ * If CPEI cannot be re-targetted, and this is
+ * CPEI target, then dont create the control file
+ */
+ if (!can_cpei_retarget() && is_cpu_cpei_target(num))
+ sysfs_cpus[num].cpu.no_control = 1;
+#endif
+
return register_cpu(&sysfs_cpus[num].cpu, num, parent);
}
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 1861173bd4f6..4440c8343fa4 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -21,12 +21,26 @@
#include <asm/intrinsics.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
+#include <asm/kdebug.h>
extern spinlock_t timerlist_lock;
fpswa_interface_t *fpswa_interface;
EXPORT_SYMBOL(fpswa_interface);
+struct notifier_block *ia64die_chain;
+static DEFINE_SPINLOCK(die_notifier_lock);
+
+int register_die_notifier(struct notifier_block *nb)
+{
+ int err = 0;
+ unsigned long flags;
+ spin_lock_irqsave(&die_notifier_lock, flags);
+ err = notifier_chain_register(&ia64die_chain, nb);
+ spin_unlock_irqrestore(&die_notifier_lock, flags);
+ return err;
+}
+
void __init
trap_init (void)
{
@@ -76,14 +90,16 @@ die (const char *str, struct pt_regs *regs, long err)
.lock_owner_depth = 0
};
static int die_counter;
+ int cpu = get_cpu();
- if (die.lock_owner != smp_processor_id()) {
+ if (die.lock_owner != cpu) {
console_verbose();
spin_lock_irq(&die.lock);
- die.lock_owner = smp_processor_id();
+ die.lock_owner = cpu;
die.lock_owner_depth = 0;
bust_spinlocks(1);
}
+ put_cpu();
if (++die.lock_owner_depth < 3) {
printk("%s[%d]: %s %ld [%d]\n",
@@ -137,6 +153,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
switch (break_num) {
case 0: /* unknown error (used by GCC for __builtin_abort()) */
+ if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
+ == NOTIFY_STOP) {
+ return;
+ }
die_if_kernel("bugcheck!", regs, break_num);
sig = SIGILL; code = ILL_ILLOPC;
break;
@@ -189,6 +209,15 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
sig = SIGILL; code = __ILL_BNDMOD;
break;
+ case 0x80200:
+ case 0x80300:
+ if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP)
+ == NOTIFY_STOP) {
+ return;
+ }
+ sig = SIGTRAP; code = TRAP_BRKPT;
+ break;
+
default:
if (break_num < 0x40000 || break_num > 0x100000)
die_if_kernel("Bad break", regs, break_num);
@@ -548,7 +577,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
#endif
break;
case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
- case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+ case 36:
+ if (notify_die(DIE_SS, "ss", &regs, vector,
+ vector, SIGTRAP) == NOTIFY_STOP)
+ return;
+ siginfo.si_code = TRAP_TRACE; ifa = 0; break;
}
siginfo.si_signo = SIGTRAP;
siginfo.si_errno = 0;
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 2776a074c6f1..3288be47bc75 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -362,7 +362,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
if (info->pri_unat_loc)
nat_addr = info->pri_unat_loc;
else
- nat_addr = &info->sw->ar_unat;
+ nat_addr = &info->sw->caller_unat;
nat_mask = (1UL << ((long) addr & 0x1f8)/8);
}
} else {
@@ -524,7 +524,7 @@ unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int
case UNW_AR_UNAT:
addr = info->unat_loc;
if (!addr)
- addr = &info->sw->ar_unat;
+ addr = &info->sw->caller_unat;
break;
case UNW_AR_LC:
@@ -1775,7 +1775,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
case UNW_INSN_SETNAT_MEMSTK:
if (!state->pri_unat_loc)
- state->pri_unat_loc = &state->sw->ar_unat;
+ state->pri_unat_loc = &state->sw->caller_unat;
/* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
break;
@@ -2243,11 +2243,11 @@ unw_init (void)
if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
unw_hash_index_t_is_too_narrow();
- unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT);
unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
- unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
- unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT);
+ unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT);
unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index b9f0db4c1b04..a676e79e0681 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -8,6 +8,11 @@
#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
#include <asm-generic/vmlinux.lds.h>
+#define IVT_TEXT \
+ VMLINUX_SYMBOL(__start_ivt_text) = .; \
+ *(.text.ivt) \
+ VMLINUX_SYMBOL(__end_ivt_text) = .;
+
OUTPUT_FORMAT("elf64-ia64-little")
OUTPUT_ARCH(ia64)
ENTRY(phys_start)
@@ -39,7 +44,7 @@ SECTIONS
.text : AT(ADDR(.text) - LOAD_OFFSET)
{
- *(.text.ivt)
+ IVT_TEXT
*(.text)
SCHED_TEXT
LOCK_TEXT