summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-06 09:53:05 +0100
committerIngo Molnar <mingo@elte.hu>2009-01-06 09:53:05 +0100
commit3d7a96f5a485b7d06c2379f343d7312af89ec9e2 (patch)
tree5f097f68eb0f9fd3fa4a10f38672e300e9127b10 /arch
parentkmemtrace: Remove the relay version of kmemtrace (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm (diff)
downloadlinux-3d7a96f5a485b7d06c2379f343d7312af89ec9e2.tar.xz
linux-3d7a96f5a485b7d06c2379f343d7312af89ec9e2.zip
Merge branch 'linus' into tracing/kmemtrace2
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/smp.h1
-rw-r--r--arch/alpha/include/asm/topology.h17
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/binfmt_loader.c51
-rw-r--r--arch/alpha/kernel/init_task.c1
-rw-r--r--arch/alpha/kernel/irq.c5
-rw-r--r--arch/alpha/kernel/process.c2
-rw-r--r--arch/alpha/kernel/setup.c5
-rw-r--r--arch/alpha/kernel/smp.c7
-rw-r--r--arch/alpha/kernel/sys_dp264.c8
-rw-r--r--arch/alpha/kernel/sys_titan.c4
-rw-r--r--arch/arm/common/gic.c4
-rw-r--r--arch/arm/kernel/init_task.c1
-rw-r--r--arch/arm/kernel/irq.c2
-rw-r--r--arch/arm/kernel/smp.c10
-rw-r--r--arch/arm/mach-at91/at91rm9200_time.c3
-rw-r--r--arch/arm/mach-at91/at91sam926x_time.c2
-rw-r--r--arch/arm/mach-davinci/time.c2
-rw-r--r--arch/arm/mach-imx/time.c2
-rw-r--r--arch/arm/mach-ixp4xx/common.c2
-rw-r--r--arch/arm/mach-msm/timer.c2
-rw-r--r--arch/arm/mach-ns9xxx/time-ns9360.c2
-rw-r--r--arch/arm/mach-omap1/time.c2
-rw-r--r--arch/arm/mach-omap1/timer32k.c2
-rw-r--r--arch/arm/mach-omap2/clock24xx.h4
-rw-r--r--arch/arm/mach-omap2/timer-gp.c2
-rw-r--r--arch/arm/mach-pxa/time.c2
-rw-r--r--arch/arm/mach-realview/core.c2
-rw-r--r--arch/arm/mach-realview/localtimer.c4
-rw-r--r--arch/arm/mach-sa1100/time.c2
-rw-r--r--arch/arm/mach-versatile/core.c2
-rw-r--r--arch/arm/oprofile/op_model_mpcore.c4
-rw-r--r--arch/arm/plat-mxc/time.c2
-rw-r--r--arch/arm/plat-orion/time.c2
-rw-r--r--arch/avr32/include/asm/bitops.h5
-rw-r--r--arch/avr32/kernel/init_task.c1
-rw-r--r--arch/avr32/kernel/time.c2
-rw-r--r--arch/blackfin/include/asm/bitops.h1
-rw-r--r--arch/blackfin/kernel/init_task.c1
-rw-r--r--arch/blackfin/kernel/time-ts.c2
-rw-r--r--arch/cris/arch-v32/kernel/irq.c4
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/cris/include/asm/bitops.h1
-rw-r--r--arch/cris/include/asm/smp.h1
-rw-r--r--arch/cris/kernel/process.c1
-rw-r--r--arch/frv/kernel/init_task.c1
-rw-r--r--arch/h8300/include/asm/bitops.h1
-rw-r--r--arch/h8300/kernel/init_task.c1
-rw-r--r--arch/ia64/Kconfig3
-rw-r--r--arch/ia64/hp/sim/hpsim_irq.c2
-rw-r--r--arch/ia64/include/asm/irq.h2
-rw-r--r--arch/ia64/include/asm/kvm.h6
-rw-r--r--arch/ia64/include/asm/kvm_host.h198
-rw-r--r--arch/ia64/include/asm/smp.h1
-rw-r--r--arch/ia64/include/asm/topology.h11
-rw-r--r--arch/ia64/kernel/acpi.c3
-rw-r--r--arch/ia64/kernel/init_task.c1
-rw-r--r--arch/ia64/kernel/iosapic.c35
-rw-r--r--arch/ia64/kernel/irq.c13
-rw-r--r--arch/ia64/kernel/msi_ia64.c12
-rw-r--r--arch/ia64/kernel/smpboot.c10
-rw-r--r--arch/ia64/kernel/time.c18
-rw-r--r--arch/ia64/kernel/topology.c2
-rw-r--r--arch/ia64/kvm/Makefile6
-rw-r--r--arch/ia64/kvm/asm-offsets.c11
-rw-r--r--arch/ia64/kvm/kvm-ia64.c110
-rw-r--r--arch/ia64/kvm/kvm_lib.c15
-rw-r--r--arch/ia64/kvm/kvm_minstate.h4
-rw-r--r--arch/ia64/kvm/misc.h3
-rw-r--r--arch/ia64/kvm/mmio.c38
-rw-r--r--arch/ia64/kvm/process.c29
-rw-r--r--arch/ia64/kvm/vcpu.c76
-rw-r--r--arch/ia64/kvm/vcpu.h5
-rw-r--r--arch/ia64/kvm/vmm.c29
-rw-r--r--arch/ia64/kvm/vmm_ivt.S1469
-rw-r--r--arch/ia64/kvm/vtlb.c4
-rw-r--r--arch/ia64/sn/kernel/irq.c6
-rw-r--r--arch/ia64/sn/kernel/msi_sn.c7
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c27
-rw-r--r--arch/m32r/Kconfig1
-rw-r--r--arch/m32r/kernel/init_task.c1
-rw-r--r--arch/m32r/kernel/smpboot.c8
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68k/kernel/process.c1
-rw-r--r--arch/m68knommu/include/asm/bitops.h1
-rw-r--r--arch/m68knommu/kernel/init_task.c1
-rw-r--r--arch/m68knommu/platform/coldfire/pit.c2
-rw-r--r--arch/mips/include/asm/irq.h3
-rw-r--r--arch/mips/include/asm/mach-ip27/topology.h5
-rw-r--r--arch/mips/include/asm/smp.h3
-rw-r--r--arch/mips/jazz/irq.c2
-rw-r--r--arch/mips/kernel/cevt-bcm1480.c4
-rw-r--r--arch/mips/kernel/cevt-ds1287.c2
-rw-r--r--arch/mips/kernel/cevt-gt641xx.c2
-rw-r--r--arch/mips/kernel/cevt-r4k.c2
-rw-r--r--arch/mips/kernel/cevt-sb1250.c4
-rw-r--r--arch/mips/kernel/cevt-smtc.c2
-rw-r--r--arch/mips/kernel/cevt-txx9.c2
-rw-r--r--arch/mips/kernel/i8253.c2
-rw-r--r--arch/mips/kernel/init_task.c1
-rw-r--r--arch/mips/kernel/irq-gic.c6
-rw-r--r--arch/mips/kernel/smp-cmp.c6
-rw-r--r--arch/mips/kernel/smp-mt.c2
-rw-r--r--arch/mips/kernel/smp.c7
-rw-r--r--arch/mips/kernel/smtc.c6
-rw-r--r--arch/mips/mti-malta/malta-smtc.c6
-rw-r--r--arch/mips/nxp/pnx8550/common/time.c1
-rw-r--r--arch/mips/pmc-sierra/yosemite/smp.c6
-rw-r--r--arch/mips/sgi-ip27/ip27-smp.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-timer.c2
-rw-r--r--arch/mips/sibyte/bcm1480/irq.c8
-rw-r--r--arch/mips/sibyte/bcm1480/smp.c8
-rw-r--r--arch/mips/sibyte/sb1250/irq.c8
-rw-r--r--arch/mips/sibyte/sb1250/smp.c8
-rw-r--r--arch/mips/sni/time.c2
-rw-r--r--arch/mn10300/kernel/init_task.c1
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/include/asm/module.h6
-rw-r--r--arch/parisc/include/asm/smp.h2
-rw-r--r--arch/parisc/kernel/init_task.c1
-rw-r--r--arch/parisc/kernel/irq.c6
-rw-r--r--arch/parisc/kernel/module.c216
-rw-r--r--arch/parisc/kernel/smp.c15
-rw-r--r--arch/powerpc/include/asm/disassemble.h80
-rw-r--r--arch/powerpc/include/asm/kvm_44x.h61
-rw-r--r--arch/powerpc/include/asm/kvm_host.h116
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h83
-rw-r--r--arch/powerpc/include/asm/mmu-44x.h1
-rw-r--r--arch/powerpc/include/asm/topology.h13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c21
-rw-r--r--arch/powerpc/kernel/init_task.c1
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/powerpc/kernel/process.c1
-rw-r--r--arch/powerpc/kernel/smp.c4
-rw-r--r--arch/powerpc/kernel/time.c20
-rw-r--r--arch/powerpc/kvm/44x.c228
-rw-r--r--arch/powerpc/kvm/44x_emulate.c371
-rw-r--r--arch/powerpc/kvm/44x_tlb.c463
-rw-r--r--arch/powerpc/kvm/44x_tlb.h26
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile12
-rw-r--r--arch/powerpc/kvm/booke.c (renamed from arch/powerpc/kvm/booke_guest.c)418
-rw-r--r--arch/powerpc/kvm/booke.h60
-rw-r--r--arch/powerpc/kvm/booke_host.c83
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S72
-rw-r--r--arch/powerpc/kvm/emulate.c447
-rw-r--r--arch/powerpc/kvm/powerpc.c130
-rw-r--r--arch/powerpc/kvm/timing.c239
-rw-r--r--arch/powerpc/kvm/timing.h102
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c2
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c4
-rw-r--r--arch/powerpc/platforms/pseries/xics.c4
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/sysdev/mpic.h2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/hypfs/inode.c1
-rw-r--r--arch/s390/include/asm/cpu.h7
-rw-r--r--arch/s390/include/asm/cputime.h42
-rw-r--r--arch/s390/include/asm/lowcore.h49
-rw-r--r--arch/s390/include/asm/system.h4
-rw-r--r--arch/s390/include/asm/thread_info.h2
-rw-r--r--arch/s390/include/asm/timer.h16
-rw-r--r--arch/s390/include/asm/topology.h2
-rw-r--r--arch/s390/include/asm/vdso.h15
-rw-r--r--arch/s390/kernel/asm-offsets.c5
-rw-r--r--arch/s390/kernel/entry.S5
-rw-r--r--arch/s390/kernel/entry64.S50
-rw-r--r--arch/s390/kernel/head64.S2
-rw-r--r--arch/s390/kernel/init_task.c1
-rw-r--r--arch/s390/kernel/process.c43
-rw-r--r--arch/s390/kernel/s390_ext.c2
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/smp.c40
-rw-r--r--arch/s390/kernel/time.c2
-rw-r--r--arch/s390/kernel/topology.c5
-rw-r--r--arch/s390/kernel/vdso.c123
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S5
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S39
-rw-r--r--arch/s390/kernel/vtime.c486
-rw-r--r--arch/s390/kvm/kvm-s390.c41
-rw-r--r--arch/sh/include/asm/smp.h2
-rw-r--r--arch/sh/include/asm/topology.h2
-rw-r--r--arch/sh/kernel/init_task.c1
-rw-r--r--arch/sh/kernel/smp.c10
-rw-r--r--arch/sh/kernel/timers/timer-broadcast.c2
-rw-r--r--arch/sh/kernel/timers/timer-tmu.c2
-rw-r--r--arch/sparc/Kconfig57
-rw-r--r--arch/sparc/configs/sparc64_defconfig105
-rw-r--r--arch/sparc/include/asm/Kbuild20
-rw-r--r--arch/sparc/include/asm/byteorder.h12
-rw-r--r--arch/sparc/include/asm/ipcbuf.h38
-rw-r--r--arch/sparc/include/asm/ipcbuf_32.h31
-rw-r--r--arch/sparc/include/asm/ipcbuf_64.h28
-rw-r--r--arch/sparc/include/asm/jsflash.h2
-rw-r--r--arch/sparc/include/asm/openprom.h277
-rw-r--r--arch/sparc/include/asm/openprom_32.h255
-rw-r--r--arch/sparc/include/asm/openprom_64.h280
-rw-r--r--arch/sparc/include/asm/posix_types.h157
-rw-r--r--arch/sparc/include/asm/posix_types_32.h118
-rw-r--r--arch/sparc/include/asm/posix_types_64.h122
-rw-r--r--arch/sparc/include/asm/processor_32.h2
-rw-r--r--arch/sparc/include/asm/ptrace.h448
-rw-r--r--arch/sparc/include/asm/ptrace_32.h186
-rw-r--r--arch/sparc/include/asm/ptrace_64.h356
-rw-r--r--arch/sparc/include/asm/reg.h8
-rw-r--r--arch/sparc/include/asm/reg_32.h79
-rw-r--r--arch/sparc/include/asm/reg_64.h56
-rw-r--r--arch/sparc/include/asm/sigcontext.h102
-rw-r--r--arch/sparc/include/asm/sigcontext_32.h62
-rw-r--r--arch/sparc/include/asm/sigcontext_64.h87
-rw-r--r--arch/sparc/include/asm/siginfo.h43
-rw-r--r--arch/sparc/include/asm/siginfo_32.h17
-rw-r--r--arch/sparc/include/asm/siginfo_64.h32
-rw-r--r--arch/sparc/include/asm/signal.h212
-rw-r--r--arch/sparc/include/asm/signal_32.h207
-rw-r--r--arch/sparc/include/asm/signal_64.h194
-rw-r--r--arch/sparc/include/asm/smp_32.h5
-rw-r--r--arch/sparc/include/asm/stat.h111
-rw-r--r--arch/sparc/include/asm/stat_32.h76
-rw-r--r--arch/sparc/include/asm/stat_64.h47
-rw-r--r--arch/sparc/include/asm/thread_info_32.h2
-rw-r--r--arch/sparc/include/asm/topology_64.h13
-rw-r--r--arch/sparc/include/asm/traps.h11
-rw-r--r--arch/sparc/kernel/init_task.c1
-rw-r--r--arch/sparc/kernel/irq_32.c2
-rw-r--r--arch/sparc/kernel/irq_64.c11
-rw-r--r--arch/sparc/kernel/kgdb_32.c12
-rw-r--r--arch/sparc/kernel/muldiv.c8
-rw-r--r--arch/sparc/kernel/of_device_64.c4
-rw-r--r--arch/sparc/kernel/pci_msi.c4
-rw-r--r--arch/sparc/kernel/pci_psycho.c5
-rw-r--r--arch/sparc/kernel/process_32.c16
-rw-r--r--arch/sparc/kernel/prom_common.c14
-rw-r--r--arch/sparc/kernel/signal_32.c6
-rw-r--r--arch/sparc/kernel/smp_32.c6
-rw-r--r--arch/sparc/kernel/smp_64.c4
-rw-r--r--arch/sparc/kernel/sparc_ksyms_32.c4
-rw-r--r--arch/sparc/kernel/time_64.c2
-rw-r--r--arch/sparc/kernel/traps_32.c4
-rw-r--r--arch/sparc/kernel/unaligned_32.c12
-rw-r--r--arch/sparc/kernel/windows.c6
-rw-r--r--arch/um/Makefile25
-rw-r--r--arch/um/include/asm/system.h14
-rw-r--r--arch/um/kernel/init_task.c1
-rw-r--r--arch/um/kernel/smp.c7
-rw-r--r--arch/um/kernel/time.c2
-rw-r--r--arch/x86/Kconfig26
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/ia32/ia32_signal.c3
-rw-r--r--arch/x86/ia32/ipc32.c1
-rw-r--r--arch/x86/ia32/sys_ia32.c2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h61
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/bigsmp/apic.h32
-rw-r--r--arch/x86/include/asm/bigsmp/ipi.h13
-rw-r--r--arch/x86/include/asm/desc.h10
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/es7000/apic.h60
-rw-r--r--arch/x86/include/asm/es7000/ipi.h12
-rw-r--r--arch/x86/include/asm/genapic_32.h13
-rw-r--r--arch/x86/include/asm/genapic_64.h14
-rw-r--r--arch/x86/include/asm/ipi.h23
-rw-r--r--arch/x86/include/asm/irq.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h47
-rw-r--r--arch/x86/include/asm/kvm_x86_emulate.h11
-rw-r--r--arch/x86/include/asm/lguest.h2
-rw-r--r--arch/x86/include/asm/mach-default/mach_apic.h28
-rw-r--r--arch/x86/include/asm/mach-default/mach_ipi.h18
-rw-r--r--arch/x86/include/asm/mach-generic/mach_apic.h1
-rw-r--r--arch/x86/include/asm/mpspec.h2
-rw-r--r--arch/x86/include/asm/mtrr.h25
-rw-r--r--arch/x86/include/asm/numaq/apic.h16
-rw-r--r--arch/x86/include/asm/numaq/ipi.h13
-rw-r--r--arch/x86/include/asm/pci.h10
-rw-r--r--arch/x86/include/asm/pci_x86.h (renamed from arch/x86/pci/pci.h)17
-rw-r--r--arch/x86/include/asm/smp.h6
-rw-r--r--arch/x86/include/asm/summit/apic.h39
-rw-r--r--arch/x86/include/asm/summit/ipi.h9
-rw-r--r--arch/x86/include/asm/svm.h (renamed from arch/x86/kvm/svm.h)0
-rw-r--r--arch/x86/include/asm/sys_ia32.h101
-rw-r--r--arch/x86/include/asm/topology.h38
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h46
-rw-r--r--arch/x86/include/asm/virtext.h132
-rw-r--r--arch/x86/include/asm/vmx.h (renamed from arch/x86/kvm/vmx.h)27
-rw-r--r--arch/x86/kernel/acpi/boot.c31
-rw-r--r--arch/x86/kernel/amd_iommu.c666
-rw-r--r--arch/x86/kernel/amd_iommu_init.c19
-rw-r--r--arch/x86/kernel/apic.c42
-rw-r--r--arch/x86/kernel/bios_uv.c2
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c28
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c24
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c51
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c9
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c45
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c108
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c10
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h18
-rw-r--r--arch/x86/kernel/cpuid.c8
-rw-r--r--arch/x86/kernel/crash.c18
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/genapic_flat_64.c107
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c81
-rw-r--r--arch/x86/kernel/genx2apic_phys.c78
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c61
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic.c390
-rw-r--r--arch/x86/kernel/ipi.c28
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/irq_32.c13
-rw-r--r--arch/x86/kernel/irq_64.c15
-rw-r--r--arch/x86/kernel/irqinit_32.c16
-rw-r--r--arch/x86/kernel/irqinit_64.c13
-rw-r--r--arch/x86/kernel/kvmclock.c10
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/mfgpt_32.c2
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c3
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kernel/pci-gart_64.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c2
-rw-r--r--arch/x86/kernel/reboot.c73
-rw-r--r--arch/x86/kernel/setup_percpu.c36
-rw-r--r--arch/x86/kernel/smp.c8
-rw-r--r--arch/x86/kernel/smpboot.c48
-rw-r--r--arch/x86/kernel/tlb_32.c2
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/traps.c45
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/kvm/Makefile4
-rw-r--r--arch/x86/kvm/i8254.c19
-rw-r--r--arch/x86/kvm/i8259.c52
-rw-r--r--arch/x86/kvm/irq.h6
-rw-r--r--arch/x86/kvm/kvm_svm.h2
-rw-r--r--arch/x86/kvm/lapic.c58
-rw-r--r--arch/x86/kvm/mmu.c444
-rw-r--r--arch/x86/kvm/paging_tmpl.h44
-rw-r--r--arch/x86/kvm/svm.c48
-rw-r--r--arch/x86/kvm/vmx.c350
-rw-r--r--arch/x86/kvm/x86.c120
-rw-r--r--arch/x86/kvm/x86_emulate.c297
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/mach-default/setup.c15
-rw-r--r--arch/x86/mach-generic/bigsmp.c5
-rw-r--r--arch/x86/mach-generic/es7000.c5
-rw-r--r--arch/x86/mach-generic/numaq.c5
-rw-r--r--arch/x86/mach-generic/summit.c5
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c16
-rw-r--r--arch/x86/mm/init_32.c8
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/mm/srat_64.c2
-rw-r--r--arch/x86/pci/acpi.c2
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/common.c3
-rw-r--r--arch/x86/pci/direct.c2
-rw-r--r--arch/x86/pci/early.c2
-rw-r--r--arch/x86/pci/fixup.c3
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/init.c2
-rw-r--r--arch/x86/pci/irq.c3
-rw-r--r--arch/x86/pci/legacy.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c3
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/pci/mmconfig_64.c3
-rw-r--r--arch/x86/pci/numaq_32.c2
-rw-r--r--arch/x86/pci/olpc.c2
-rw-r--r--arch/x86/pci/pcbios.c5
-rw-r--r--arch/x86/pci/visws.c3
-rw-r--r--arch/x86/xen/mmu.c20
-rw-r--r--arch/x86/xen/smp.c27
-rw-r--r--arch/x86/xen/suspend.c3
-rw-r--r--arch/x86/xen/time.c12
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/kernel/init_task.c1
385 files changed, 8879 insertions, 6665 deletions
diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h
index 544c69af8168..547e90951cec 100644
--- a/arch/alpha/include/asm/smp.h
+++ b/arch/alpha/include/asm/smp.h
@@ -45,7 +45,6 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
#define raw_smp_processor_id() (current_thread_info()->cpu)
extern int smp_num_cpus;
-#define cpu_possible_map cpu_present_map
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi(cpumask_t mask);
diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h
index 149532e162c4..b4f284c72ff3 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -39,7 +39,24 @@ static inline cpumask_t node_to_cpumask(int node)
return node_cpu_mask;
}
+extern struct cpumask node_to_cpumask_map[];
+/* FIXME: This is dumb, recalculating every time. But simple. */
+static const struct cpumask *cpumask_of_node(int node)
+{
+ int cpu;
+
+ cpumask_clear(&node_to_cpumask_map[node]);
+
+ for_each_online_cpu(cpu) {
+ if (cpu_to_node(cpu) == node)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+ }
+
+ return &node_to_cpumask_map[node];
+}
+
#define pcibus_to_cpumask(bus) (cpu_online_map)
+#define cpumask_of_pcibus(bus) (cpu_online_mask)
#endif /* !CONFIG_NUMA */
# include <asm-generic/topology.h>
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index ac706c1d7ada..b4697759a123 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare
obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
irq_alpha.o signal.o setup.o ptrace.o time.o \
- alpha_ksyms.o systbls.o err_common.o io.o
+ alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o
obj-$(CONFIG_VGA_HOSE) += console.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
new file mode 100644
index 000000000000..4a0af906b00a
--- /dev/null
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -0,0 +1,51 @@
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm_types.h>
+#include <linux/binfmts.h>
+#include <linux/a.out.h>
+
+static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+{
+ struct exec *eh = (struct exec *)bprm->buf;
+ unsigned long loader;
+ struct file *file;
+ int retval;
+
+ if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000)
+ return -ENOEXEC;
+
+ if (bprm->loader)
+ return -ENOEXEC;
+
+ allow_write_access(bprm->file);
+ fput(bprm->file);
+ bprm->file = NULL;
+
+ loader = bprm->vma->vm_end - sizeof(void *);
+
+ file = open_exec("/sbin/loader");
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
+ return retval;
+
+ /* Remember if the application is TASO. */
+ bprm->taso = eh->ah.entry < 0x100000000UL;
+
+ bprm->file = file;
+ bprm->loader = loader;
+ retval = prepare_binprm(bprm);
+ if (retval < 0)
+ return retval;
+ return search_binary_handler(bprm,regs);
+}
+
+static struct linux_binfmt loader_format = {
+ .load_binary = load_binary,
+};
+
+static int __init init_loader_binfmt(void)
+{
+ return register_binfmt(&loader_format);
+}
+arch_initcall(init_loader_binfmt);
diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 1f762189fa64..c2938e574a40 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -8,7 +8,6 @@
#include <asm/uaccess.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c626a821cdcb..703731accda6 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -50,12 +50,13 @@ int irq_select_affinity(unsigned int irq)
if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
return 1;
- while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity))
+ while (!cpu_possible(cpu) ||
+ !cpumask_test_cpu(cpu, irq_default_affinity))
cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
last_cpu = cpu;
irq_desc[irq].affinity = cpumask_of_cpu(cpu);
- irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu));
+ irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
return 0;
}
#endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 351407e07e71..f238370c907d 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -94,6 +94,7 @@ common_shutdown_1(void *generic_ptr)
flags |= 0x00040000UL; /* "remain halted" */
*pflags = flags;
cpu_clear(cpuid, cpu_present_map);
+ cpu_clear(cpuid, cpu_possible_map);
halt();
}
#endif
@@ -120,6 +121,7 @@ common_shutdown_1(void *generic_ptr)
#ifdef CONFIG_SMP
/* Wait for the secondaries to halt. */
cpu_clear(boot_cpuid, cpu_present_map);
+ cpu_clear(boot_cpuid, cpu_possible_map);
while (cpus_weight(cpu_present_map))
barrier();
#endif
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index a449e999027c..02bee6983ce2 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -79,6 +79,11 @@ int alpha_l3_cacheshape;
unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON;
#endif
+#ifdef CONFIG_NUMA
+struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask_map);
+#endif
+
/* Which processor we booted from. */
int boot_cpuid;
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index cf7da10097bb..d953e510f68d 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -70,11 +70,6 @@ enum ipi_message_type {
/* Set to a secondary's cpuid when it comes online. */
static int smp_secondary_alive __devinitdata = 0;
-/* Which cpus ids came online. */
-cpumask_t cpu_online_map;
-
-EXPORT_SYMBOL(cpu_online_map);
-
int smp_num_probed; /* Internal processor count */
int smp_num_cpus = 1; /* Number that came online. */
EXPORT_SYMBOL(smp_num_cpus);
@@ -440,6 +435,7 @@ setup_smp(void)
((char *)cpubase + i*hwrpb->processor_size);
if ((cpu->flags & 0x1cc) == 0x1cc) {
smp_num_probed++;
+ cpu_set(i, cpu_possible_map);
cpu_set(i, cpu_present_map);
cpu->pal_revision = boot_cpu_palrev;
}
@@ -473,6 +469,7 @@ smp_prepare_cpus(unsigned int max_cpus)
/* Nothing to do on a UP box, or when told not to. */
if (smp_num_probed == 1 || max_cpus == 0) {
+ cpu_possible_map = cpumask_of_cpu(boot_cpuid);
cpu_present_map = cpumask_of_cpu(boot_cpuid);
printk(KERN_INFO "SMP mode deactivated.\n");
return;
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index c71b0fd7a61f..ab44c164d9d4 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
}
static void
-dp264_set_affinity(unsigned int irq, cpumask_t affinity)
+dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
{
spin_lock(&dp264_irq_lock);
- cpu_set_irq_affinity(irq, affinity);
+ cpu_set_irq_affinity(irq, *affinity);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
}
static void
-clipper_set_affinity(unsigned int irq, cpumask_t affinity)
+clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
{
spin_lock(&dp264_irq_lock);
- cpu_set_irq_affinity(irq - 16, affinity);
+ cpu_set_irq_affinity(irq - 16, *affinity);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
}
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 52c91ccc1648..27f840a4ad3d 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
}
static void
-titan_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
{
spin_lock(&titan_irq_lock);
- titan_cpu_set_irq_affinity(irq - 16, affinity);
+ titan_cpu_set_irq_affinity(irq - 16, *affinity);
titan_update_irq_hw(titan_cached_irq_mask);
spin_unlock(&titan_irq_lock);
}
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 7fc9860a97d7..c6884ba1d5ed 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq)
}
#ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, cpumask_t mask_val)
+static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
{
void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
unsigned int shift = (irq % 4) * 8;
- unsigned int cpu = first_cpu(mask_val);
+ unsigned int cpu = cpumask_first(mask_val);
u32 val;
spin_lock(&irq_controller_lock);
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 0bbf80625395..e859af349467 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -12,7 +12,6 @@
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 2f3eb795fa6e..7141cee1fab7 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
spin_lock_irq(&desc->lock);
- desc->chip->set_affinity(irq, cpumask_of_cpu(cpu));
+ desc->chip->set_affinity(irq, cpumask_of(cpu));
spin_unlock_irq(&desc->lock);
}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 019237d21622..55fa7ff96a3e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -34,16 +34,6 @@
#include <asm/ptrace.h>
/*
- * bitmask of present and online CPUs.
- * The present bitmask indicates that the CPU is physically present.
- * The online bitmask indicates that the CPU is up and running.
- */
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
-/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
* where to place its SVC stack
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index d140eae53ded..1ff1bda0a894 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -178,7 +178,6 @@ static struct clock_event_device clkevt = {
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.shift = 32,
.rating = 150,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = clkevt32k_next_event,
.set_mode = clkevt32k_mode,
};
@@ -206,7 +205,7 @@ void __init at91rm9200_timer_init(void)
clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
- clkevt.cpumask = cpumask_of_cpu(0);
+ clkevt.cpumask = cpumask_of(0);
clockevents_register_device(&clkevt);
/* register clocksource */
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 122fd77ed580..b63e1d5f1bad 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,7 +91,6 @@ static struct clock_event_device pit_clkevt = {
.features = CLOCK_EVT_FEAT_PERIODIC,
.shift = 32,
.rating = 100,
- .cpumask = CPU_MASK_CPU0,
.set_mode = pit_clkevt_mode,
};
@@ -173,6 +172,7 @@ static void __init at91sam926x_pit_init(void)
/* Set up and register clockevents */
pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
+ pit_clkevt.cpumask = cpumask_of(0);
clockevents_register_device(&pit_clkevt);
}
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 3b9a296b5c4b..f8bcd29d17a6 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@ static void __init davinci_timer_init(void)
clockevent_davinci.min_delta_ns =
clockevent_delta2ns(1, &clockevent_davinci);
- clockevent_davinci.cpumask = cpumask_of_cpu(0);
+ clockevent_davinci.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_davinci);
}
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index a11765f5f23b..aff0ebcfa847 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate)
clockevent_imx.min_delta_ns =
clockevent_delta2ns(0xf, &clockevent_imx);
- clockevent_imx.cpumask = cpumask_of_cpu(0);
+ clockevent_imx.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_imx);
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 7766f469456b..f4656d2ac8a8 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void)
clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
clockevent_ixp4xx.min_delta_ns =
clockevent_delta2ns(0xf, &clockevent_ixp4xx);
- clockevent_ixp4xx.cpumask = cpumask_of_cpu(0);
+ clockevent_ixp4xx.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_ixp4xx);
return 0;
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 345a14cb73c3..444d9c0f5ca6 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@ static void __init msm_timer_init(void)
clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
/* 4 gets rounded down to 3 */
ce->min_delta_ns = clockevent_delta2ns(4, ce);
- ce->cpumask = cpumask_of_cpu(0);
+ ce->cpumask = cpumask_of(0);
cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
res = clocksource_register(cs);
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index a63424d083d9..41df69721769 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void)
ns9360_clockevent_device.min_delta_ns =
clockevent_delta2ns(1, &ns9360_clockevent_device);
- ns9360_clockevent_device.cpumask = cpumask_of_cpu(0);
+ ns9360_clockevent_device.cpumask = cpumask_of(0);
clockevents_register_device(&ns9360_clockevent_device);
setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 2cf7e32bd293..495a32c287b4 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate)
clockevent_mpu_timer1.min_delta_ns =
clockevent_delta2ns(1, &clockevent_mpu_timer1);
- clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0);
+ clockevent_mpu_timer1.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_mpu_timer1);
}
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 705367ece174..fd3f7396e162 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void)
clockevent_32k_timer.min_delta_ns =
clockevent_delta2ns(1, &clockevent_32k_timer);
- clockevent_32k_timer.cpumask = cpumask_of_cpu(0);
+ clockevent_32k_timer.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_32k_timer);
}
diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h
index ff6cd14d254d..ad6d98d177c5 100644
--- a/arch/arm/mach-omap2/clock24xx.h
+++ b/arch/arm/mach-omap2/clock24xx.h
@@ -2321,7 +2321,7 @@ static struct clk i2c2_fck = {
};
static struct clk i2chs2_fck = {
- .name = "i2chs_fck",
+ .name = "i2c_fck",
.id = 2,
.parent = &func_96m_ck,
.flags = CLOCK_IN_OMAP243X,
@@ -2354,7 +2354,7 @@ static struct clk i2c1_fck = {
};
static struct clk i2chs1_fck = {
- .name = "i2chs_fck",
+ .name = "i2c_fck",
.id = 1,
.parent = &func_96m_ck,
.flags = CLOCK_IN_OMAP243X,
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 589393bedade..ae6036300f60 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void)
clockevent_gpt.min_delta_ns =
clockevent_delta2ns(1, &clockevent_gpt);
- clockevent_gpt.cpumask = cpumask_of_cpu(0);
+ clockevent_gpt.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_gpt);
}
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 001624158519..95656a72268d 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -122,7 +122,6 @@ static struct clock_event_device ckevt_pxa_osmr0 = {
.features = CLOCK_EVT_FEAT_ONESHOT,
.shift = 32,
.rating = 200,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = pxa_osmr0_set_next_event,
.set_mode = pxa_osmr0_set_mode,
};
@@ -163,6 +162,7 @@ static void __init pxa_timer_init(void)
clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
ckevt_pxa_osmr0.min_delta_ns =
clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
+ ckevt_pxa_osmr0.cpumask = cpumask_of(0);
cksrc_pxa_oscr0.mult =
clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 5f1d55963ced..bd2aa4f16141 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -624,7 +624,7 @@ static struct clock_event_device timer0_clockevent = {
.set_mode = timer_set_mode,
.set_next_event = timer_set_next_event,
.rating = 300,
- .cpumask = CPU_MASK_ALL,
+ .cpumask = cpu_all_mask,
};
static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 9019ef2e5611..67d6d9cc68b2 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -154,7 +154,7 @@ void __cpuinit local_timer_setup(void)
clk->set_mode = local_timer_set_mode;
clk->set_next_event = local_timer_set_next_event;
clk->irq = IRQ_LOCALTIMER;
- clk->cpumask = cpumask_of_cpu(cpu);
+ clk->cpumask = cpumask_of(cpu);
clk->shift = 20;
clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
@@ -193,7 +193,7 @@ void __cpuinit local_timer_setup(void)
clk->rating = 200;
clk->set_mode = dummy_timer_set_mode;
clk->broadcast = smp_timer_broadcast;
- clk->cpumask = cpumask_of_cpu(cpu);
+ clk->cpumask = cpumask_of(cpu);
clockevents_register_device(clk);
}
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 8c5e727f3b75..711c0295c66f 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -73,7 +73,6 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
.features = CLOCK_EVT_FEAT_ONESHOT,
.shift = 32,
.rating = 200,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = sa1100_osmr0_set_next_event,
.set_mode = sa1100_osmr0_set_mode,
};
@@ -110,6 +109,7 @@ static void __init sa1100_timer_init(void)
clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
ckevt_sa1100_osmr0.min_delta_ns =
clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
+ ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
cksrc_sa1100_oscr.mult =
clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index df25aa138509..1c43494f5c42 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -1005,7 +1005,7 @@ static void __init versatile_timer_init(void)
timer0_clockevent.min_delta_ns =
clockevent_delta2ns(0xf, &timer0_clockevent);
- timer0_clockevent.cpumask = cpumask_of_cpu(0);
+ timer0_clockevent.cpumask = cpumask_of(0);
clockevents_register_device(&timer0_clockevent);
}
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4de366e8b4c5..6d6bd5899240 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@ static void em_stop(void)
static void em_route_irq(int irq, unsigned int cpu)
{
struct irq_desc *desc = irq_desc + irq;
- cpumask_t mask = cpumask_of_cpu(cpu);
+ const struct cpumask *mask = cpumask_of(cpu);
spin_lock_irq(&desc->lock);
- desc->affinity = mask;
+ desc->affinity = *mask;
desc->chip->set_affinity(irq, mask);
spin_unlock_irq(&desc->lock);
}
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index fd28f5194f71..758a1293bcfa 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void)
clockevent_mxc.min_delta_ns =
clockevent_delta2ns(0xff, &clockevent_mxc);
- clockevent_mxc.cpumask = cpumask_of_cpu(0);
+ clockevent_mxc.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_mxc);
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 544d6b327f3a..6fa2923e6dca 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -149,7 +149,6 @@ static struct clock_event_device orion_clkevt = {
.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
.shift = 32,
.rating = 300,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = orion_clkevt_next_event,
.set_mode = orion_clkevt_mode,
};
@@ -199,5 +198,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
+ orion_clkevt.cpumask = cpumask_of(0);
clockevents_register_device(&orion_clkevt);
}
diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h
index 1a50b69b1a19..f7dd5f71edf7 100644
--- a/arch/avr32/include/asm/bitops.h
+++ b/arch/avr32/include/asm/bitops.h
@@ -263,6 +263,11 @@ static inline int fls(unsigned long word)
return 32 - result;
}
+static inline int __fls(unsigned long word)
+{
+ return fls(word) - 1;
+}
+
unsigned long find_first_zero_bit(const unsigned long *addr,
unsigned long size);
unsigned long find_next_zero_bit(const unsigned long *addr,
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 44058469c6ec..993d56ee3cf3 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -13,7 +13,6 @@
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index 283481d74a5b..0ff46bf873b0 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -106,7 +106,6 @@ static struct clock_event_device comparator = {
.features = CLOCK_EVT_FEAT_ONESHOT,
.shift = 16,
.rating = 50,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = comparator_next_event,
.set_mode = comparator_mode,
};
@@ -134,6 +133,7 @@ void __init time_init(void)
comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
+ comparator.cpumask = cpumask_of(0);
sysreg_write(COMPARE, 0);
timer_irqaction.dev_id = &comparator;
diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
index b39a175c79c1..c428e4106f89 100644
--- a/arch/blackfin/include/asm/bitops.h
+++ b/arch/blackfin/include/asm/bitops.h
@@ -213,6 +213,7 @@ static __inline__ int __test_bit(int nr, const void *addr)
#endif /* __KERNEL__ */
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#endif /* _BLACKFIN_BITOPS_H */
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index 6bdba7b21109..2c228c020978 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -33,7 +33,6 @@
#include <linux/mqueue.h>
#include <linux/fs.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index e887efc86c29..0ed2badfd746 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -162,7 +162,6 @@ static struct clock_event_device clockevent_bfin = {
.name = "bfin_core_timer",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.shift = 32,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = bfin_timer_set_next_event,
.set_mode = bfin_timer_set_mode,
};
@@ -193,6 +192,7 @@ static int __init bfin_clockevent_init(void)
clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
+ clockevent_bfin.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_bfin);
return 0;
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 173c141ac9ba..295131fee710 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq)
{
}
-void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest)
+void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
{
unsigned long flags;
spin_lock_irqsave(&irq_lock, flags);
- irq_allocations[irq - FIRST_IRQ].mask = dest;
+ irq_allocations[irq - FIRST_IRQ].mask = *dest;
spin_unlock_irqrestore(&irq_lock, flags);
}
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 52e16c6436f9..9dac17334640 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -29,11 +29,7 @@
spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
/* CPU masks */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
EXPORT_SYMBOL(phys_cpu_present_map);
/* Variables used during SMP boot */
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index c0e62f811e09..9e69cfb7f134 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -148,6 +148,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
#define ffs kernel_ffs
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/find.h>
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h
index dba33aba3e95..c615a06dd757 100644
--- a/arch/cris/include/asm/smp.h
+++ b/arch/cris/include/asm/smp.h
@@ -4,7 +4,6 @@
#include <linux/cpumask.h>
extern cpumask_t phys_cpu_present_map;
-extern cpumask_t cpu_possible_map;
#define raw_smp_processor_id() (current_thread_info()->cpu)
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 5933656db5a2..60816e876455 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -37,7 +37,6 @@
* setup.
*/
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index e2198815b630..29429a8b7f6a 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -10,7 +10,6 @@
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index cb18e3b0aa94..cb9ddf5fc54f 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -207,6 +207,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
#endif /* __KERNEL__ */
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#endif /* _H8300_BITOPS_H */
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 93a4899e46c2..cb5dc552da97 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -12,7 +12,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7fa8f615ba6e..3d31636cbafb 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
config IOMMU_HELPER
def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+ def_bool (DMAR)
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index c2f58ff364e7..cc0a3182db3c 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq)
}
static void
-hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
+hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
{
}
diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h
index 3627116fb0e2..36429a532630 100644
--- a/arch/ia64/include/asm/irq.h
+++ b/arch/ia64/include/asm/irq.h
@@ -27,7 +27,7 @@ irq_canonicalize (int irq)
}
extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
-bool is_affinity_mask_valid(cpumask_t cpumask);
+bool is_affinity_mask_valid(cpumask_var_t cpumask);
#define is_affinity_mask_valid is_affinity_mask_valid
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index f38472ac2267..68aa6da807c1 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,8 +166,6 @@ struct saved_vpd {
};
struct kvm_regs {
- char *saved_guest;
- char *saved_stack;
struct saved_vpd vpd;
/*Arch-regs*/
int mp_state;
@@ -200,6 +198,10 @@ struct kvm_regs {
unsigned long fp_psr; /*used for lazy float register */
unsigned long saved_gp;
/*for phycial emulation */
+
+ union context saved_guest;
+
+ unsigned long reserved[64]; /* for future use */
};
struct kvm_sregs {
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c60d324da540..348663661659 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,17 +23,6 @@
#ifndef __ASM_KVM_HOST_H
#define __ASM_KVM_HOST_H
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-
-#define KVM_MAX_VCPUS 4
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
@@ -50,70 +39,132 @@
#define EXIT_REASON_EXTERNAL_INTERRUPT 6
#define EXIT_REASON_IPI 7
#define EXIT_REASON_PTC_G 8
+#define EXIT_REASON_DEBUG 20
/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (16UL<<20)
+#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000UL
-#define VMM_SIZE (8UL<<20)
+#define KVM_VMM_BASE 0xD000000000000000
+#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
/*
* Define vm_buffer, used by PAL Services, base address.
- * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
+ * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
*/
#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (8UL<<20)
-
-/*Define Virtual machine data layout.*/
-#define KVM_VM_DATA_SHIFT 24
-#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE)
-
-
-#define KVM_P2M_BASE KVM_VM_DATA_BASE
-#define KVM_P2M_OFS 0
-#define KVM_P2M_SIZE (8UL << 20)
-
-#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE)
-#define KVM_VHPT_OFS KVM_P2M_SIZE
-#define KVM_VHPT_BLOCK_SIZE (2UL << 20)
-#define VHPT_SHIFT 18
-#define VHPT_SIZE (1UL << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
-
-#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
-#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
-#define KVM_VTLB_BLOCK_SIZE (1UL<<20)
-#define VTLB_SHIFT 17
-#define VTLB_SIZE (1UL<<VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5))
-
-#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE)
-#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE)
-#define KVM_VPD_BLOCK_SIZE (2UL<<20)
-#define VPD_SHIFT 16
-#define VPD_SIZE (1UL<<VPD_SHIFT)
-
-#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE)
-#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE)
-#define KVM_VCPU_BLOCK_SIZE (2UL<<20)
-#define VCPU_SHIFT 18
-#define VCPU_SIZE (1UL<<VCPU_SHIFT)
-#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE
-
-#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE)
-#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE)
-#define KVM_VM_BLOCK_SIZE (1UL<<19)
-
-#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE)
-#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE)
-#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19)
-
-/* Get vpd, vhpt, tlb, vcpu, base*/
-#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE)
-#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE)
-#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE)
-#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE)
+#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
+
+/*
+ * kvm guest's data area looks as follow:
+ *
+ * +----------------------+ ------- KVM_VM_DATA_SIZE
+ * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
+ * | | | / |
+ * | .......... | | /vcpu's struct&stack |
+ * | .......... | | /---------------------|---- 0
+ * | vcpu[5]'s data | | / vpd |
+ * | vcpu[4]'s data | |/-----------------------|
+ * | vcpu[3]'s data | / vtlb |
+ * | vcpu[2]'s data | /|------------------------|
+ * | vcpu[1]'s data |/ | vhpt |
+ * | vcpu[0]'s data |____________________________|
+ * +----------------------+ |
+ * | memory dirty log | |
+ * +----------------------+ |
+ * | vm's data struct | |
+ * +----------------------+ |
+ * | | |
+ * | | |
+ * | | |
+ * | | |
+ * | | |
+ * | | |
+ * | | |
+ * | vm's p2m table | |
+ * | | |
+ * | | |
+ * | | | |
+ * vm's data->| | | |
+ * +----------------------+ ------- 0
+ * To support large memory, needs to increase the size of p2m.
+ * To support more vcpus, needs to ensure it has enough space to
+ * hold vcpus' data.
+ */
+
+#define KVM_VM_DATA_SHIFT 26
+#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
+
+#define KVM_P2M_BASE KVM_VM_DATA_BASE
+#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
+
+#define VHPT_SHIFT 16
+#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
+
+#define VTLB_SHIFT 16
+#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
+#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
+
+#define VPD_SHIFT 16
+#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
+
+#define VCPU_STRUCT_SHIFT 16
+#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
+
+#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
+
+#define KVM_VM_STRUCT_SHIFT 19
+#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
+
+#define KVM_MEM_DIRY_LOG_SHIFT 19
+#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+/*Define the max vcpus and memory for Guests.*/
+#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
+ KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
+#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
+
+#define VMM_LOG_LEN 256
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/page.h>
+
+struct kvm_vcpu_data {
+ char vcpu_vhpt[VHPT_SIZE];
+ char vcpu_vtlb[VTLB_SIZE];
+ char vcpu_vpd[VPD_SIZE];
+ char vcpu_struct[VCPU_STRUCT_SIZE];
+};
+
+struct kvm_vm_data {
+ char kvm_p2m[KVM_P2M_SIZE];
+ char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
+ char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
+ struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
+};
+
+#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
+ offsetof(struct kvm_vm_data, vcpu_data[n])
+#define VM_BASE KVM_VM_DATA_BASE + \
+ offsetof(struct kvm_vm_data, kvm_vm_struct)
+#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
+ offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
+
+#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
+#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
+#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
+#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
+ offsetof(struct kvm_vcpu_data, vcpu_struct))
/*IO section definitions*/
#define IOREQ_READ 1
@@ -389,6 +440,7 @@ struct kvm_vcpu_arch {
unsigned long opcode;
unsigned long cause;
+ char log_buf[VMM_LOG_LEN];
union context host;
union context guest;
};
@@ -403,20 +455,19 @@ struct kvm_sal_data {
};
struct kvm_arch {
+ spinlock_t dirty_log_lock;
+
unsigned long vm_base;
unsigned long metaphysical_rr0;
unsigned long metaphysical_rr4;
unsigned long vmm_init_rr;
- unsigned long vhpt_base;
- unsigned long vtlb_base;
- unsigned long vpd_base;
- spinlock_t dirty_log_lock;
+
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
struct kvm_sal_data rdv_sal_data;
struct list_head assigned_dev_head;
- struct dmar_domain *intel_iommu_domain;
+ struct iommu_domain *iommu_domain;
struct hlist_head irq_ack_notifier_list;
unsigned long irq_sources_bitmap;
@@ -512,7 +563,7 @@ struct kvm_pt_regs {
static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
{
- return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+ return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
}
typedef int kvm_vmm_entry(void);
@@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
void kvm_sal_emul(struct kvm_vcpu *vcpu);
static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
+#endif /* __ASSEMBLY__*/
#endif
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index 12d96e0cd513..21c402365d0e 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -57,7 +57,6 @@ extern struct smp_boot_data {
extern char no_int_routing __devinitdata;
-extern cpumask_t cpu_online_map;
extern cpumask_t cpu_core_map[NR_CPUS];
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
extern int smp_num_siblings;
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 35bcb641c9e5..76a33a91ca69 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -34,6 +34,7 @@
* Returns a bitmask of CPUs on Node 'node'.
*/
#define node_to_cpumask(node) (node_to_cpu_mask[node])
+#define cpumask_of_node(node) (&node_to_cpu_mask[node])
/*
* Returns the number of the node containing Node 'nid'.
@@ -45,7 +46,7 @@
/*
* Returns the number of the first CPU on Node 'node'.
*/
-#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node)))
+#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node)))
/*
* Determines the node for a given pci bus
@@ -55,7 +56,6 @@
void build_cpu_to_node_map(void);
#define SD_CPU_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
@@ -80,7 +80,6 @@ void build_cpu_to_node_map(void);
/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
@@ -111,6 +110,8 @@ void build_cpu_to_node_map(void);
#define topology_core_id(cpu) (cpu_data(cpu)->core_id)
#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define smt_capable() (smp_num_siblings > 1)
#endif
@@ -121,6 +122,10 @@ extern void arch_fix_phys_package_id(int num, u32 slot);
node_to_cpumask(pcibus_to_node(bus)) \
)
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_from_node(pcibus_to_node(bus)))
+
#include <asm-generic/topology.h>
#endif /* _ASM_IA64_TOPOLOGY_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index bd7acc71e8a9..0553648b7595 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -202,7 +202,6 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
Boot-time Table Parsing
-------------------------------------------------------------------------- */
-static int total_cpus __initdata;
static int available_cpus __initdata;
struct acpi_table_madt *acpi_madt __initdata;
static u8 has_8259;
@@ -1001,7 +1000,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
node = pxm_to_node(pxm);
if (node >= MAX_NUMNODES || !node_online(node) ||
- cpus_empty(node_to_cpumask(node)))
+ cpumask_empty(cpumask_of_node(node)))
return AE_OK;
/* We know a gsi to node mapping! */
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 9d7e1c66faf4..5b0e830c6f33 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -17,7 +17,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 5c4674ae8aea..5cfd3d91001a 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -330,25 +330,25 @@ unmask_irq (unsigned int irq)
static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
{
#ifdef CONFIG_SMP
u32 high32, low32;
- int dest, rte_index;
+ int cpu, dest, rte_index;
int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
struct iosapic_rte_info *rte;
struct iosapic *iosapic;
irq &= (~IA64_IRQ_REDIRECTED);
- cpus_and(mask, mask, cpu_online_map);
- if (cpus_empty(mask))
+ cpu = cpumask_first_and(cpu_online_mask, mask);
+ if (cpu >= nr_cpu_ids)
return;
- if (irq_prepare_move(irq, first_cpu(mask)))
+ if (irq_prepare_move(irq, cpu))
return;
- dest = cpu_physical_id(first_cpu(mask));
+ dest = cpu_physical_id(cpu);
if (!iosapic_intr_info[irq].count)
return; /* not an IOSAPIC interrupt */
@@ -695,32 +695,31 @@ get_target_cpu (unsigned int gsi, int irq)
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
- cpumask_t cpu_mask;
+ const struct cpumask *cpu_mask;
iosapic_index = find_iosapic(gsi);
if (iosapic_index < 0 ||
iosapic_lists[iosapic_index].node == MAX_NUMNODES)
goto skip_numa_setup;
- cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
- cpus_and(cpu_mask, cpu_mask, domain);
- for_each_cpu_mask(numa_cpu, cpu_mask) {
- if (!cpu_online(numa_cpu))
- cpu_clear(numa_cpu, cpu_mask);
+ cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+ num_cpus = 0;
+ for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+ if (cpu_online(numa_cpu))
+ num_cpus++;
}
- num_cpus = cpus_weight(cpu_mask);
-
if (!num_cpus)
goto skip_numa_setup;
/* Use irq assignment to distribute across cpus in node */
cpu_index = irq % num_cpus;
- for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
- numa_cpu = next_cpu(numa_cpu, cpu_mask);
+ for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+ if (cpu_online(numa_cpu) && i++ >= cpu_index)
+ break;
- if (numa_cpu != NR_CPUS)
+ if (numa_cpu < nr_cpu_ids)
return cpu_physical_id(numa_cpu);
}
skip_numa_setup:
@@ -731,7 +730,7 @@ skip_numa_setup:
* case of NUMA.)
*/
do {
- if (++cpu >= NR_CPUS)
+ if (++cpu >= nr_cpu_ids)
cpu = 0;
} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 7fd18f54c056..95ff16cb05d8 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -112,11 +112,11 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
}
}
-bool is_affinity_mask_valid(cpumask_t cpumask)
+bool is_affinity_mask_valid(cpumask_var_t cpumask)
{
if (ia64_platform_is("sn2")) {
/* Only allow one CPU to be specified in the smp_affinity mask */
- if (cpus_weight(cpumask) != 1)
+ if (cpumask_weight(cpumask) != 1)
return false;
}
return true;
@@ -133,7 +133,6 @@ unsigned int vectors_in_migration[NR_IRQS];
*/
static void migrate_irqs(void)
{
- cpumask_t mask;
irq_desc_t *desc;
int irq, new_cpu;
@@ -152,15 +151,14 @@ static void migrate_irqs(void)
if (desc->status == IRQ_PER_CPU)
continue;
- cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
- if (any_online_cpu(mask) == NR_CPUS) {
+ if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
+ >= nr_cpu_ids) {
/*
* Save it for phase 2 processing
*/
vectors_in_migration[irq] = irq;
new_cpu = any_online_cpu(cpu_online_map);
- mask = cpumask_of_cpu(new_cpu);
/*
* Al three are essential, currently WARN_ON.. maybe panic?
@@ -168,7 +166,8 @@ static void migrate_irqs(void)
if (desc->chip && desc->chip->disable &&
desc->chip->enable && desc->chip->set_affinity) {
desc->chip->disable(irq);
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq,
+ cpumask_of(new_cpu));
desc->chip->enable(irq);
} else {
WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 702a09c13238..890339339035 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -49,11 +49,12 @@
static struct irq_chip ia64_msi_chip;
#ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void ia64_set_msi_irq_affinity(unsigned int irq,
+ const cpumask_t *cpu_mask)
{
struct msi_msg msg;
u32 addr, data;
- int cpu = first_cpu(cpu_mask);
+ int cpu = first_cpu(*cpu_mask);
if (!cpu_online(cpu))
return;
@@ -166,12 +167,11 @@ void arch_teardown_msi_irq(unsigned int irq)
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_cfg *cfg = irq_cfg + irq;
struct msi_msg msg;
- int cpu = first_cpu(mask);
-
+ int cpu = cpumask_first(mask);
if (!cpu_online(cpu))
return;
@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
dmar_msi_write(irq, &msg);
- irq_desc[irq].affinity = mask;
+ irq_desc[irq].affinity = *mask;
}
#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 1dcbb85fc4ee..11463994a7d5 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -131,12 +131,6 @@ struct task_struct *task_for_booting_cpu;
*/
DEFINE_PER_CPU(int, cpu_state);
-/* Bitmasks of currently online, and possible CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
-
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
@@ -688,7 +682,7 @@ int migrate_platform_irqs(unsigned int cpu)
{
int new_cpei_cpu;
irq_desc_t *desc = NULL;
- cpumask_t mask;
+ const struct cpumask *mask;
int retval = 0;
/*
@@ -701,7 +695,7 @@ int migrate_platform_irqs(unsigned int cpu)
* Now re-target the CPEI to a different processor
*/
new_cpei_cpu = any_online_cpu(cpu_online_map);
- mask = cpumask_of_cpu(new_cpei_cpu);
+ mask = cpumask_of(new_cpei_cpu);
set_cpei_target_cpu(new_cpei_cpu);
desc = irq_desc + ia64_cpe_irq;
/*
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a42c88f..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
now = ia64_get_itc();
delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
- account_system_time(prev, 0, delta_stime);
- account_system_time_scaled(prev, delta_stime);
+ if (idle_task(smp_processor_id()) != prev)
+ account_system_time(prev, 0, delta_stime, delta_stime);
+ else
+ account_idle_time(delta_stime);
if (pi->ac_utime) {
delta_utime = cycle_to_cputime(pi->ac_utime);
- account_user_time(prev, delta_utime);
- account_user_time_scaled(prev, delta_utime);
+ account_user_time(prev, delta_utime, delta_utime);
}
pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
now = ia64_get_itc();
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
- account_system_time(tsk, 0, delta_stime);
- account_system_time_scaled(tsk, delta_stime);
+ if (irq_count() || idle_task(smp_processor_id()) != tsk)
+ account_system_time(tsk, 0, delta_stime, delta_stime);
+ else
+ account_idle_time(delta_stime);
ti->ac_stime = 0;
ti->ac_stamp = now;
@@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
if (ti->ac_utime) {
delta_utime = cycle_to_cputime(ti->ac_utime);
- account_user_time(p, delta_utime);
- account_user_time_scaled(p, delta_utime);
+ account_user_time(p, delta_utime, delta_utime);
ti->ac_utime = 0;
}
}
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index c75b914f2d6b..a8d61a3e9a94 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
cpumask_t shared_cpu_map;
cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
- len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+ len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
len += sprintf(buf+len, "\n");
return len;
}
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 92cef66ca268..0bb99b732908 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
- vtlb.o process.o
+ vtlb.o process.o kvm_lib.o
#Add link memcpy and memset to avoid possible structure assignment error
kvm-intel-objs += memcpy.o memset.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 4e3dc13a619c..0c3564a7a033 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,19 +24,10 @@
#include <linux/autoconf.h>
#include <linux/kvm_host.h>
+#include <linux/kbuild.h>
#include "vcpu.h"
-#define task_struct kvm_vcpu
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : :)
-
-#define OFFSET(_sym, _str, _mem) \
- DEFINE(_sym, offsetof(_str, _mem));
-
void foo(void)
{
DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index af1464f7a6ad..4e586f6110aa 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
#include <linux/bitops.h>
#include <linux/hrtimer.h>
#include <linux/uaccess.h>
+#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/pgtable.h>
@@ -180,7 +181,6 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_IRQCHIP:
- case KVM_CAP_USER_MEMORY:
case KVM_CAP_MP_STATE:
r = 1;
@@ -189,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_IOMMU:
- r = intel_iommu_found();
+ r = iommu_found();
break;
default:
r = 0;
@@ -439,7 +439,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
expires = div64_u64(itc_diff, cyc_per_usec);
kt = ktime_set(0, 1000 * expires);
- down_read(&vcpu->kvm->slots_lock);
vcpu->arch.ht_active = 1;
hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
@@ -452,7 +451,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
- up_read(&vcpu->kvm->slots_lock);
if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
return -EINTR;
@@ -476,6 +474,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
return 1;
}
+static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ printk("VMM: %s", vcpu->arch.log_buf);
+ return 1;
+}
+
static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) = {
[EXIT_REASON_VM_PANIC] = handle_vm_error,
@@ -487,6 +492,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_IPI] = handle_ipi,
[EXIT_REASON_PTC_G] = handle_global_purge,
+ [EXIT_REASON_DEBUG] = handle_vcpu_debug,
};
@@ -698,27 +704,24 @@ out:
return r;
}
-/*
- * Allocate 16M memory for every vm to hold its specific data.
- * Its memory map is defined in kvm_host.h.
- */
static struct kvm *kvm_alloc_kvm(void)
{
struct kvm *kvm;
uint64_t vm_base;
+ BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
+
vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
if (!vm_base)
return ERR_PTR(-ENOMEM);
- printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
- /* Zero all pages before use! */
memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-
- kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+ kvm = (struct kvm *)(vm_base +
+ offsetof(struct kvm_vm_data, kvm_vm_struct));
kvm->arch.vm_base = vm_base;
+ printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
return kvm;
}
@@ -760,21 +763,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
static void kvm_init_vm(struct kvm *kvm)
{
- long vm_base;
-
BUG_ON(!kvm);
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
kvm->arch.vmm_init_rr = VMM_INIT_RR;
- vm_base = kvm->arch.vm_base;
- if (vm_base) {
- kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
- kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
- kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
- }
-
/*
*Fill P2M entries for MMIO/IO ranges
*/
@@ -838,9 +832,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- int i;
struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
- int r;
+ int i;
vcpu_load(vcpu);
@@ -857,18 +850,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vpd->vpr = regs->vpd.vpr;
- r = -EFAULT;
- r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
- sizeof(union context));
- if (r)
- goto out;
- r = copy_from_user(vcpu + 1, regs->saved_stack +
- sizeof(struct kvm_vcpu),
- IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
- if (r)
- goto out;
- vcpu->arch.exit_data =
- ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+ memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
RESTORE_REGS(mp_state);
RESTORE_REGS(vmm_rr);
@@ -902,9 +884,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
set_bit(KVM_REQ_RESUME, &vcpu->requests);
vcpu_put(vcpu);
- r = 0;
-out:
- return r;
+
+ return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
@@ -1166,10 +1147,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/*Set entry address for first run.*/
regs->cr_iip = PALE_RESET_ENTRY;
- /*Initilize itc offset for vcpus*/
+ /*Initialize itc offset for vcpus*/
itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
- for (i = 0; i < MAX_VCPU_NUM; i++) {
- v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ v = (struct kvm_vcpu *)((char *)vcpu +
+ sizeof(struct kvm_vcpu_data) * i);
v->arch.itc_offset = itc_offset;
v->arch.last_itc = 0;
}
@@ -1183,7 +1165,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.apic->vcpu = vcpu;
p_ctx->gr[1] = 0;
- p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+ p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
p_ctx->gr[13] = (unsigned long)vmm_vcpu;
p_ctx->psr = 0x1008522000UL;
p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1218,12 +1200,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.hlt_timer.function = hlt_timer_fn;
vcpu->arch.last_run_cpu = -1;
- vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+ vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
vcpu->arch.vsa_base = kvm_vsa_base;
vcpu->arch.__gp = kvm_vmm_gp;
vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
- vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
- vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+ vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
+ vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
init_ptce_info(vcpu);
r = 0;
@@ -1273,12 +1255,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int r;
int cpu;
+ BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
+
+ r = -EINVAL;
+ if (id >= KVM_MAX_VCPUS) {
+ printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
+ KVM_MAX_VCPUS);
+ goto fail;
+ }
+
r = -ENOMEM;
if (!vm_base) {
printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
goto fail;
}
- vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+ vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
+ vcpu_data[id].vcpu_struct));
vcpu->kvm = kvm;
cpu = get_cpu();
@@ -1374,9 +1366,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- int i;
- int r;
struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+ int i;
+
vcpu_load(vcpu);
for (i = 0; i < 16; i++) {
@@ -1391,14 +1383,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->vpd.vpsr = vpd->vpsr;
regs->vpd.vpr = vpd->vpr;
- r = -EFAULT;
- r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
- sizeof(union context));
- if (r)
- goto out;
- r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
- if (r)
- goto out;
+ memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
+
SAVE_REGS(mp_state);
SAVE_REGS(vmm_rr);
memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1426,10 +1412,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
SAVE_REGS(metaphysical_saved_rr4);
SAVE_REGS(fp_psr);
SAVE_REGS(saved_gp);
+
vcpu_put(vcpu);
- r = 0;
-out:
- return r;
+ return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1457,6 +1442,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
unsigned long base_gfn = memslot->base_gfn;
+ if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
+ return -ENOMEM;
+
for (i = 0; i < npages; i++) {
pfn = gfn_to_pfn(kvm, base_gfn + i);
if (!kvm_is_mmio_pfn(pfn)) {
@@ -1631,8 +1619,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
int r, i;
long n, base;
- unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
- + KVM_MEM_DIRTY_LOG_OFS);
+ unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
+ offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
r = -EINVAL;
if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
new file mode 100644
index 000000000000..a85cb611ecd7
--- /dev/null
+++ b/arch/ia64/kvm/kvm_lib.c
@@ -0,0 +1,15 @@
+/*
+ * kvm_lib.c: Compile some libraries for kvm-intel module.
+ *
+ * Just include kernel's library, and disable symbols export.
+ * Copyright (C) 2008, Intel Corporation.
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#undef CONFIG_MODULES
+#include "../../../lib/vsprintf.c"
+#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 2cc41d17cf99..b2bcaa2787aa 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,6 +24,8 @@
#include <asm/asmmacro.h>
#include <asm/types.h>
#include <asm/kregs.h>
+#include <asm/kvm_host.h>
+
#include "asm-offsets.h"
#define KVM_MINSTATE_START_SAVE_MIN \
@@ -33,7 +35,7 @@
addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
;; \
lfetch.fault.excl.nt1 [r22]; \
- addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+ addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
mov r23 = ar.bspstore; /* save ar.bspstore */ \
;; \
mov ar.bspstore = r22; /* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index e585c4607344..dd979e00b574 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,7 +27,8 @@
*/
static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
{
- return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+ return (uint64_t *)(kvm->arch.vm_base +
+ offsetof(struct kvm_vm_data, kvm_p2m));
}
static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 7f1a858bc69f..21f63fffc379 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
switch (addr) {
case PIB_OFST_INTA:
- /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
- panic_vm(v);
+ panic_vm(v, "Undefined write on PIB INTA\n");
break;
case PIB_OFST_XTP:
if (length == 1) {
vlsapic_write_xtp(v, val);
} else {
- /*panic_domain(NULL,
- "Undefined write on PIB XTP\n");*/
- panic_vm(v);
+ panic_vm(v, "Undefined write on PIB XTP\n");
}
break;
default:
if (PIB_LOW_HALF(addr)) {
- /*lower half */
+ /*Lower half */
if (length != 8)
- /*panic_domain(NULL,
- "Can't LHF write with size %ld!\n",
- length);*/
- panic_vm(v);
+ panic_vm(v, "Can't LHF write with size %ld!\n",
+ length);
else
vlsapic_write_ipi(v, addr, val);
- } else { /* upper half
- printk("IPI-UHF write %lx\n",addr);*/
- panic_vm(v);
+ } else { /*Upper half */
+ panic_vm(v, "IPI-UHF write %lx\n", addr);
}
break;
}
@@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
if (length == 1) /* 1 byte load */
; /* There is no i8259, there is no INTA access*/
else
- /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
- panic_vm(v);
+ panic_vm(v, "Undefined read on PIB INTA\n");
break;
case PIB_OFST_XTP:
if (length == 1) {
result = VLSAPIC_XTP(v);
- /* printk("read xtp %lx\n", result); */
} else {
- /*panic_domain(NULL,
- "Undefined read on PIB XTP\n");*/
- panic_vm(v);
+ panic_vm(v, "Undefined read on PIB XTP\n");
}
break;
default:
- panic_vm(v);
+ panic_vm(v, "Undefined addr access for lsapic!\n");
break;
}
return result;
@@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
/* it's necessary to ensure zero extending */
*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
} else
- panic_vm(vcpu);
+ panic_vm(vcpu, "Unhandled mmio access returned!\n");
out:
local_irq_restore(psr);
return ;
@@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
return;
} else {
inst_type = -1;
- panic_vm(vcpu);
+ panic_vm(vcpu, "Unsupported MMIO access instruction! \
+ Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+ bundle.i64[0], bundle.i64[1]);
}
size = 1 << size;
@@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
if (inst_type == SL_INTEGER)
vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
else
- panic_vm(vcpu);
+ panic_vm(vcpu, "Unsupported instruction type!\n");
}
vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 800817307b7b..552d07724207 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
vector = vec2off[vec];
if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
- panic_vm(vcpu);
+ panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
+ "with psr.ic = 0\n", vector);
return;
}
@@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
} else
- panic_vm(vcpu);
+ panic_vm(vcpu, "Mis-set for exit reason!\n");
}
static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
} else
- panic_vm(vcpu);
+ panic_vm(vcpu, "Mis-set for exit reason!\n");
}
void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
vpsr = VCPU(vcpu, vpsr);
isr = vpsr & IA64_PSR_RI;
if (!(vpsr & IA64_PSR_IC))
- panic_vm(vcpu);
+ panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
}
@@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
ia64_set_pta(vcpu->arch.vhpt.pta.val);
}
+static void vmm_sanity_check(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+ if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
+ panic_vm(vcpu, "Failed to do vmm sanity check,"
+ "it maybe caused by crashed vmm!!\n\n");
+ }
+}
+
static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
{
+ vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
+
if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
vcpu_do_resume(vcpu);
return;
@@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu)
1, 0, 0, 0, 0, 0);
kvm_do_resume_op(vcpu);
}
+
+void vmm_panic_handler(u64 vec)
+{
+ struct kvm_vcpu *vcpu = current_vcpu;
+ vmm_sanity = 0;
+ panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
+ vec2off[vec]);
+}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index e44027ce5667..ecd526b55323 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
unsigned long vitv = VCPU(vcpu, itv);
if (vcpu->vcpu_id == 0) {
- for (i = 0; i < MAX_VCPU_NUM; i++) {
- v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ v = (struct kvm_vcpu *)((char *)vcpu +
+ sizeof(struct kvm_vcpu_data) * i);
VMX(v, itc_offset) = itc_offset;
VMX(v, last_itc) = 0;
}
@@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
* Otherwise panic
*/
if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
- panic_vm(vcpu);
+ panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
+ & vpsr.is=0\n");
/*
* For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
if (is_physical_mode(vcpu)) {
if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
- panic_vm(vcpu);
+ panic_vm(vcpu, "Machine Status conflicts!\n");
ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
ia64_dv_serialize_data();
@@ -2152,10 +2154,70 @@ int vmm_entry(void)
return 0;
}
-void panic_vm(struct kvm_vcpu *v)
-{
+static void kvm_show_registers(struct kvm_pt_regs *regs)
+{
+ unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+ struct kvm_vcpu *vcpu = current_vcpu;
+ if (vcpu != NULL)
+ printk("vcpu 0x%p vcpu %d\n",
+ vcpu, vcpu->vcpu_id);
+
+ printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
+ regs->cr_ipsr, regs->cr_ifs, ip);
+
+ printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+ regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+ printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
+ regs->ar_rnat, regs->ar_bspstore, regs->pr);
+ printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+ regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+ printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
+ printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
+ regs->b6, regs->b7);
+ printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
+ regs->f6.u.bits[1], regs->f6.u.bits[0],
+ regs->f7.u.bits[1], regs->f7.u.bits[0]);
+ printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
+ regs->f8.u.bits[1], regs->f8.u.bits[0],
+ regs->f9.u.bits[1], regs->f9.u.bits[0]);
+ printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
+ regs->f10.u.bits[1], regs->f10.u.bits[0],
+ regs->f11.u.bits[1], regs->f11.u.bits[0]);
+
+ printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
+ regs->r2, regs->r3);
+ printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
+ regs->r9, regs->r10);
+ printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
+ regs->r12, regs->r13);
+ printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
+ regs->r15, regs->r16);
+ printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
+ regs->r18, regs->r19);
+ printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
+ regs->r21, regs->r22);
+ printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
+ regs->r24, regs->r25);
+ printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
+ regs->r27, regs->r28);
+ printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
+ regs->r30, regs->r31);
+
+}
+
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
+{
+ va_list args;
+ char buf[256];
+
+ struct kvm_pt_regs *regs = vcpu_regs(v);
struct exit_ctl_data *p = &v->arch.exit_data;
-
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ printk(buf);
+ kvm_show_registers(regs);
p->exit_reason = EXIT_REASON_VM_PANIC;
vmm_transition(v);
/*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index e9b2a4e121c0..b2f12a562bdf 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
void kvm_init_vhpt(struct kvm_vcpu *v);
void thash_init(struct thash_cb *hcb, u64 sz);
-void panic_vm(struct kvm_vcpu *v);
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+
+extern long vmm_sanity;
+
#endif
#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 2275bf4e681a..9eee5c04bacc 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,6 +20,7 @@
*/
+#include<linux/kernel.h>
#include<linux/module.h>
#include<asm/fpswa.h>
@@ -31,6 +32,8 @@ MODULE_LICENSE("GPL");
extern char kvm_ia64_ivt;
extern fpswa_interface_t *vmm_fpswa_interface;
+long vmm_sanity = 1;
+
struct kvm_vmm_info vmm_info = {
.module = THIS_MODULE,
.vmm_entry = vmm_entry,
@@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock)
{
_vmm_raw_spin_unlock(lock);
}
+
+static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+ long psr;
+
+ local_irq_save(psr);
+ p->exit_reason = EXIT_REASON_DEBUG;
+ vmm_transition(vcpu);
+ local_irq_restore(psr);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+ struct kvm_vcpu *vcpu = current_vcpu;
+ va_list args;
+ int r;
+
+ memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
+ va_start(args, fmt);
+ r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
+ va_end(args);
+ vcpu_debug_exit(vcpu);
+ return r;
+}
+
module_init(kvm_vmm_init)
module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index c1d7251a1480..3ef1a017a318 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
/*
- * /ia64/kvm_ivt.S
+ * arch/ia64/kvm/vmm_ivt.S
*
* Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
@@ -70,32 +70,39 @@
# define PSR_DEFAULT_BITS 0
#endif
-
#define KVM_FAULT(n) \
- kvm_fault_##n:; \
- mov r19=n;; \
- br.sptk.many kvm_fault_##n; \
- ;; \
-
+ kvm_fault_##n:; \
+ mov r19=n;; \
+ br.sptk.many kvm_vmm_panic; \
+ ;; \
#define KVM_REFLECT(n) \
- mov r31=pr; \
- mov r19=n; /* prepare to save predicates */ \
- mov r29=cr.ipsr; \
- ;; \
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
-(p7)br.sptk.many kvm_dispatch_reflection; \
- br.sptk.many kvm_panic; \
-
-
-GLOBAL_ENTRY(kvm_panic)
- br.sptk.many kvm_panic
- ;;
-END(kvm_panic)
-
-
-
-
+ mov r31=pr; \
+ mov r19=n; /* prepare to save predicates */ \
+ mov r29=cr.ipsr; \
+ ;; \
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
+(p7) br.sptk.many kvm_dispatch_reflection; \
+ br.sptk.many kvm_vmm_panic; \
+
+GLOBAL_ENTRY(kvm_vmm_panic)
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,1,0
+ mov out0=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ br.call.sptk.many b6=vmm_panic_handler;
+END(kvm_vmm_panic)
.section .text.ivt,"ax"
@@ -105,308 +112,307 @@ kvm_ia64_ivt:
///////////////////////////////////////////////////////////////
// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
ENTRY(kvm_vhpt_miss)
- KVM_FAULT(0)
+ KVM_FAULT(0)
END(kvm_vhpt_miss)
-
.org kvm_ia64_ivt+0x400
////////////////////////////////////////////////////////////////
// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
ENTRY(kvm_itlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
- (p6) br.sptk kvm_alt_itlb_miss
- mov r19 = 1
- br.sptk kvm_itlb_miss_dispatch
- KVM_FAULT(1);
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6) br.sptk kvm_alt_itlb_miss
+ mov r19 = 1
+ br.sptk kvm_itlb_miss_dispatch
+ KVM_FAULT(1);
END(kvm_itlb_miss)
.org kvm_ia64_ivt+0x0800
//////////////////////////////////////////////////////////////////
// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
ENTRY(kvm_dtlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)br.sptk kvm_alt_dtlb_miss
- br.sptk kvm_dtlb_miss_dispatch
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6) br.sptk kvm_alt_dtlb_miss
+ br.sptk kvm_dtlb_miss_dispatch
END(kvm_dtlb_miss)
.org kvm_ia64_ivt+0x0c00
////////////////////////////////////////////////////////////////////
// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
ENTRY(kvm_alt_itlb_miss)
- mov r16=cr.ifa // get address that caused the TLB miss
- ;;
- movl r17=PAGE_KERNEL
- mov r24=cr.ipsr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- ;;
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- ;;
- or r19=r17,r19 // insert PTE control bits into r19
- ;;
- movl r20=IA64_GRANULE_SHIFT<<2
- ;;
- mov cr.itir=r20
- ;;
- itc.i r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ movl r17=PAGE_KERNEL
+ mov r24=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ ;;
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ movl r20=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r20
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
END(kvm_alt_itlb_miss)
.org kvm_ia64_ivt+0x1000
/////////////////////////////////////////////////////////////////////
// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
ENTRY(kvm_alt_dtlb_miss)
- mov r16=cr.ifa // get address that caused the TLB miss
- ;;
- movl r17=PAGE_KERNEL
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r24=cr.ipsr
- ;;
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- ;;
- or r19=r19,r17 // insert PTE control bits into r19
- ;;
- movl r20=IA64_GRANULE_SHIFT<<2
- ;;
- mov cr.itir=r20
- ;;
- itc.d r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ movl r17=PAGE_KERNEL
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r24=cr.ipsr
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ ;;
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ movl r20=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r20
+ ;;
+ itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
END(kvm_alt_dtlb_miss)
.org kvm_ia64_ivt+0x1400
//////////////////////////////////////////////////////////////////////
// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
ENTRY(kvm_nested_dtlb_miss)
- KVM_FAULT(5)
+ KVM_FAULT(5)
END(kvm_nested_dtlb_miss)
.org kvm_ia64_ivt+0x1800
/////////////////////////////////////////////////////////////////////
// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
ENTRY(kvm_ikey_miss)
- KVM_REFLECT(6)
+ KVM_REFLECT(6)
END(kvm_ikey_miss)
.org kvm_ia64_ivt+0x1c00
/////////////////////////////////////////////////////////////////////
// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
ENTRY(kvm_dkey_miss)
- KVM_REFLECT(7)
+ KVM_REFLECT(7)
END(kvm_dkey_miss)
.org kvm_ia64_ivt+0x2000
////////////////////////////////////////////////////////////////////
// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
ENTRY(kvm_dirty_bit)
- KVM_REFLECT(8)
+ KVM_REFLECT(8)
END(kvm_dirty_bit)
.org kvm_ia64_ivt+0x2400
////////////////////////////////////////////////////////////////////
// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
ENTRY(kvm_iaccess_bit)
- KVM_REFLECT(9)
+ KVM_REFLECT(9)
END(kvm_iaccess_bit)
.org kvm_ia64_ivt+0x2800
///////////////////////////////////////////////////////////////////
// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
ENTRY(kvm_daccess_bit)
- KVM_REFLECT(10)
+ KVM_REFLECT(10)
END(kvm_daccess_bit)
.org kvm_ia64_ivt+0x2c00
/////////////////////////////////////////////////////////////////
// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
ENTRY(kvm_break_fault)
- mov r31=pr
- mov r19=11
- mov r29=cr.ipsr
- ;;
- KVM_SAVE_MIN_WITH_COVER_R19
- ;;
- alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
- mov out0=cr.ifa
- mov out2=cr.isr // FIXME: pity to make this slow access twice
- mov out3=cr.iim // FIXME: pity to make this slow access twice
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15)ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- adds out1=16,sp
- br.call.sptk.many b6=kvm_ia64_handle_break
- ;;
+ mov r31=pr
+ mov r19=11
+ mov r29=cr.ipsr
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
+ mov out0=cr.ifa
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15)ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out1=16,sp
+ br.call.sptk.many b6=kvm_ia64_handle_break
+ ;;
END(kvm_break_fault)
.org kvm_ia64_ivt+0x3000
/////////////////////////////////////////////////////////////////
// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
ENTRY(kvm_interrupt)
- mov r31=pr // prepare to save predicates
- mov r19=12
- mov r29=cr.ipsr
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT
- tbit.z p0,p15=r29,IA64_PSR_I_BIT
- ;;
-(p7) br.sptk kvm_dispatch_interrupt
- ;;
- mov r27=ar.rsc /* M */
- mov r20=r1 /* A */
- mov r25=ar.unat /* M */
- mov r26=ar.pfs /* I */
- mov r28=cr.iip /* M */
- cover /* B (or nothing) */
- ;;
- mov r1=sp
- ;;
- invala /* M */
- mov r30=cr.ifs
- ;;
- addl r1=-VMM_PT_REGS_SIZE,r1
- ;;
- adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
- adds r16=PT(CR_IPSR),r1
- ;;
- lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
- st8 [r16]=r29 /* save cr.ipsr */
- ;;
- lfetch.fault.excl.nt1 [r17]
- mov r29=b0
- ;;
- adds r16=PT(R8),r1 /* initialize first base pointer */
- adds r17=PT(R9),r1 /* initialize second base pointer */
- mov r18=r0 /* make sure r18 isn't NaT */
- ;;
+ mov r31=pr // prepare to save predicates
+ mov r19=12
+ mov r29=cr.ipsr
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+ tbit.z p0,p15=r29,IA64_PSR_I_BIT
+ ;;
+(p7) br.sptk kvm_dispatch_interrupt
+ ;;
+ mov r27=ar.rsc /* M */
+ mov r20=r1 /* A */
+ mov r25=ar.unat /* M */
+ mov r26=ar.pfs /* I */
+ mov r28=cr.iip /* M */
+ cover /* B (or nothing) */
+ ;;
+ mov r1=sp
+ ;;
+ invala /* M */
+ mov r30=cr.ifs
+ ;;
+ addl r1=-VMM_PT_REGS_SIZE,r1
+ ;;
+ adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
+ adds r16=PT(CR_IPSR),r1
+ ;;
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+ st8 [r16]=r29 /* save cr.ipsr */
+ ;;
+ lfetch.fault.excl.nt1 [r17]
+ mov r29=b0
+ ;;
+ adds r16=PT(R8),r1 /* initialize first base pointer */
+ adds r17=PT(R9),r1 /* initialize second base pointer */
+ mov r18=r0 /* make sure r18 isn't NaT */
+ ;;
.mem.offset 0,0; st8.spill [r16]=r8,16
.mem.offset 8,0; st8.spill [r17]=r9,16
;;
.mem.offset 0,0; st8.spill [r16]=r10,24
.mem.offset 8,0; st8.spill [r17]=r11,24
;;
- st8 [r16]=r28,16 /* save cr.iip */
- st8 [r17]=r30,16 /* save cr.ifs */
- mov r8=ar.fpsr /* M */
- mov r9=ar.csd
- mov r10=ar.ssd
- movl r11=FPSR_DEFAULT /* L-unit */
- ;;
- st8 [r16]=r25,16 /* save ar.unat */
- st8 [r17]=r26,16 /* save ar.pfs */
- shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
- ;;
- st8 [r16]=r27,16 /* save ar.rsc */
- adds r17=16,r17 /* skip over ar_rnat field */
- ;;
- st8 [r17]=r31,16 /* save predicates */
- adds r16=16,r16 /* skip over ar_bspstore field */
- ;;
- st8 [r16]=r29,16 /* save b0 */
- st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
- ;;
+ st8 [r16]=r28,16 /* save cr.iip */
+ st8 [r17]=r30,16 /* save cr.ifs */
+ mov r8=ar.fpsr /* M */
+ mov r9=ar.csd
+ mov r10=ar.ssd
+ movl r11=FPSR_DEFAULT /* L-unit */
+ ;;
+ st8 [r16]=r25,16 /* save ar.unat */
+ st8 [r17]=r26,16 /* save ar.pfs */
+ shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
+ ;;
+ st8 [r16]=r27,16 /* save ar.rsc */
+ adds r17=16,r17 /* skip over ar_rnat field */
+ ;;
+ st8 [r17]=r31,16 /* save predicates */
+ adds r16=16,r16 /* skip over ar_bspstore field */
+ ;;
+ st8 [r16]=r29,16 /* save b0 */
+ st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
+ ;;
.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
.mem.offset 8,0; st8.spill [r17]=r12,16
- adds r12=-16,r1
- /* switch to kernel memory stack (with 16 bytes of scratch) */
- ;;
+ adds r12=-16,r1
+ /* switch to kernel memory stack (with 16 bytes of scratch) */
+ ;;
.mem.offset 0,0; st8.spill [r16]=r13,16
.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r16]=r15,16
.mem.offset 8,0; st8.spill [r17]=r14,16
- dep r14=-1,r0,60,4
- ;;
+ dep r14=-1,r0,60,4
+ ;;
.mem.offset 0,0; st8.spill [r16]=r2,16
.mem.offset 8,0; st8.spill [r17]=r3,16
- adds r2=VMM_PT_REGS_R16_OFFSET,r1
- adds r14 = VMM_VCPU_GP_OFFSET,r13
- ;;
- mov r8=ar.ccv
- ld8 r14 = [r14]
- ;;
- mov r1=r14 /* establish kernel global pointer */
- ;; \
- bsw.1
- ;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
- mov out0=r13
- ;;
- ssm psr.ic
- ;;
- srlz.i
- ;;
- //(p15) ssm psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
+ adds r2=VMM_PT_REGS_R16_OFFSET,r1
+ adds r14 = VMM_VCPU_GP_OFFSET,r13
+ ;;
+ mov r8=ar.ccv
+ ld8 r14 = [r14]
+ ;;
+ mov r1=r14 /* establish kernel global pointer */
+ ;; \
+ bsw.1
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+ mov out0=r13
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ //(p15) ssm psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
.mem.offset 0,0; st8.spill [r2]=r16,16
.mem.offset 8,0; st8.spill [r3]=r17,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r18,16
.mem.offset 8,0; st8.spill [r3]=r19,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r20,16
.mem.offset 8,0; st8.spill [r3]=r21,16
- mov r18=b6
- ;;
+ mov r18=b6
+ ;;
.mem.offset 0,0; st8.spill [r2]=r22,16
.mem.offset 8,0; st8.spill [r3]=r23,16
- mov r19=b7
- ;;
+ mov r19=b7
+ ;;
.mem.offset 0,0; st8.spill [r2]=r24,16
.mem.offset 8,0; st8.spill [r3]=r25,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r26,16
.mem.offset 8,0; st8.spill [r3]=r27,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r28,16
.mem.offset 8,0; st8.spill [r3]=r29,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r30,16
.mem.offset 8,0; st8.spill [r3]=r31,32
- ;;
- mov ar.fpsr=r11 /* M-unit */
- st8 [r2]=r8,8 /* ar.ccv */
- adds r24=PT(B6)-PT(F7),r3
- ;;
- stf.spill [r2]=f6,32
- stf.spill [r3]=f7,32
- ;;
- stf.spill [r2]=f8,32
- stf.spill [r3]=f9,32
- ;;
- stf.spill [r2]=f10
- stf.spill [r3]=f11
- adds r25=PT(B7)-PT(F11),r3
- ;;
- st8 [r24]=r18,16 /* b6 */
- st8 [r25]=r19,16 /* b7 */
- ;;
- st8 [r24]=r9 /* ar.csd */
- st8 [r25]=r10 /* ar.ssd */
- ;;
- srlz.d // make sure we see the effect of cr.ivr
- addl r14=@gprel(ia64_leave_nested),gp
- ;;
- mov rp=r14
- br.call.sptk.many b6=kvm_ia64_handle_irq
- ;;
+ ;;
+ mov ar.fpsr=r11 /* M-unit */
+ st8 [r2]=r8,8 /* ar.ccv */
+ adds r24=PT(B6)-PT(F7),r3
+ ;;
+ stf.spill [r2]=f6,32
+ stf.spill [r3]=f7,32
+ ;;
+ stf.spill [r2]=f8,32
+ stf.spill [r3]=f9,32
+ ;;
+ stf.spill [r2]=f10
+ stf.spill [r3]=f11
+ adds r25=PT(B7)-PT(F11),r3
+ ;;
+ st8 [r24]=r18,16 /* b6 */
+ st8 [r25]=r19,16 /* b7 */
+ ;;
+ st8 [r24]=r9 /* ar.csd */
+ st8 [r25]=r10 /* ar.ssd */
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ addl r14=@gprel(ia64_leave_nested),gp
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=kvm_ia64_handle_irq
+ ;;
END(kvm_interrupt)
.global kvm_dispatch_vexirq
@@ -414,387 +420,385 @@ END(kvm_interrupt)
//////////////////////////////////////////////////////////////////////
// 0x3400 Entry 13 (size 64 bundles) Reserved
ENTRY(kvm_virtual_exirq)
- mov r31=pr
- mov r19=13
- mov r30 =r0
- ;;
+ mov r31=pr
+ mov r19=13
+ mov r30 =r0
+ ;;
kvm_dispatch_vexirq:
- cmp.eq p6,p0 = 1,r30
- ;;
-(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
- ;;
-(p6)ld8 r1 = [r29]
- ;;
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,1,0
- mov out0=r13
-
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- KVM_SAVE_REST
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- mov rp=r14
- br.call.sptk.many b6=kvm_vexirq
+ cmp.eq p6,p0 = 1,r30
+ ;;
+(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+(p6) ld8 r1 = [r29]
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,1,0
+ mov out0=r13
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ KVM_SAVE_REST
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=kvm_vexirq
END(kvm_virtual_exirq)
.org kvm_ia64_ivt+0x3800
/////////////////////////////////////////////////////////////////////
// 0x3800 Entry 14 (size 64 bundles) Reserved
- KVM_FAULT(14)
- // this code segment is from 2.6.16.13
-
+ KVM_FAULT(14)
+ // this code segment is from 2.6.16.13
.org kvm_ia64_ivt+0x3c00
///////////////////////////////////////////////////////////////////////
// 0x3c00 Entry 15 (size 64 bundles) Reserved
- KVM_FAULT(15)
-
+ KVM_FAULT(15)
.org kvm_ia64_ivt+0x4000
///////////////////////////////////////////////////////////////////////
// 0x4000 Entry 16 (size 64 bundles) Reserved
- KVM_FAULT(16)
+ KVM_FAULT(16)
.org kvm_ia64_ivt+0x4400
//////////////////////////////////////////////////////////////////////
// 0x4400 Entry 17 (size 64 bundles) Reserved
- KVM_FAULT(17)
+ KVM_FAULT(17)
.org kvm_ia64_ivt+0x4800
//////////////////////////////////////////////////////////////////////
// 0x4800 Entry 18 (size 64 bundles) Reserved
- KVM_FAULT(18)
+ KVM_FAULT(18)
.org kvm_ia64_ivt+0x4c00
//////////////////////////////////////////////////////////////////////
// 0x4c00 Entry 19 (size 64 bundles) Reserved
- KVM_FAULT(19)
+ KVM_FAULT(19)
.org kvm_ia64_ivt+0x5000
//////////////////////////////////////////////////////////////////////
// 0x5000 Entry 20 (size 16 bundles) Page Not Present
ENTRY(kvm_page_not_present)
- KVM_REFLECT(20)
+ KVM_REFLECT(20)
END(kvm_page_not_present)
.org kvm_ia64_ivt+0x5100
///////////////////////////////////////////////////////////////////////
// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
ENTRY(kvm_key_permission)
- KVM_REFLECT(21)
+ KVM_REFLECT(21)
END(kvm_key_permission)
.org kvm_ia64_ivt+0x5200
//////////////////////////////////////////////////////////////////////
// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
ENTRY(kvm_iaccess_rights)
- KVM_REFLECT(22)
+ KVM_REFLECT(22)
END(kvm_iaccess_rights)
.org kvm_ia64_ivt+0x5300
//////////////////////////////////////////////////////////////////////
// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
ENTRY(kvm_daccess_rights)
- KVM_REFLECT(23)
+ KVM_REFLECT(23)
END(kvm_daccess_rights)
.org kvm_ia64_ivt+0x5400
/////////////////////////////////////////////////////////////////////
// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
ENTRY(kvm_general_exception)
- KVM_REFLECT(24)
- KVM_FAULT(24)
+ KVM_REFLECT(24)
+ KVM_FAULT(24)
END(kvm_general_exception)
.org kvm_ia64_ivt+0x5500
//////////////////////////////////////////////////////////////////////
// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
ENTRY(kvm_disabled_fp_reg)
- KVM_REFLECT(25)
+ KVM_REFLECT(25)
END(kvm_disabled_fp_reg)
.org kvm_ia64_ivt+0x5600
////////////////////////////////////////////////////////////////////
// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
ENTRY(kvm_nat_consumption)
- KVM_REFLECT(26)
+ KVM_REFLECT(26)
END(kvm_nat_consumption)
.org kvm_ia64_ivt+0x5700
/////////////////////////////////////////////////////////////////////
// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
ENTRY(kvm_speculation_vector)
- KVM_REFLECT(27)
+ KVM_REFLECT(27)
END(kvm_speculation_vector)
.org kvm_ia64_ivt+0x5800
/////////////////////////////////////////////////////////////////////
// 0x5800 Entry 28 (size 16 bundles) Reserved
- KVM_FAULT(28)
+ KVM_FAULT(28)
.org kvm_ia64_ivt+0x5900
///////////////////////////////////////////////////////////////////
// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
ENTRY(kvm_debug_vector)
- KVM_FAULT(29)
+ KVM_FAULT(29)
END(kvm_debug_vector)
.org kvm_ia64_ivt+0x5a00
///////////////////////////////////////////////////////////////
// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
ENTRY(kvm_unaligned_access)
- KVM_REFLECT(30)
+ KVM_REFLECT(30)
END(kvm_unaligned_access)
.org kvm_ia64_ivt+0x5b00
//////////////////////////////////////////////////////////////////////
// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
ENTRY(kvm_unsupported_data_reference)
- KVM_REFLECT(31)
+ KVM_REFLECT(31)
END(kvm_unsupported_data_reference)
.org kvm_ia64_ivt+0x5c00
////////////////////////////////////////////////////////////////////
// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
ENTRY(kvm_floating_point_fault)
- KVM_REFLECT(32)
+ KVM_REFLECT(32)
END(kvm_floating_point_fault)
.org kvm_ia64_ivt+0x5d00
/////////////////////////////////////////////////////////////////////
// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
ENTRY(kvm_floating_point_trap)
- KVM_REFLECT(33)
+ KVM_REFLECT(33)
END(kvm_floating_point_trap)
.org kvm_ia64_ivt+0x5e00
//////////////////////////////////////////////////////////////////////
// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
ENTRY(kvm_lower_privilege_trap)
- KVM_REFLECT(34)
+ KVM_REFLECT(34)
END(kvm_lower_privilege_trap)
.org kvm_ia64_ivt+0x5f00
//////////////////////////////////////////////////////////////////////
// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
ENTRY(kvm_taken_branch_trap)
- KVM_REFLECT(35)
+ KVM_REFLECT(35)
END(kvm_taken_branch_trap)
.org kvm_ia64_ivt+0x6000
////////////////////////////////////////////////////////////////////
// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
ENTRY(kvm_single_step_trap)
- KVM_REFLECT(36)
+ KVM_REFLECT(36)
END(kvm_single_step_trap)
.global kvm_virtualization_fault_back
.org kvm_ia64_ivt+0x6100
/////////////////////////////////////////////////////////////////////
// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
ENTRY(kvm_virtualization_fault)
- mov r31=pr
- adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
- ;;
- st8 [r16] = r1
- adds r17 = VMM_VCPU_GP_OFFSET, r21
- ;;
- ld8 r1 = [r17]
- cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
- cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
- cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
- cmp.eq p9,p0=EVENT_RSM,r24
- cmp.eq p10,p0=EVENT_SSM,r24
- cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
- cmp.eq p12,p0=EVENT_THASH,r24
- (p6) br.dptk.many kvm_asm_mov_from_ar
- (p7) br.dptk.many kvm_asm_mov_from_rr
- (p8) br.dptk.many kvm_asm_mov_to_rr
- (p9) br.dptk.many kvm_asm_rsm
- (p10) br.dptk.many kvm_asm_ssm
- (p11) br.dptk.many kvm_asm_mov_to_psr
- (p12) br.dptk.many kvm_asm_thash
- ;;
+ mov r31=pr
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ st8 [r16] = r1
+ adds r17 = VMM_VCPU_GP_OFFSET, r21
+ ;;
+ ld8 r1 = [r17]
+ cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+ cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+ cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+ cmp.eq p9,p0=EVENT_RSM,r24
+ cmp.eq p10,p0=EVENT_SSM,r24
+ cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+ cmp.eq p12,p0=EVENT_THASH,r24
+(p6) br.dptk.many kvm_asm_mov_from_ar
+(p7) br.dptk.many kvm_asm_mov_from_rr
+(p8) br.dptk.many kvm_asm_mov_to_rr
+(p9) br.dptk.many kvm_asm_rsm
+(p10) br.dptk.many kvm_asm_ssm
+(p11) br.dptk.many kvm_asm_mov_to_psr
+(p12) br.dptk.many kvm_asm_thash
+ ;;
kvm_virtualization_fault_back:
- adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
- ;;
- ld8 r1 = [r16]
- ;;
- mov r19=37
- adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
- adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
- ;;
- st8 [r16] = r24
- st8 [r17] = r25
- ;;
- cmp.ne p6,p0=EVENT_RFI, r24
- (p6) br.sptk kvm_dispatch_virtualization_fault
- ;;
- adds r18=VMM_VPD_BASE_OFFSET,r21
- ;;
- ld8 r18=[r18]
- ;;
- adds r18=VMM_VPD_VIFS_OFFSET,r18
- ;;
- ld8 r18=[r18]
- ;;
- tbit.z p6,p0=r18,63
- (p6) br.sptk kvm_dispatch_virtualization_fault
- ;;
- //if vifs.v=1 desert current register frame
- alloc r18=ar.pfs,0,0,0,0
- br.sptk kvm_dispatch_virtualization_fault
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ ld8 r1 = [r16]
+ ;;
+ mov r19=37
+ adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+ adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+ ;;
+ st8 [r16] = r24
+ st8 [r17] = r25
+ ;;
+ cmp.ne p6,p0=EVENT_RFI, r24
+(p6) br.sptk kvm_dispatch_virtualization_fault
+ ;;
+ adds r18=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r18]
+ ;;
+ adds r18=VMM_VPD_VIFS_OFFSET,r18
+ ;;
+ ld8 r18=[r18]
+ ;;
+ tbit.z p6,p0=r18,63
+(p6) br.sptk kvm_dispatch_virtualization_fault
+ ;;
+//if vifs.v=1 desert current register frame
+ alloc r18=ar.pfs,0,0,0,0
+ br.sptk kvm_dispatch_virtualization_fault
END(kvm_virtualization_fault)
.org kvm_ia64_ivt+0x6200
//////////////////////////////////////////////////////////////
// 0x6200 Entry 38 (size 16 bundles) Reserved
- KVM_FAULT(38)
+ KVM_FAULT(38)
.org kvm_ia64_ivt+0x6300
/////////////////////////////////////////////////////////////////
// 0x6300 Entry 39 (size 16 bundles) Reserved
- KVM_FAULT(39)
+ KVM_FAULT(39)
.org kvm_ia64_ivt+0x6400
/////////////////////////////////////////////////////////////////
// 0x6400 Entry 40 (size 16 bundles) Reserved
- KVM_FAULT(40)
+ KVM_FAULT(40)
.org kvm_ia64_ivt+0x6500
//////////////////////////////////////////////////////////////////
// 0x6500 Entry 41 (size 16 bundles) Reserved
- KVM_FAULT(41)
+ KVM_FAULT(41)
.org kvm_ia64_ivt+0x6600
//////////////////////////////////////////////////////////////////
// 0x6600 Entry 42 (size 16 bundles) Reserved
- KVM_FAULT(42)
+ KVM_FAULT(42)
.org kvm_ia64_ivt+0x6700
//////////////////////////////////////////////////////////////////
// 0x6700 Entry 43 (size 16 bundles) Reserved
- KVM_FAULT(43)
+ KVM_FAULT(43)
.org kvm_ia64_ivt+0x6800
//////////////////////////////////////////////////////////////////
// 0x6800 Entry 44 (size 16 bundles) Reserved
- KVM_FAULT(44)
+ KVM_FAULT(44)
.org kvm_ia64_ivt+0x6900
///////////////////////////////////////////////////////////////////
// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
ENTRY(kvm_ia32_exception)
- KVM_FAULT(45)
+ KVM_FAULT(45)
END(kvm_ia32_exception)
.org kvm_ia64_ivt+0x6a00
////////////////////////////////////////////////////////////////////
// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
ENTRY(kvm_ia32_intercept)
- KVM_FAULT(47)
+ KVM_FAULT(47)
END(kvm_ia32_intercept)
.org kvm_ia64_ivt+0x6c00
/////////////////////////////////////////////////////////////////////
// 0x6c00 Entry 48 (size 16 bundles) Reserved
- KVM_FAULT(48)
+ KVM_FAULT(48)
.org kvm_ia64_ivt+0x6d00
//////////////////////////////////////////////////////////////////////
// 0x6d00 Entry 49 (size 16 bundles) Reserved
- KVM_FAULT(49)
+ KVM_FAULT(49)
.org kvm_ia64_ivt+0x6e00
//////////////////////////////////////////////////////////////////////
// 0x6e00 Entry 50 (size 16 bundles) Reserved
- KVM_FAULT(50)
+ KVM_FAULT(50)
.org kvm_ia64_ivt+0x6f00
/////////////////////////////////////////////////////////////////////
// 0x6f00 Entry 51 (size 16 bundles) Reserved
- KVM_FAULT(52)
+ KVM_FAULT(52)
.org kvm_ia64_ivt+0x7100
////////////////////////////////////////////////////////////////////
// 0x7100 Entry 53 (size 16 bundles) Reserved
- KVM_FAULT(53)
+ KVM_FAULT(53)
.org kvm_ia64_ivt+0x7200
/////////////////////////////////////////////////////////////////////
// 0x7200 Entry 54 (size 16 bundles) Reserved
- KVM_FAULT(54)
+ KVM_FAULT(54)
.org kvm_ia64_ivt+0x7300
////////////////////////////////////////////////////////////////////
// 0x7300 Entry 55 (size 16 bundles) Reserved
- KVM_FAULT(55)
+ KVM_FAULT(55)
.org kvm_ia64_ivt+0x7400
////////////////////////////////////////////////////////////////////
// 0x7400 Entry 56 (size 16 bundles) Reserved
- KVM_FAULT(56)
+ KVM_FAULT(56)
.org kvm_ia64_ivt+0x7500
/////////////////////////////////////////////////////////////////////
// 0x7500 Entry 57 (size 16 bundles) Reserved
- KVM_FAULT(57)
+ KVM_FAULT(57)
.org kvm_ia64_ivt+0x7600
/////////////////////////////////////////////////////////////////////
// 0x7600 Entry 58 (size 16 bundles) Reserved
- KVM_FAULT(58)
+ KVM_FAULT(58)
.org kvm_ia64_ivt+0x7700
////////////////////////////////////////////////////////////////////
// 0x7700 Entry 59 (size 16 bundles) Reserved
- KVM_FAULT(59)
+ KVM_FAULT(59)
.org kvm_ia64_ivt+0x7800
////////////////////////////////////////////////////////////////////
// 0x7800 Entry 60 (size 16 bundles) Reserved
- KVM_FAULT(60)
+ KVM_FAULT(60)
.org kvm_ia64_ivt+0x7900
/////////////////////////////////////////////////////////////////////
// 0x7900 Entry 61 (size 16 bundles) Reserved
- KVM_FAULT(61)
+ KVM_FAULT(61)
.org kvm_ia64_ivt+0x7a00
/////////////////////////////////////////////////////////////////////
// 0x7a00 Entry 62 (size 16 bundles) Reserved
- KVM_FAULT(62)
+ KVM_FAULT(62)
.org kvm_ia64_ivt+0x7b00
/////////////////////////////////////////////////////////////////////
// 0x7b00 Entry 63 (size 16 bundles) Reserved
- KVM_FAULT(63)
+ KVM_FAULT(63)
.org kvm_ia64_ivt+0x7c00
////////////////////////////////////////////////////////////////////
// 0x7c00 Entry 64 (size 16 bundles) Reserved
- KVM_FAULT(64)
+ KVM_FAULT(64)
.org kvm_ia64_ivt+0x7d00
/////////////////////////////////////////////////////////////////////
// 0x7d00 Entry 65 (size 16 bundles) Reserved
- KVM_FAULT(65)
+ KVM_FAULT(65)
.org kvm_ia64_ivt+0x7e00
/////////////////////////////////////////////////////////////////////
// 0x7e00 Entry 66 (size 16 bundles) Reserved
- KVM_FAULT(66)
+ KVM_FAULT(66)
.org kvm_ia64_ivt+0x7f00
////////////////////////////////////////////////////////////////////
// 0x7f00 Entry 67 (size 16 bundles) Reserved
- KVM_FAULT(67)
+ KVM_FAULT(67)
.org kvm_ia64_ivt+0x8000
// There is no particular reason for this code to be here, other than that
@@ -804,132 +808,128 @@ END(kvm_ia32_intercept)
ENTRY(kvm_dtlb_miss_dispatch)
- mov r19 = 2
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
- ;;
- KVM_SAVE_REST
- KVM_SAVE_EXTRA
- mov rp=r14
- ;;
- adds out2=16,r12
- br.call.sptk.many b6=kvm_page_fault
+ mov r19 = 2
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+ ;;
+ KVM_SAVE_REST
+ KVM_SAVE_EXTRA
+ mov rp=r14
+ ;;
+ adds out2=16,r12
+ br.call.sptk.many b6=kvm_page_fault
END(kvm_dtlb_miss_dispatch)
ENTRY(kvm_itlb_miss_dispatch)
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- adds out2=16,r12
- br.call.sptk.many b6=kvm_page_fault
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12
+ br.call.sptk.many b6=kvm_page_fault
END(kvm_itlb_miss_dispatch)
ENTRY(kvm_dispatch_reflection)
- /*
- * Input:
- * psr.ic: off
- * r19: intr type (offset into ivt, see ia64_int.h)
- * r31: contains saved predicates (pr)
- */
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,5,0
- mov out0=cr.ifa
- mov out1=cr.isr
- mov out2=cr.iim
- mov out3=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- adds out4=16,r12
- br.call.sptk.many b6=reflect_interruption
+/*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out2=cr.iim
+ mov out3=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out4=16,r12
+ br.call.sptk.many b6=reflect_interruption
END(kvm_dispatch_reflection)
ENTRY(kvm_dispatch_virtualization_fault)
- adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
- adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
- ;;
- st8 [r16] = r24
- st8 [r17] = r25
- ;;
- KVM_SAVE_MIN_WITH_COVER_R19
- ;;
- alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
- mov out0=r13 //vcpu
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
- ;;
- KVM_SAVE_REST
- KVM_SAVE_EXTRA
- mov rp=r14
- ;;
- adds out1=16,sp //regs
- br.call.sptk.many b6=kvm_emulate
+ adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+ adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+ ;;
+ st8 [r16] = r24
+ st8 [r17] = r25
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
+ mov out0=r13 //vcpu
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+ ;;
+ KVM_SAVE_REST
+ KVM_SAVE_EXTRA
+ mov rp=r14
+ ;;
+ adds out1=16,sp //regs
+ br.call.sptk.many b6=kvm_emulate
END(kvm_dispatch_virtualization_fault)
ENTRY(kvm_dispatch_interrupt)
- KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
- ;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
- //mov out0=cr.ivr // pass cr.ivr as first arg
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- ;;
- ssm psr.ic
- ;;
- srlz.i
- ;;
- //(p15) ssm psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- mov out0=r13 // pass pointer to pt_regs as second arg
- br.call.sptk.many b6=kvm_ia64_handle_irq
+ KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ //(p15) ssm psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ mov out0=r13 // pass pointer to pt_regs as second arg
+ br.call.sptk.many b6=kvm_ia64_handle_irq
END(kvm_dispatch_interrupt)
-
-
-
GLOBAL_ENTRY(ia64_leave_nested)
rsm psr.i
;;
@@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested)
;;
ldf.fill f11=[r2]
// mov r18=r13
-// mov r21=r13
+// mov r21=r13
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
;;
@@ -1058,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested)
rfi
END(ia64_leave_nested)
-
-
GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
- /*
- * work.need_resched etc. mustn't get changed
- *by this CPU before it returns to
- ;;
- * user- or fsys-mode, hence we disable interrupts early on:
- */
- adds r2 = PT(R4)+16,r12
- adds r3 = PT(R5)+16,r12
- adds r8 = PT(EML_UNAT)+16,r12
- ;;
- ld8 r8 = [r8]
- ;;
- mov ar.unat=r8
- ;;
- ld8.fill r4=[r2],16 //load r4
- ld8.fill r5=[r3],16 //load r5
- ;;
- ld8.fill r6=[r2] //load r6
- ld8.fill r7=[r3] //load r7
- ;;
+/*
+ * work.need_resched etc. mustn't get changed
+ *by this CPU before it returns to
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+ adds r2 = PT(R4)+16,r12
+ adds r3 = PT(R5)+16,r12
+ adds r8 = PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r8 = [r8]
+ ;;
+ mov ar.unat=r8
+ ;;
+ ld8.fill r4=[r2],16 //load r4
+ ld8.fill r5=[r3],16 //load r5
+ ;;
+ ld8.fill r6=[r2] //load r6
+ ld8.fill r7=[r3] //load r7
+ ;;
END(ia64_leave_hypervisor_prepare)
//fall through
GLOBAL_ENTRY(ia64_leave_hypervisor)
- rsm psr.i
- ;;
- br.call.sptk.many b0=leave_hypervisor_tail
- ;;
- adds r20=PT(PR)+16,r12
- adds r8=PT(EML_UNAT)+16,r12
- ;;
- ld8 r8=[r8]
- ;;
- mov ar.unat=r8
- ;;
- lfetch [r20],PT(CR_IPSR)-PT(PR)
- adds r2 = PT(B6)+16,r12
- adds r3 = PT(B7)+16,r12
- ;;
- lfetch [r20]
- ;;
- ld8 r24=[r2],16 /* B6 */
- ld8 r25=[r3],16 /* B7 */
- ;;
- ld8 r26=[r2],16 /* ar_csd */
- ld8 r27=[r3],16 /* ar_ssd */
- mov b6 = r24
- ;;
- ld8.fill r8=[r2],16
- ld8.fill r9=[r3],16
- mov b7 = r25
- ;;
- mov ar.csd = r26
- mov ar.ssd = r27
- ;;
- ld8.fill r10=[r2],PT(R15)-PT(R10)
- ld8.fill r11=[r3],PT(R14)-PT(R11)
- ;;
- ld8.fill r15=[r2],PT(R16)-PT(R15)
- ld8.fill r14=[r3],PT(R17)-PT(R14)
- ;;
- ld8.fill r16=[r2],16
- ld8.fill r17=[r3],16
- ;;
- ld8.fill r18=[r2],16
- ld8.fill r19=[r3],16
- ;;
- ld8.fill r20=[r2],16
- ld8.fill r21=[r3],16
- ;;
- ld8.fill r22=[r2],16
- ld8.fill r23=[r3],16
- ;;
- ld8.fill r24=[r2],16
- ld8.fill r25=[r3],16
- ;;
- ld8.fill r26=[r2],16
- ld8.fill r27=[r3],16
- ;;
- ld8.fill r28=[r2],16
- ld8.fill r29=[r3],16
- ;;
- ld8.fill r30=[r2],PT(F6)-PT(R30)
- ld8.fill r31=[r3],PT(F7)-PT(R31)
- ;;
- rsm psr.i | psr.ic
- // initiate turning off of interrupt and interruption collection
- invala // invalidate ALAT
- ;;
- srlz.i // ensure interruption collection is off
- ;;
- bsw.0
- ;;
- adds r16 = PT(CR_IPSR)+16,r12
- adds r17 = PT(CR_IIP)+16,r12
- mov r21=r13 // get current
- ;;
- ld8 r31=[r16],16 // load cr.ipsr
- ld8 r30=[r17],16 // load cr.iip
- ;;
- ld8 r29=[r16],16 // load cr.ifs
- ld8 r28=[r17],16 // load ar.unat
- ;;
- ld8 r27=[r16],16 // load ar.pfs
- ld8 r26=[r17],16 // load ar.rsc
- ;;
- ld8 r25=[r16],16 // load ar.rnat
- ld8 r24=[r17],16 // load ar.bspstore
- ;;
- ld8 r23=[r16],16 // load predicates
- ld8 r22=[r17],16 // load b0
- ;;
- ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
- ld8.fill r1=[r17],16 //load r1
- ;;
- ld8.fill r12=[r16],16 //load r12
- ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
- ;;
- ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
- ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
- ;;
- ld8.fill r3=[r16] //load r3
- ld8 r18=[r17] //load ar_ccv
- ;;
- mov ar.fpsr=r19
- mov ar.ccv=r18
- shr.u r18=r20,16
- ;;
+ rsm psr.i
+ ;;
+ br.call.sptk.many b0=leave_hypervisor_tail
+ ;;
+ adds r20=PT(PR)+16,r12
+ adds r8=PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r8=[r8]
+ ;;
+ mov ar.unat=r8
+ ;;
+ lfetch [r20],PT(CR_IPSR)-PT(PR)
+ adds r2 = PT(B6)+16,r12
+ adds r3 = PT(B7)+16,r12
+ ;;
+ lfetch [r20]
+ ;;
+ ld8 r24=[r2],16 /* B6 */
+ ld8 r25=[r3],16 /* B7 */
+ ;;
+ ld8 r26=[r2],16 /* ar_csd */
+ ld8 r27=[r3],16 /* ar_ssd */
+ mov b6 = r24
+ ;;
+ ld8.fill r8=[r2],16
+ ld8.fill r9=[r3],16
+ mov b7 = r25
+ ;;
+ mov ar.csd = r26
+ mov ar.ssd = r27
+ ;;
+ ld8.fill r10=[r2],PT(R15)-PT(R10)
+ ld8.fill r11=[r3],PT(R14)-PT(R11)
+ ;;
+ ld8.fill r15=[r2],PT(R16)-PT(R15)
+ ld8.fill r14=[r3],PT(R17)-PT(R14)
+ ;;
+ ld8.fill r16=[r2],16
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ ;;
+ ld8.fill r22=[r2],16
+ ld8.fill r23=[r3],16
+ ;;
+ ld8.fill r24=[r2],16
+ ld8.fill r25=[r3],16
+ ;;
+ ld8.fill r26=[r2],16
+ ld8.fill r27=[r3],16
+ ;;
+ ld8.fill r28=[r2],16
+ ld8.fill r29=[r3],16
+ ;;
+ ld8.fill r30=[r2],PT(F6)-PT(R30)
+ ld8.fill r31=[r3],PT(F7)-PT(R31)
+ ;;
+ rsm psr.i | psr.ic
+ // initiate turning off of interrupt and interruption collection
+ invala // invalidate ALAT
+ ;;
+ srlz.i // ensure interruption collection is off
+ ;;
+ bsw.0
+ ;;
+ adds r16 = PT(CR_IPSR)+16,r12
+ adds r17 = PT(CR_IIP)+16,r12
+ mov r21=r13 // get current
+ ;;
+ ld8 r31=[r16],16 // load cr.ipsr
+ ld8 r30=[r17],16 // load cr.iip
+ ;;
+ ld8 r29=[r16],16 // load cr.ifs
+ ld8 r28=[r17],16 // load ar.unat
+ ;;
+ ld8 r27=[r16],16 // load ar.pfs
+ ld8 r26=[r17],16 // load ar.rsc
+ ;;
+ ld8 r25=[r16],16 // load ar.rnat
+ ld8 r24=[r17],16 // load ar.bspstore
+ ;;
+ ld8 r23=[r16],16 // load predicates
+ ld8 r22=[r17],16 // load b0
+ ;;
+ ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 //load r1
+ ;;
+ ld8.fill r12=[r16],16 //load r12
+ ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
+ ;;
+ ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
+ ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
+ ;;
+ ld8.fill r3=[r16] //load r3
+ ld8 r18=[r17] //load ar_ccv
+ ;;
+ mov ar.fpsr=r19
+ mov ar.ccv=r18
+ shr.u r18=r20,16
+ ;;
kvm_rbs_switch:
- mov r19=96
+ mov r19=96
kvm_dont_preserve_current_frame:
/*
@@ -1201,76 +1198,76 @@ kvm_dont_preserve_current_frame:
# define pReturn p7
# define Nregs 14
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
- sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
- ;;
- mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
- shladd in0=loc1,3,r19
- mov in1=0
- ;;
- TEXT_ALIGN(32)
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
+ sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
+ ;;
+ mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
+ shladd in0=loc1,3,r19
+ mov in1=0
+ ;;
+ TEXT_ALIGN(32)
kvm_rse_clear_invalid:
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0
- // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
- add out1=1,in1 // increment recursion count
- mov loc1=0
- mov loc2=0
- ;;
- mov loc3=0
- mov loc4=0
- mov loc5=0
- mov loc6=0
- mov loc7=0
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ cmp.lt pRecurse,p0=Nregs*8,in0
+ // if more than Nregs regs left to clear, (re)curse
+ add out0=-Nregs*8,in0
+ add out1=1,in1 // increment recursion count
+ mov loc1=0
+ mov loc2=0
+ ;;
+ mov loc3=0
+ mov loc4=0
+ mov loc5=0
+ mov loc6=0
+ mov loc7=0
(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
- ;;
- mov loc8=0
- mov loc9=0
- cmp.ne pReturn,p0=r0,in1
- // if recursion count != 0, we need to do a br.ret
- mov loc10=0
- mov loc11=0
+ ;;
+ mov loc8=0
+ mov loc9=0
+ cmp.ne pReturn,p0=r0,in1
+ // if recursion count != 0, we need to do a br.ret
+ mov loc10=0
+ mov loc11=0
(pReturn) br.ret.dptk.many b0
# undef pRecurse
# undef pReturn
// loadrs has already been shifted
- alloc r16=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- loadrs
- ;;
- mov ar.bspstore=r24
- ;;
- mov ar.unat=r28
- mov ar.rnat=r25
- mov ar.rsc=r26
- ;;
- mov cr.ipsr=r31
- mov cr.iip=r30
- mov cr.ifs=r29
- mov ar.pfs=r27
- adds r18=VMM_VPD_BASE_OFFSET,r21
- ;;
- ld8 r18=[r18] //vpd
- adds r17=VMM_VCPU_ISR_OFFSET,r21
- ;;
- ld8 r17=[r17]
- adds r19=VMM_VPD_VPSR_OFFSET,r18
- ;;
- ld8 r19=[r19] //vpsr
- mov r25=r18
- adds r16= VMM_VCPU_GP_OFFSET,r21
- ;;
- ld8 r16= [r16] // Put gp in r24
- movl r24=@gprel(ia64_vmm_entry) // calculate return address
- ;;
- add r24=r24,r16
- ;;
- br.sptk.many kvm_vps_sync_write // call the service
- ;;
+ alloc r16=ar.pfs,0,0,0,0 // drop current register frame
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=r24
+ ;;
+ mov ar.unat=r28
+ mov ar.rnat=r25
+ mov ar.rsc=r26
+ ;;
+ mov cr.ipsr=r31
+ mov cr.iip=r30
+ mov cr.ifs=r29
+ mov ar.pfs=r27
+ adds r18=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r18] //vpd
+ adds r17=VMM_VCPU_ISR_OFFSET,r21
+ ;;
+ ld8 r17=[r17]
+ adds r19=VMM_VPD_VPSR_OFFSET,r18
+ ;;
+ ld8 r19=[r19] //vpsr
+ mov r25=r18
+ adds r16= VMM_VCPU_GP_OFFSET,r21
+ ;;
+ ld8 r16= [r16] // Put gp in r24
+ movl r24=@gprel(ia64_vmm_entry) // calculate return address
+ ;;
+ add r24=r24,r16
+ ;;
+ br.sptk.many kvm_vps_sync_write // call the service
+ ;;
END(ia64_leave_hypervisor)
// fall through
GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1283,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry)
* r22:b0
* r23:predicate
*/
- mov r24=r22
- mov r25=r18
- tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
- (p1) br.cond.sptk.few kvm_vps_resume_normal
- (p2) br.cond.sptk.many kvm_vps_resume_handler
- ;;
+ mov r24=r22
+ mov r25=r18
+ tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
+(p1) br.cond.sptk.few kvm_vps_resume_normal
+(p2) br.cond.sptk.many kvm_vps_resume_handler
+ ;;
END(ia64_vmm_entry)
-
-
/*
* extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
* u64 arg3, u64 arg4, u64 arg5,
@@ -1310,88 +1305,88 @@ psrsave = loc2
entry = loc3
hostret = r24
- alloc pfssave=ar.pfs,4,4,0,0
- mov rpsave=rp
- adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
- ;;
- ld8 entry=[entry]
-1: mov hostret=ip
- mov r25=in1 // copy arguments
- mov r26=in2
- mov r27=in3
- mov psrsave=psr
- ;;
- tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
- tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
- ;;
- add hostret=2f-1b,hostret // calculate return address
- add entry=entry,in0
- ;;
- rsm psr.i | psr.ic
- ;;
- srlz.i
- mov b6=entry
- br.cond.sptk b6 // call the service
+ alloc pfssave=ar.pfs,4,4,0,0
+ mov rpsave=rp
+ adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+ ;;
+ ld8 entry=[entry]
+1: mov hostret=ip
+ mov r25=in1 // copy arguments
+ mov r26=in2
+ mov r27=in3
+ mov psrsave=psr
+ ;;
+ tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
+ tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
+ ;;
+ add hostret=2f-1b,hostret // calculate return address
+ add entry=entry,in0
+ ;;
+ rsm psr.i | psr.ic
+ ;;
+ srlz.i
+ mov b6=entry
+ br.cond.sptk b6 // call the service
2:
- // Architectural sequence for enabling interrupts if necessary
+// Architectural sequence for enabling interrupts if necessary
(p7) ssm psr.ic
- ;;
+ ;;
(p7) srlz.i
- ;;
+ ;;
//(p6) ssm psr.i
- ;;
- mov rp=rpsave
- mov ar.pfs=pfssave
- mov r8=r31
- ;;
- srlz.d
- br.ret.sptk rp
+ ;;
+ mov rp=rpsave
+ mov ar.pfs=pfssave
+ mov r8=r31
+ ;;
+ srlz.d
+ br.ret.sptk rp
END(ia64_call_vsa)
#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
GLOBAL_ENTRY(vmm_reset_entry)
- //set up ipsr, iip, vpd.vpsr, dcr
- // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
- // For DCR: all bits 0
- bsw.0
- ;;
- mov r21 =r13
- adds r14=-VMM_PT_REGS_SIZE, r12
- ;;
- movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
- movl r10=0x8000000000000000
- adds r16=PT(CR_IIP), r14
- adds r20=PT(R1), r14
- ;;
- rsm psr.ic | psr.i
- ;;
- srlz.i
- ;;
- mov ar.rsc = 0
- ;;
- flushrs
- ;;
- mov ar.bspstore = 0
- // clear BSPSTORE
- ;;
- mov cr.ipsr=r6
- mov cr.ifs=r10
- ld8 r4 = [r16] // Set init iip for first run.
- ld8 r1 = [r20]
- ;;
- mov cr.iip=r4
- adds r16=VMM_VPD_BASE_OFFSET,r13
- ;;
- ld8 r18=[r16]
- ;;
- adds r19=VMM_VPD_VPSR_OFFSET,r18
- ;;
- ld8 r19=[r19]
- mov r17=r0
- mov r22=r0
- mov r23=r0
- br.cond.sptk ia64_vmm_entry
- br.ret.sptk b0
+ //set up ipsr, iip, vpd.vpsr, dcr
+ // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+ // For DCR: all bits 0
+ bsw.0
+ ;;
+ mov r21 =r13
+ adds r14=-VMM_PT_REGS_SIZE, r12
+ ;;
+ movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+ movl r10=0x8000000000000000
+ adds r16=PT(CR_IIP), r14
+ adds r20=PT(R1), r14
+ ;;
+ rsm psr.ic | psr.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.rsc = 0
+ ;;
+ flushrs
+ ;;
+ mov ar.bspstore = 0
+ // clear BSPSTORE
+ ;;
+ mov cr.ipsr=r6
+ mov cr.ifs=r10
+ ld8 r4 = [r16] // Set init iip for first run.
+ ld8 r1 = [r20]
+ ;;
+ mov cr.iip=r4
+ adds r16=VMM_VPD_BASE_OFFSET,r13
+ ;;
+ ld8 r18=[r16]
+ ;;
+ adds r19=VMM_VPD_VPSR_OFFSET,r18
+ ;;
+ ld8 r19=[r19]
+ mov r17=r0
+ mov r22=r0
+ mov r23=r0
+ br.cond.sptk ia64_vmm_entry
+ br.ret.sptk b0
END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index e22b93361e08..6b6307a3bd55 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
u64 i, dirty_pages = 1;
u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
- void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
- + KVM_MEM_DIRTY_LOG_OFS;
+ void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
+
dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
vmm_spin_lock(lock);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 0c66dbdd1d72..66fd705e82c0 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,14 +227,14 @@ finish_up:
return new_irq_info;
}
-static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
{
struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
nasid_t nasid;
int slice;
- nasid = cpuid_to_nasid(first_cpu(mask));
- slice = cpuid_to_slice(first_cpu(mask));
+ nasid = cpuid_to_nasid(cpumask_first(mask));
+ slice = cpuid_to_slice(cpumask_first(mask));
list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
sn_irq_lh[irq], list)
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 83f190ffe350..ca553b0429ce 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,8 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
}
#ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void sn_set_msi_irq_affinity(unsigned int irq,
+ const struct cpumask *cpu_mask)
{
struct msi_msg msg;
int slice;
@@ -164,7 +165,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
struct sn_pcibus_provider *provider;
unsigned int cpu;
- cpu = first_cpu(cpu_mask);
+ cpu = cpumask_first(cpu_mask);
sn_irq_info = sn_msi_info[irq].sn_irq_info;
if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
return;
@@ -204,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = cpu_mask;
+ irq_desc[irq].affinity = *cpu_mask;
}
#endif /* CONFIG_SMP */
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 636588e7e068..be339477f906 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -385,7 +385,6 @@ static int sn_topology_show(struct seq_file *s, void *d)
int j;
const char *slabname;
int ordinal;
- cpumask_t cpumask;
char slice;
struct cpuinfo_ia64 *c;
struct sn_hwperf_port_info *ptdata;
@@ -473,23 +472,21 @@ static int sn_topology_show(struct seq_file *s, void *d)
* CPUs on this node, if any
*/
if (!SN_HWPERF_IS_IONODE(obj)) {
- cpumask = node_to_cpumask(ordinal);
- for_each_online_cpu(i) {
- if (cpu_isset(i, cpumask)) {
- slice = 'a' + cpuid_to_slice(i);
- c = cpu_data(i);
- seq_printf(s, "cpu %d %s%c local"
- " freq %luMHz, arch ia64",
- i, obj->location, slice,
- c->proc_freq / 1000000);
- for_each_online_cpu(j) {
- seq_printf(s, j ? ":%d" : ", dist %d",
- node_distance(
+ for_each_cpu_and(i, cpu_online_mask,
+ cpumask_of_node(ordinal)) {
+ slice = 'a' + cpuid_to_slice(i);
+ c = cpu_data(i);
+ seq_printf(s, "cpu %d %s%c local"
+ " freq %luMHz, arch ia64",
+ i, obj->location, slice,
+ c->proc_freq / 1000000);
+ for_each_online_cpu(j) {
+ seq_printf(s, j ? ":%d" : ", dist %d",
+ node_distance(
cpu_to_node(i),
cpu_to_node(j)));
- }
- seq_putc(s, '\n');
}
+ seq_putc(s, '\n');
}
}
}
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 29047d5c259a..cabba332cc48 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -10,6 +10,7 @@ config M32R
default y
select HAVE_IDE
select HAVE_OPROFILE
+ select INIT_ALL_POSSIBLE
config SBUS
bool
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 0d658dbb6766..016885c6f260 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -11,7 +11,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 39cb6da72dcb..2547d6c4a827 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -73,17 +73,11 @@ static unsigned int bsp_phys_id = -1;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
-/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
cpumask_t cpu_bootout_map;
cpumask_t cpu_bootin_map;
static cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
/* Per CPU bogomips and other parameters */
struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned;
@@ -598,7 +592,7 @@ int setup_profiling_timer(unsigned int multiplier)
* accounting. At that time they also adjust their APIC timers
* accordingly.
*/
- for (i = 0; i < NR_CPUS; ++i)
+ for_each_possible_cpu(i)
per_cpu(prof_multiplier, i) = multiplier;
return 0;
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 836fb66f080d..c825bde17cb3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -280,7 +280,6 @@ config M68060
config MMU_MOTOROLA
bool
- depends on MMU && !MMU_SUN3
config MMU_SUN3
bool
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 3042c2bc8c58..632ce016014d 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,7 +40,6 @@
* alignment requirements and potentially different initial
* setup.
*/
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/include/asm/bitops.h b/arch/m68knommu/include/asm/bitops.h
index 6f3685eab44c..9d3cbe5fad1e 100644
--- a/arch/m68knommu/include/asm/bitops.h
+++ b/arch/m68knommu/include/asm/bitops.h
@@ -331,6 +331,7 @@ found_middle:
#endif /* __KERNEL__ */
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#endif /* _M68KNOMMU_BITOPS_H */
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 344c01aede08..fe282de1d596 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -12,7 +12,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c
index c5b916700b22..2a12e7fa9748 100644
--- a/arch/m68knommu/platform/coldfire/pit.c
+++ b/arch/m68knommu/platform/coldfire/pit.c
@@ -156,7 +156,7 @@ void hw_timer_init(void)
{
u32 imr;
- cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+ cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id());
cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32);
cf_pit_clockevent.max_delta_ns =
clockevent_delta2ns(0xFFFF, &cf_pit_clockevent);
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index a58f0eecc68f..abc62aa744ac 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,8 @@ static inline void smtc_im_ack_irq(unsigned int irq)
#ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
#include <linux/cpumask.h>
-extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity);
+extern void plat_set_irq_affinity(unsigned int irq,
+ const struct cpumask *affinity);
extern void smtc_forward_irq(unsigned int irq);
/*
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 7785bec732f2..55d481569a1f 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -25,11 +25,13 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
#define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid)
#define parent_node(node) (node)
#define node_to_cpumask(node) (hub_data(node)->h_cpus)
-#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node)))
+#define cpumask_of_node(node) (&hub_data(node)->h_cpus)
+#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node)))
struct pci_bus;
extern int pcibus_to_node(struct pci_bus *);
#define pcibus_to_cpumask(bus) (cpu_online_map)
+#define cpumask_of_pcibus(bus) (cpu_online_mask)
extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
@@ -37,7 +39,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
/* sched_domains SD_NODE_INIT for SGI IP27 machines */
#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 0ff5b523ea77..86557b5d1b3f 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -38,9 +38,6 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */
#define SMP_CALL_FUNCTION 0x2
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map phys_cpu_present_map
-
extern void asmlinkage smp_bootstrap(void);
/*
diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index d7f8a782aae4..03965cb1b252 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
BUG_ON(HZ != 100);
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
clockevents_register_device(cd);
action->dev_id = cd;
setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index 0a57f86945f1..b820661678b0 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void)
cd->min_delta_ns = clockevent_delta2ns(2, cd);
cd->rating = 200;
cd->irq = irq;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = sibyte_next_event;
cd->set_mode = sibyte_set_mode;
clockevents_register_device(cd);
@@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void)
action->name = name;
action->dev_id = cd;
- irq_set_affinity(irq, cpumask_of_cpu(cpu));
+ irq_set_affinity(irq, cpumask_of(cpu));
setup_irq(irq, action);
}
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
index df4acb68bfb5..1ada45ea0700 100644
--- a/arch/mips/kernel/cevt-ds1287.c
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -88,7 +88,6 @@ static void ds1287_event_handler(struct clock_event_device *dev)
static struct clock_event_device ds1287_clockevent = {
.name = "ds1287",
.features = CLOCK_EVT_FEAT_PERIODIC,
- .cpumask = CPU_MASK_CPU0,
.set_next_event = ds1287_set_next_event,
.set_mode = ds1287_set_mode,
.event_handler = ds1287_event_handler,
@@ -122,6 +121,7 @@ int __init ds1287_clockevent_init(int irq)
clockevent_set_clock(cd, 32768);
cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+ cd->cpumask = cpumask_of(0);
clockevents_register_device(&ds1287_clockevent);
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index 6e2f58520afb..e9b787feedcb 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -96,7 +96,6 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev)
static struct clock_event_device gt641xx_timer0_clockevent = {
.name = "gt641xx-timer0",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
- .cpumask = CPU_MASK_CPU0,
.irq = GT641XX_TIMER0_IRQ,
.set_next_event = gt641xx_timer0_set_next_event,
.set_mode = gt641xx_timer0_set_mode,
@@ -132,6 +131,7 @@ static int __init gt641xx_timer0_clockevent_init(void)
clockevent_set_clock(cd, gt641xx_base_clock);
cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+ cd->cpumask = cpumask_of(0);
clockevents_register_device(&gt641xx_timer0_clockevent);
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 4a4c59f2737a..e1ec83b68031 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void)
cd->rating = 300;
cd->irq = irq;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = mips_next_event;
cd->set_mode = mips_set_clock_mode;
cd->event_handler = mips_event_handler;
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 63ac3ad462bc..a2eebaafda52 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void)
cd->min_delta_ns = clockevent_delta2ns(2, cd);
cd->rating = 200;
cd->irq = irq;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = sibyte_next_event;
cd->set_mode = sibyte_set_mode;
clockevents_register_device(cd);
@@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void)
action->name = name;
action->dev_id = cd;
- irq_set_affinity(irq, cpumask_of_cpu(cpu));
+ irq_set_affinity(irq, cpumask_of(cpu));
setup_irq(irq, action);
}
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
index 5162fe4b5952..6d45e24db5bf 100644
--- a/arch/mips/kernel/cevt-smtc.c
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void)
cd->rating = 300;
cd->irq = irq;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = mips_next_event;
cd->set_mode = mips_set_clock_mode;
cd->event_handler = mips_event_handler;
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index b5fc4eb412d2..eccf7d6096bd 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -112,7 +112,6 @@ static struct clock_event_device txx9tmr_clock_event_device = {
.name = "TXx9",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.rating = 200,
- .cpumask = CPU_MASK_CPU0,
.set_mode = txx9tmr_set_mode,
.set_next_event = txx9tmr_set_next_event,
};
@@ -150,6 +149,7 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq,
clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd);
cd->min_delta_ns = clockevent_delta2ns(0xf, cd);
cd->irq = irq;
+ cd->cpumask = cpumask_of(0),
clockevents_register_device(cd);
setup_irq(irq, &txx9tmr_irq);
printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n",
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index b6ac55162b9a..f4d187825f96 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -115,7 +115,7 @@ void __init setup_pit_timer(void)
* Start pit with the boot cpu mask and make it global after the
* IO_APIC has been initialized.
*/
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
clockevent_set_clock(cd, CLOCK_TICK_RATE);
cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd);
cd->min_delta_ns = clockevent_delta2ns(0xF, cd);
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index d72487ad7c15..149cd914526e 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -9,7 +9,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index f0a4bb19e096..494a49a317e9 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
static DEFINE_SPINLOCK(gic_lock);
-static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
+static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
cpumask_t tmp = CPU_MASK_NONE;
unsigned long flags;
@@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
pr_debug(KERN_DEBUG "%s called\n", __func__);
irq -= _irqbase;
- cpus_and(tmp, cpumask, cpu_online_map);
+ cpumask_and(&tmp, cpumask, cpu_online_mask);
if (cpus_empty(tmp))
return;
@@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
}
- irq_desc[irq].affinity = cpumask;
+ irq_desc[irq].affinity = *cpumask;
spin_unlock_irqrestore(&gic_lock, flags);
}
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index ca476c4f62a5..f27beca4b26d 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -51,10 +51,10 @@ static int __init allowcpus(char *str)
int len;
cpus_clear(cpu_allow_map);
- if (cpulist_parse(str, cpu_allow_map) == 0) {
+ if (cpulist_parse(str, &cpu_allow_map) == 0) {
cpu_set(0, cpu_allow_map);
cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
- len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map);
+ len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map);
buf[len] = '\0';
pr_debug("Allowable CPUs: %s\n", buf);
return 1;
@@ -226,7 +226,7 @@ void __init cmp_smp_setup(void)
for (i = 1; i < NR_CPUS; i++) {
if (amon_cpu_avail(i)) {
- cpu_set(i, phys_cpu_present_map);
+ cpu_set(i, cpu_possible_map);
__cpu_number_map[i] = ++ncpu;
__cpu_logical_map[ncpu] = i;
}
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 87a1816c1f45..6f7ee5ac46ee 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
write_vpe_c0_vpeconf0(tmp);
/* Record this as available CPU */
- cpu_set(tc, phys_cpu_present_map);
+ cpu_set(tc, cpu_possible_map);
__cpu_number_map[tc] = ++ncpu;
__cpu_logical_map[ncpu] = tc;
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 8bf88faf5afd..3da94704f816 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -44,15 +44,10 @@
#include <asm/mipsmtregs.h>
#endif /* CONFIG_MIPS_MT_SMTC */
-cpumask_t phys_cpu_present_map; /* Bitmask of available CPUs */
volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
-cpumask_t cpu_online_map; /* Bitmask of currently online CPUs */
int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
-EXPORT_SYMBOL(phys_cpu_present_map);
-EXPORT_SYMBOL(cpu_online_map);
-
extern void cpu_idle(void);
/* Number of TCs (or siblings in Intel speak) per CPU core */
@@ -195,7 +190,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
/* preload SMP state for boot cpu */
void __devinit smp_prepare_boot_cpu(void)
{
- cpu_set(0, phys_cpu_present_map);
+ cpu_set(0, cpu_possible_map);
cpu_set(0, cpu_online_map);
cpu_set(0, cpu_callin_map);
}
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 897fb2b4751c..b6cca01ff82b 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -290,7 +290,7 @@ static void smtc_configure_tlb(void)
* possibly leave some TCs/VPEs as "slave" processors.
*
* Use c0_MVPConf0 to find out how many TCs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
*/
int __init smtc_build_cpu_map(int start_cpu_slot)
@@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot)
*/
ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
- cpu_set(i, phys_cpu_present_map);
+ cpu_set(i, cpu_possible_map);
__cpu_number_map[i] = i;
__cpu_logical_map[i] = i;
}
@@ -521,7 +521,7 @@ void smtc_prepare_cpus(int cpus)
* Pull any physically present but unused TCs out of circulation.
*/
while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
- cpu_clear(tc, phys_cpu_present_map);
+ cpu_clear(tc, cpu_possible_map);
cpu_clear(tc, cpu_present_map);
tc++;
}
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index f84a46a8ae6e..aabd7274507b 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = {
*/
-void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
{
- cpumask_t tmask = affinity;
+ cpumask_t tmask = *affinity;
int cpu = 0;
void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
@@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
* be made to forward to an offline "CPU".
*/
- for_each_cpu_mask(cpu, affinity) {
+ for_each_cpu(cpu, affinity) {
if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
cpu_clear(cpu, tmask);
}
diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b57f93..cf293b279098 100644
--- a/arch/mips/nxp/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
@@ -102,6 +102,7 @@ __init void plat_time_init(void)
unsigned int p;
unsigned int pow2p;
+ pnx8xxx_clockevent.cpumask = cpu_none_mask;
clockevents_register_device(&pnx8xxx_clockevent);
clocksource_register(&pnx_clocksource);
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index 3a7df647ca77..f78c29b68d77 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle)
}
/*
- * Detect available CPUs, populate phys_cpu_present_map before smp_init
+ * Detect available CPUs, populate cpu_possible_map before smp_init
*
* We don't want to start the secondary CPU yet nor do we have a nice probing
* feature in PMON so we just assume presence of the secondary core.
@@ -150,10 +150,10 @@ static void __init yos_smp_setup(void)
{
int i;
- cpus_clear(phys_cpu_present_map);
+ cpus_clear(cpu_possible_map);
for (i = 0; i < 2; i++) {
- cpu_set(i, phys_cpu_present_map);
+ cpu_set(i, cpu_possible_map);
__cpu_number_map[i] = i;
__cpu_logical_map[i] = i;
}
diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c
index ba5cdebeaf0d..5b47d6b65275 100644
--- a/arch/mips/sgi-ip27/ip27-smp.c
+++ b/arch/mips/sgi-ip27/ip27-smp.c
@@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
/* Only let it join in if it's marked enabled */
if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
(tot_cpus_found != NR_CPUS)) {
- cpu_set(cpuid, phys_cpu_present_map);
+ cpu_set(cpuid, cpu_possible_map);
alloc_cpupda(cpuid, tot_cpus_found);
cpus_found++;
tot_cpus_found++;
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 1327c2746fb7..f024057a35f8 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void)
cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
cd->rating = 200;
cd->irq = irq;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = rt_next_event;
cd->set_mode = rt_set_mode;
clockevents_register_device(cd);
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index a35818ed4263..12b465d404df 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
static void disable_bcm1480_irq(unsigned int irq);
static void ack_bcm1480_irq(unsigned int irq);
#ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask);
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
#endif
#ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
}
#ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
{
int i = 0, old_cpu, cpu, int_on, k;
u64 cur_ints;
@@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
unsigned long flags;
unsigned int irq_dirty;
- if (cpus_weight(mask) != 1) {
+ if (cpumask_weight(mask) != 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
return;
}
- i = first_cpu(mask);
+ i = cpumask_first(mask);
/* Convert logical CPU to physical CPU */
cpu = cpu_logical_map(i);
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index bd9eeb43ed0e..dddfda8e8294 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle)
/*
* Use CFE to find out how many CPUs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
* XXXKW will the boot CPU ever not be physical 0?
*
* Common setup before any secondaries are started
@@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void)
{
int i, num;
- cpus_clear(phys_cpu_present_map);
- cpu_set(0, phys_cpu_present_map);
+ cpus_clear(cpu_possible_map);
+ cpu_set(0, cpu_possible_map);
__cpu_number_map[0] = 0;
__cpu_logical_map[0] = 0;
for (i = 1, num = 0; i < NR_CPUS; i++) {
if (cfe_cpu_stop(i) == 0) {
- cpu_set(i, phys_cpu_present_map);
+ cpu_set(i, cpu_possible_map);
__cpu_number_map[i] = ++num;
__cpu_logical_map[num] = i;
}
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index a5158483986e..808ac2959b8c 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
static void disable_sb1250_irq(unsigned int irq);
static void ack_sb1250_irq(unsigned int irq);
#ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask);
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
#endif
#ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq)
}
#ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask)
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
{
int i = 0, old_cpu, cpu, int_on;
u64 cur_ints;
struct irq_desc *desc = irq_desc + irq;
unsigned long flags;
- i = first_cpu(mask);
+ i = cpumask_first(mask);
- if (cpus_weight(mask) > 1) {
+ if (cpumask_weight(mask) > 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
return;
}
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index 0734b933e969..5950a288a7da 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle)
/*
* Use CFE to find out how many CPUs are available, setting up
- * phys_cpu_present_map and the logical/physical mappings.
+ * cpu_possible_map and the logical/physical mappings.
* XXXKW will the boot CPU ever not be physical 0?
*
* Common setup before any secondaries are started
@@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void)
{
int i, num;
- cpus_clear(phys_cpu_present_map);
- cpu_set(0, phys_cpu_present_map);
+ cpus_clear(cpu_possible_map);
+ cpu_set(0, cpu_possible_map);
__cpu_number_map[0] = 0;
__cpu_logical_map[0] = 0;
for (i = 1, num = 0; i < NR_CPUS; i++) {
if (cfe_cpu_stop(i) == 0) {
- cpu_set(i, phys_cpu_present_map);
+ cpu_set(i, cpu_possible_map);
__cpu_number_map[i] = ++num;
__cpu_logical_map[num] = i;
}
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 796e3ce28720..69f5f88711cc 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void)
struct irqaction *action = &a20r_irqaction;
unsigned int cpu = smp_processor_id();
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
clockevents_register_device(cd);
action->dev_id = cd;
setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction);
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index af16f6e5c918..5ac3566f8c98 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -18,7 +18,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 644a70b1b04e..aacf11d33723 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@ config PARISC
select HAVE_OPROFILE
select RTC_CLASS
select RTC_DRV_PARISC
+ select INIT_ALL_POSSIBLE
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
in many of their workstations & servers (HP9000 700 and 800 series,
diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h
index c2cb49e934c1..1f4123427ea0 100644
--- a/arch/parisc/include/asm/module.h
+++ b/arch/parisc/include/asm/module.h
@@ -23,8 +23,10 @@ struct mod_arch_specific
{
unsigned long got_offset, got_count, got_max;
unsigned long fdesc_offset, fdesc_count, fdesc_max;
- unsigned long stub_offset, stub_count, stub_max;
- unsigned long init_stub_offset, init_stub_count, init_stub_max;
+ struct {
+ unsigned long stub_offset;
+ unsigned int stub_entries;
+ } *section;
int unwind_section;
struct unwind_table *unwind;
};
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 409e698f4361..6ef4b7867b1b 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -16,8 +16,6 @@
#include <linux/cpumask.h>
typedef unsigned long address_t;
-extern cpumask_t cpu_online_map;
-
/*
* Private routines/data
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index f5941c086551..1e25a45d64c1 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -34,7 +34,6 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 23ef950df008..4cea935e2f99 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest)
return 0;
}
-static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
{
- if (cpu_check_affinity(irq, &dest))
+ if (cpu_check_affinity(irq, dest))
return;
- irq_desc[irq].affinity = dest;
+ irq_desc[irq].affinity = *dest;
}
#endif
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 44138c3e6ea7..9013243cecca 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -6,6 +6,7 @@
*
* Linux/PA-RISC Project (http://www.parisc-linux.org/)
* Copyright (C) 2003 Randolph Chung <tausq at debian . org>
+ * Copyright (C) 2008 Helge Deller <deller@gmx.de>
*
*
* This program is free software; you can redistribute it and/or modify
@@ -24,6 +25,19 @@
*
*
* Notes:
+ * - PLT stub handling
+ * On 32bit (and sometimes 64bit) and with big kernel modules like xfs or
+ * ipv6 the relocation types R_PARISC_PCREL17F and R_PARISC_PCREL22F may
+ * fail to reach their PLT stub if we only create one big stub array for
+ * all sections at the beginning of the core or init section.
+ * Instead we now insert individual PLT stub entries directly in front of
+ * of the code sections where the stubs are actually called.
+ * This reduces the distance between the PCREL location and the stub entry
+ * so that the relocations can be fulfilled.
+ * While calculating the final layout of the kernel module in memory, the
+ * kernel module loader calls arch_mod_section_prepend() to request the
+ * to be reserved amount of memory in front of each individual section.
+ *
* - SEGREL32 handling
* We are not doing SEGREL32 handling correctly. According to the ABI, we
* should do a value offset, like this:
@@ -58,9 +72,13 @@
#define DEBUGP(fmt...)
#endif
+#define RELOC_REACHABLE(val, bits) \
+ (( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \
+ ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) ? \
+ 0 : 1)
+
#define CHECK_RELOC(val, bits) \
- if ( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \
- ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) { \
+ if (!RELOC_REACHABLE(val, bits)) { \
printk(KERN_ERR "module %s relocation of symbol %s is out of range (0x%lx in %d bits)\n", \
me->name, strtab + sym->st_name, (unsigned long)val, bits); \
return -ENOEXEC; \
@@ -92,13 +110,6 @@ static inline int in_local(struct module *me, void *loc)
return in_init(me, loc) || in_core(me, loc);
}
-static inline int in_local_section(struct module *me, void *loc, void *dot)
-{
- return (in_init(me, loc) && in_init(me, dot)) ||
- (in_core(me, loc) && in_core(me, dot));
-}
-
-
#ifndef CONFIG_64BIT
struct got_entry {
Elf32_Addr addr;
@@ -258,23 +269,42 @@ static inline unsigned long count_stubs(const Elf_Rela *rela, unsigned long n)
/* Free memory returned from module_alloc */
void module_free(struct module *mod, void *module_region)
{
+ kfree(mod->arch.section);
+ mod->arch.section = NULL;
+
vfree(module_region);
/* FIXME: If module_region == mod->init_region, trim exception
table entries. */
}
+/* Additional bytes needed in front of individual sections */
+unsigned int arch_mod_section_prepend(struct module *mod,
+ unsigned int section)
+{
+ /* size needed for all stubs of this section (including
+ * one additional for correct alignment of the stubs) */
+ return (mod->arch.section[section].stub_entries + 1)
+ * sizeof(struct stub_entry);
+}
+
#define CONST
int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
CONST Elf_Shdr *sechdrs,
CONST char *secstrings,
struct module *me)
{
- unsigned long gots = 0, fdescs = 0, stubs = 0, init_stubs = 0;
+ unsigned long gots = 0, fdescs = 0, len;
unsigned int i;
+ len = hdr->e_shnum * sizeof(me->arch.section[0]);
+ me->arch.section = kzalloc(len, GFP_KERNEL);
+ if (!me->arch.section)
+ return -ENOMEM;
+
for (i = 1; i < hdr->e_shnum; i++) {
- const Elf_Rela *rels = (void *)hdr + sechdrs[i].sh_offset;
+ const Elf_Rela *rels = (void *)sechdrs[i].sh_addr;
unsigned long nrels = sechdrs[i].sh_size / sizeof(*rels);
+ unsigned int count, s;
if (strncmp(secstrings + sechdrs[i].sh_name,
".PARISC.unwind", 14) == 0)
@@ -290,11 +320,23 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
*/
gots += count_gots(rels, nrels);
fdescs += count_fdescs(rels, nrels);
- if(strncmp(secstrings + sechdrs[i].sh_name,
- ".rela.init", 10) == 0)
- init_stubs += count_stubs(rels, nrels);
- else
- stubs += count_stubs(rels, nrels);
+
+ /* XXX: By sorting the relocs and finding duplicate entries
+ * we could reduce the number of necessary stubs and save
+ * some memory. */
+ count = count_stubs(rels, nrels);
+ if (!count)
+ continue;
+
+ /* so we need relocation stubs. reserve necessary memory. */
+ /* sh_info gives the section for which we need to add stubs. */
+ s = sechdrs[i].sh_info;
+
+ /* each code section should only have one relocation section */
+ WARN_ON(me->arch.section[s].stub_entries);
+
+ /* store number of stubs we need for this section */
+ me->arch.section[s].stub_entries += count;
}
/* align things a bit */
@@ -306,18 +348,8 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr,
me->arch.fdesc_offset = me->core_size;
me->core_size += fdescs * sizeof(Elf_Fdesc);
- me->core_size = ALIGN(me->core_size, 16);
- me->arch.stub_offset = me->core_size;
- me->core_size += stubs * sizeof(struct stub_entry);
-
- me->init_size = ALIGN(me->init_size, 16);
- me->arch.init_stub_offset = me->init_size;
- me->init_size += init_stubs * sizeof(struct stub_entry);
-
me->arch.got_max = gots;
me->arch.fdesc_max = fdescs;
- me->arch.stub_max = stubs;
- me->arch.init_stub_max = init_stubs;
return 0;
}
@@ -380,23 +412,27 @@ enum elf_stub_type {
};
static Elf_Addr get_stub(struct module *me, unsigned long value, long addend,
- enum elf_stub_type stub_type, int init_section)
+ enum elf_stub_type stub_type, Elf_Addr loc0, unsigned int targetsec)
{
- unsigned long i;
struct stub_entry *stub;
- if(init_section) {
- i = me->arch.init_stub_count++;
- BUG_ON(me->arch.init_stub_count > me->arch.init_stub_max);
- stub = me->module_init + me->arch.init_stub_offset +
- i * sizeof(struct stub_entry);
- } else {
- i = me->arch.stub_count++;
- BUG_ON(me->arch.stub_count > me->arch.stub_max);
- stub = me->module_core + me->arch.stub_offset +
- i * sizeof(struct stub_entry);
+ /* initialize stub_offset to point in front of the section */
+ if (!me->arch.section[targetsec].stub_offset) {
+ loc0 -= (me->arch.section[targetsec].stub_entries + 1) *
+ sizeof(struct stub_entry);
+ /* get correct alignment for the stubs */
+ loc0 = ALIGN(loc0, sizeof(struct stub_entry));
+ me->arch.section[targetsec].stub_offset = loc0;
}
+ /* get address of stub entry */
+ stub = (void *) me->arch.section[targetsec].stub_offset;
+ me->arch.section[targetsec].stub_offset += sizeof(struct stub_entry);
+
+ /* do not write outside available stub area */
+ BUG_ON(0 == me->arch.section[targetsec].stub_entries--);
+
+
#ifndef CONFIG_64BIT
/* for 32-bit the stub looks like this:
* ldil L'XXX,%r1
@@ -489,15 +525,19 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
Elf32_Addr val;
Elf32_Sword addend;
Elf32_Addr dot;
+ Elf_Addr loc0;
+ unsigned int targetsec = sechdrs[relsec].sh_info;
//unsigned long dp = (unsigned long)$global$;
register unsigned long dp asm ("r27");
DEBUGP("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
+ targetsec);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
- loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ loc = (void *)sechdrs[targetsec].sh_addr
+ rel[i].r_offset;
+ /* This is the start of the target section */
+ loc0 = sechdrs[targetsec].sh_addr;
/* This is the symbol it is referring to */
sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+ ELF32_R_SYM(rel[i].r_info);
@@ -569,19 +609,32 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
break;
case R_PARISC_PCREL17F:
/* 17-bit PC relative address */
- val = get_stub(me, val, addend, ELF_STUB_GOT, in_init(me, loc));
+ /* calculate direct call offset */
+ val += addend;
val = (val - dot - 8)/4;
- CHECK_RELOC(val, 17)
+ if (!RELOC_REACHABLE(val, 17)) {
+ /* direct distance too far, create
+ * stub entry instead */
+ val = get_stub(me, sym->st_value, addend,
+ ELF_STUB_DIRECT, loc0, targetsec);
+ val = (val - dot - 8)/4;
+ CHECK_RELOC(val, 17);
+ }
*loc = (*loc & ~0x1f1ffd) | reassemble_17(val);
break;
case R_PARISC_PCREL22F:
/* 22-bit PC relative address; only defined for pa20 */
- val = get_stub(me, val, addend, ELF_STUB_GOT, in_init(me, loc));
- DEBUGP("STUB FOR %s loc %lx+%lx at %lx\n",
- strtab + sym->st_name, (unsigned long)loc, addend,
- val)
+ /* calculate direct call offset */
+ val += addend;
val = (val - dot - 8)/4;
- CHECK_RELOC(val, 22);
+ if (!RELOC_REACHABLE(val, 22)) {
+ /* direct distance too far, create
+ * stub entry instead */
+ val = get_stub(me, sym->st_value, addend,
+ ELF_STUB_DIRECT, loc0, targetsec);
+ val = (val - dot - 8)/4;
+ CHECK_RELOC(val, 22);
+ }
*loc = (*loc & ~0x3ff1ffd) | reassemble_22(val);
break;
@@ -610,13 +663,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
Elf64_Addr val;
Elf64_Sxword addend;
Elf64_Addr dot;
+ Elf_Addr loc0;
+ unsigned int targetsec = sechdrs[relsec].sh_info;
DEBUGP("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
+ targetsec);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
- loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ loc = (void *)sechdrs[targetsec].sh_addr
+ rel[i].r_offset;
+ /* This is the start of the target section */
+ loc0 = sechdrs[targetsec].sh_addr;
/* This is the symbol it is referring to */
sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ ELF64_R_SYM(rel[i].r_info);
@@ -672,42 +729,40 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
DEBUGP("PCREL22F Symbol %s loc %p val %lx\n",
strtab + sym->st_name,
loc, val);
+ val += addend;
/* can we reach it locally? */
- if(!in_local_section(me, (void *)val, (void *)dot)) {
-
- if (in_local(me, (void *)val))
- /* this is the case where the
- * symbol is local to the
- * module, but in a different
- * section, so stub the jump
- * in case it's more than 22
- * bits away */
- val = get_stub(me, val, addend, ELF_STUB_DIRECT,
- in_init(me, loc));
- else if (strncmp(strtab + sym->st_name, "$$", 2)
+ if (in_local(me, (void *)val)) {
+ /* this is the case where the symbol is local
+ * to the module, but in a different section,
+ * so stub the jump in case it's more than 22
+ * bits away */
+ val = (val - dot - 8)/4;
+ if (!RELOC_REACHABLE(val, 22)) {
+ /* direct distance too far, create
+ * stub entry instead */
+ val = get_stub(me, sym->st_value,
+ addend, ELF_STUB_DIRECT,
+ loc0, targetsec);
+ } else {
+ /* Ok, we can reach it directly. */
+ val = sym->st_value;
+ val += addend;
+ }
+ } else {
+ val = sym->st_value;
+ if (strncmp(strtab + sym->st_name, "$$", 2)
== 0)
val = get_stub(me, val, addend, ELF_STUB_MILLI,
- in_init(me, loc));
+ loc0, targetsec);
else
val = get_stub(me, val, addend, ELF_STUB_GOT,
- in_init(me, loc));
+ loc0, targetsec);
}
DEBUGP("STUB FOR %s loc %lx, val %lx+%lx at %lx\n",
strtab + sym->st_name, loc, sym->st_value,
addend, val);
- /* FIXME: local symbols work as long as the
- * core and init pieces aren't separated too
- * far. If this is ever broken, you will trip
- * the check below. The way to fix it would
- * be to generate local stubs to go between init
- * and core */
- if((Elf64_Sxword)(val - dot - 8) > 0x800000 -1 ||
- (Elf64_Sxword)(val - dot - 8) < -0x800000) {
- printk(KERN_ERR "Module %s, symbol %s is out of range for PCREL22F relocation\n",
- me->name, strtab + sym->st_name);
- return -ENOEXEC;
- }
val = (val - dot - 8)/4;
+ CHECK_RELOC(val, 22);
*loc = (*loc & ~0x3ff1ffd) | reassemble_22(val);
break;
case R_PARISC_DIR64:
@@ -794,12 +849,8 @@ int module_finalize(const Elf_Ehdr *hdr,
addr = (u32 *)entry->addr;
printk("INSNS: %x %x %x %x\n",
addr[0], addr[1], addr[2], addr[3]);
- printk("stubs used %ld, stubs max %ld\n"
- "init_stubs used %ld, init stubs max %ld\n"
- "got entries used %ld, gots max %ld\n"
+ printk("got entries used %ld, gots max %ld\n"
"fdescs used %ld, fdescs max %ld\n",
- me->arch.stub_count, me->arch.stub_max,
- me->arch.init_stub_count, me->arch.init_stub_max,
me->arch.got_count, me->arch.got_max,
me->arch.fdesc_count, me->arch.fdesc_max);
#endif
@@ -829,7 +880,10 @@ int module_finalize(const Elf_Ehdr *hdr,
me->name, me->arch.got_count, MAX_GOTS);
return -EINVAL;
}
-
+
+ kfree(me->arch.section);
+ me->arch.section = NULL;
+
/* no symbol table */
if(symhdr == NULL)
return 0;
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index d47f3975c9c6..80bc000523fa 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -67,21 +67,6 @@ static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is boo
static int parisc_max_cpus __read_mostly = 1;
-/* online cpus are ones that we've managed to bring up completely
- * possible cpus are all valid cpu
- * present cpus are all detected cpu
- *
- * On startup we bring up the "possible" cpus. Since we discover
- * CPUs later, we add them as hotplug, so the possible cpu mask is
- * empty in the beginning.
- */
-
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */
-
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-
DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED;
enum ipi_message_type {
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
new file mode 100644
index 000000000000..9b198d1b3b2b
--- /dev/null
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -0,0 +1,80 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_PPC_DISASSEMBLE_H__
+#define __ASM_PPC_DISASSEMBLE_H__
+
+#include <linux/types.h>
+
+static inline unsigned int get_op(u32 inst)
+{
+ return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+ return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+ return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+ return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+ return inst & 0xffff;
+}
+
+#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
new file mode 100644
index 000000000000..f49031b632ca
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -0,0 +1,61 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __ASM_44X_H__
+#define __ASM_44X_H__
+
+#include <linux/kvm_host.h>
+
+#define PPC44x_TLB_SIZE 64
+
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+ struct page *page;
+ u16 gtlb_index;
+ u8 writeable;
+ u8 tid;
+};
+
+struct kvmppc_vcpu_44x {
+ /* Unmodified copy of the guest's TLB. */
+ struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+ /* References to guest pages in the hardware TLB. */
+ struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
+
+ /* State of the shadow TLB at guest context switch time. */
+ struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
+ u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
+ struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
+}
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
+
+#endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 34b52b7180cd..c1e436fe7738 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct tlbe {
+struct kvmppc_44x_tlbe {
u32 tid; /* Only the low 8 bits are used. */
u32 word0;
u32 word1;
u32 word2;
};
-struct kvm_arch {
+enum kvm_exit_types {
+ MMIO_EXITS,
+ DCR_EXITS,
+ SIGNAL_EXITS,
+ ITLB_REAL_MISS_EXITS,
+ ITLB_VIRT_MISS_EXITS,
+ DTLB_REAL_MISS_EXITS,
+ DTLB_VIRT_MISS_EXITS,
+ SYSCALL_EXITS,
+ ISI_EXITS,
+ DSI_EXITS,
+ EMULATED_INST_EXITS,
+ EMULATED_MTMSRWE_EXITS,
+ EMULATED_WRTEE_EXITS,
+ EMULATED_MTSPR_EXITS,
+ EMULATED_MFSPR_EXITS,
+ EMULATED_MTMSR_EXITS,
+ EMULATED_MFMSR_EXITS,
+ EMULATED_TLBSX_EXITS,
+ EMULATED_TLBWE_EXITS,
+ EMULATED_RFI_EXITS,
+ DEC_EXITS,
+ EXT_INTR_EXITS,
+ HALT_WAKEUP,
+ USR_PR_INST,
+ FP_UNAVAIL,
+ DEBUG_EXITS,
+ TIMEINGUEST,
+ __NUMBER_OF_KVM_EXIT_TYPES
};
-struct kvm_vcpu_arch {
- /* Unmodified copy of the guest's TLB. */
- struct tlbe guest_tlb[PPC44x_TLB_SIZE];
- /* TLB that's actually used when the guest is running. */
- struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
- /* Pages which are referenced in the shadow TLB. */
- struct page *shadow_pages[PPC44x_TLB_SIZE];
+/* allow access to big endian 32bit upper/lower parts and 64bit var */
+struct kvmppc_exit_timing {
+ union {
+ u64 tv64;
+ struct {
+ u32 tbu, tbl;
+ } tv32;
+ };
+};
- /* Track which TLB entries we've modified in the current exit. */
- u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+struct kvm_arch {
+};
+struct kvm_vcpu_arch {
u32 host_stack;
u32 host_pid;
u32 host_dbcr0;
@@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
u32 host_msr;
u64 fpr[32];
- u32 gpr[32];
+ ulong gpr[32];
- u32 pc;
+ ulong pc;
u32 cr;
- u32 ctr;
- u32 lr;
- u32 xer;
+ ulong ctr;
+ ulong lr;
+ ulong xer;
- u32 msr;
+ ulong msr;
u32 mmucr;
- u32 sprg0;
- u32 sprg1;
- u32 sprg2;
- u32 sprg3;
- u32 sprg4;
- u32 sprg5;
- u32 sprg6;
- u32 sprg7;
- u32 srr0;
- u32 srr1;
- u32 csrr0;
- u32 csrr1;
- u32 dsrr0;
- u32 dsrr1;
- u32 dear;
- u32 esr;
+ ulong sprg0;
+ ulong sprg1;
+ ulong sprg2;
+ ulong sprg3;
+ ulong sprg4;
+ ulong sprg5;
+ ulong sprg6;
+ ulong sprg7;
+ ulong srr0;
+ ulong srr1;
+ ulong csrr0;
+ ulong csrr1;
+ ulong dsrr0;
+ ulong dsrr1;
+ ulong dear;
+ ulong esr;
u32 dec;
u32 decar;
u32 tbl;
@@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
u32 tcr;
u32 tsr;
u32 ivor[16];
- u32 ivpr;
+ ulong ivpr;
u32 pir;
u32 shadow_pid;
@@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
u32 dbcr0;
u32 dbcr1;
+#ifdef CONFIG_KVM_EXIT_TIMING
+ struct kvmppc_exit_timing timing_exit;
+ struct kvmppc_exit_timing timing_last_enter;
+ u32 last_exit_type;
+ u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
+ u64 timing_last_exit;
+ struct dentry *debugfs_exit_timing;
+#endif
+
u32 last_inst;
- u32 fault_dear;
- u32 fault_esr;
+ ulong fault_dear;
+ ulong fault_esr;
gpa_t paddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bb62ad876de3..36d2a50a8487 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,11 +29,6 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
-struct kvm_tlb {
- struct tlbe guest_tlb[PPC44x_TLB_SIZE];
- struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
-};
-
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -41,9 +36,6 @@ enum emulation_result {
EMULATE_FAIL, /* can't emulate this instruction */
};
-extern const unsigned char exception_priority[];
-extern const unsigned char priority_exception[];
-
extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern char kvmppc_handlers_start[];
extern unsigned long kvmppc_handler_len;
@@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
- u64 asid, u32 flags);
-extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
- gva_t eend, u32 asid);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+ u64 asid, u32 flags, u32 max_bytes,
+ unsigned int gtlb_idx);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-/* XXX Book E specific */
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
-
-extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
-
-static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
-{
- unsigned int priority = exception_priority[exception];
- set_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
-{
- unsigned int priority = exception_priority[exception];
- clear_bit(priority, &vcpu->arch.pending_exceptions);
-}
-
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
- if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
- kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
-
- vcpu->arch.msr = new_msr;
-
- if (vcpu->arch.msr & MSR_WE)
- kvm_vcpu_block(vcpu);
-}
-
-static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
- if (vcpu->arch.pid != new_pid) {
- vcpu->arch.pid = new_pid;
- vcpu->arch.swap_pid = 1;
- }
-}
+/* Core-specific hooks */
+
+extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
+ unsigned int id);
+extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_check_processor_compat(void);
+extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr);
+
+extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
+
+extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq);
+
+extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int op, int *advance);
+extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+
+extern int kvmppc_booke_init(void);
+extern void kvmppc_booke_exit(void);
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 8a97cfb08b7e..27cc6fdcd3b7 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -56,6 +56,7 @@
#ifndef __ASSEMBLY__
extern unsigned int tlb_44x_hwater;
+extern unsigned int tlb_44x_index;
typedef struct {
unsigned int id;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f97999..375258559ae6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node)
return numa_cpumask_lookup_table[node];
}
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
+
static inline int node_to_first_cpu(int node)
{
- cpumask_t tmp;
- tmp = node_to_cpumask(node);
- return first_cpu(tmp);
+ return cpumask_first(cpumask_of_node(node));
}
int of_node_to_nid(struct device_node *device);
@@ -46,9 +46,12 @@ static inline int pcibus_to_node(struct pci_bus *bus)
node_to_cpumask(pcibus_to_node(bus)) \
)
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+
/* sched_domains SD_NODE_INIT for PPC64 machines */
#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
@@ -109,6 +112,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
#endif
#endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 661d07d2146b..9937fe44555f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,9 +23,6 @@
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/hrtimer.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm_host.h>
-#endif
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
@@ -51,6 +48,9 @@
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
#endif
+#ifdef CONFIG_KVM
+#include <asm/kvm_44x.h>
+#endif
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
#include "head_booke.h"
@@ -357,12 +357,10 @@ int main(void)
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
- DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+ DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
- DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
- DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -385,5 +383,16 @@ int main(void)
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
#endif
+#ifdef CONFIG_KVM_EXIT_TIMING
+ DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbu));
+ DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbl));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbu));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbl));
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 4c85b8d56478..688b329800bd 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -7,7 +7,6 @@
#include <linux/mqueue.h>
#include <asm/uaccess.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0ab12e..23b8b5e36f98 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map)
mask = map;
}
if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
+ irq_desc[irq].chip->set_affinity(irq, &mask);
else if (irq_desc[irq].action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
#include <linux/mqueue.h>
#include <linux/hardirq.h>
#include <linux/utsname.h>
+#include <linux/kernel_stat.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8ac3f721d235..65484b2200b3 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,13 +59,9 @@
struct thread_info *secondary_ti;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a5140429..c9564031a2a9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
delta += sys_time;
get_paca()->system_time = 0;
}
- account_system_time(tsk, 0, delta);
- account_system_time_scaled(tsk, deltascaled);
+ if (in_irq() || idle_task(smp_processor_id()) != tsk)
+ account_system_time(tsk, 0, delta, deltascaled);
+ else
+ account_idle_time(delta);
per_cpu(cputime_last_delta, smp_processor_id()) = delta;
per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
utime = get_paca()->user_time;
get_paca()->user_time = 0;
- account_user_time(tsk, utime);
-
utimescaled = cputime_to_scaled(utime);
- account_user_time_scaled(tsk, utimescaled);
+ account_user_time(tsk, utime, utimescaled);
}
/*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
tb = mftb();
purr = mfspr(SPRN_PURR);
stolen = (tb - pme->tb) - (purr - pme->purr);
- if (stolen > 0)
- account_steal_time(current, stolen);
+ if (stolen > 0) {
+ if (idle_task(smp_processor_id()) != current)
+ account_steal_time(stolen);
+ else
+ account_idle_time(stolen);
+ }
pme->tb = tb;
pme->purr = purr;
}
@@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu)
struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
*dec = decrementer_clockevent;
- dec->cpumask = cpumask_of_cpu(cpu);
+ dec->cpumask = cpumask_of(cpu);
printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
new file mode 100644
index 000000000000..a66bec57265a
--- /dev/null
+++ b/arch/powerpc/kvm/44x.c
@@ -0,0 +1,228 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Note: clearing MSR[DE] just means that the debug interrupt will not be
+ * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
+ * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
+ * will be delivered as an "imprecise debug event" (which is indicated by
+ * DBSR[IDE].
+ */
+static void kvm44x_disable_debug_interrupts(void)
+{
+ mtmsr(mfmsr() & ~MSR_DE);
+}
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+ kvm44x_disable_debug_interrupts();
+
+ mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
+ mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
+ mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
+ mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
+ mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
+ mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
+ mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
+ mtmsr(vcpu->arch.host_msr);
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+ u32 dbcr0 = 0;
+
+ vcpu->arch.host_msr = mfmsr();
+ kvm44x_disable_debug_interrupts();
+
+ /* Save host debug register state. */
+ vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
+ vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
+ vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
+ vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
+ vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
+ vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
+ vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
+
+ /* set registers up for guest */
+
+ if (dbg->bp[0]) {
+ mtspr(SPRN_IAC1, dbg->bp[0]);
+ dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+ }
+ if (dbg->bp[1]) {
+ mtspr(SPRN_IAC2, dbg->bp[1]);
+ dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+ }
+ if (dbg->bp[2]) {
+ mtspr(SPRN_IAC3, dbg->bp[2]);
+ dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+ }
+ if (dbg->bp[3]) {
+ mtspr(SPRN_IAC4, dbg->bp[3]);
+ dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+ }
+
+ mtspr(SPRN_DBCR0, dbcr0);
+ mtspr(SPRN_DBCR1, 0);
+ mtspr(SPRN_DBCR2, 0);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvmppc_44x_tlb_load(vcpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kvmppc_44x_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+ int i;
+
+ tlbe->tid = 0;
+ tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+ tlbe->word1 = 0;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+ tlbe++;
+ tlbe->tid = 0;
+ tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+ tlbe->word1 = 0xef600000;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+ | PPC44x_TLB_I | PPC44x_TLB_G;
+
+ /* Since the guest can directly access the timebase, it must know the
+ * real timebase frequency. Accordingly, it must see the state of
+ * CCR1[TCS]. */
+ vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+ vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
+ return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe;
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+ if (index == -1) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ gtlbe = &vcpu_44x->guest_tlb[index];
+
+ tr->physical_address = tlb_xlate(gtlbe, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_44x) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ vcpu = &vcpu_44x->vcpu;
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ return vcpu;
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
+}
+
+static int kvmppc_44x_init(void)
+{
+ int r;
+
+ r = kvmppc_booke_init();
+ if (r)
+ return r;
+
+ return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
+}
+
+static void kvmppc_44x_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_44x_init);
+module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
new file mode 100644
index 000000000000..82489a743a6f
--- /dev/null
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -0,0 +1,371 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
+
+#include "booke.h"
+#include "44x_tlb.h"
+
+#define OP_RFI 19
+
+#define XOP_RFI 50
+#define XOP_MFMSR 83
+#define XOP_WRTEE 131
+#define XOP_MTMSR 146
+#define XOP_WRTEEI 163
+#define XOP_MFDCR 323
+#define XOP_MTDCR 451
+#define XOP_TLBSX 914
+#define XOP_ICCCI 966
+#define XOP_TLBWE 978
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int dcrn;
+ int ra;
+ int rb;
+ int rc;
+ int rs;
+ int rt;
+ int ws;
+
+ switch (get_op(inst)) {
+ case OP_RFI:
+ switch (get_xop(inst)) {
+ case XOP_RFI:
+ kvmppc_emul_rfi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+ *advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 31:
+ switch (get_xop(inst)) {
+
+ case XOP_MFMSR:
+ rt = get_rt(inst);
+ vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+ break;
+
+ case XOP_MTMSR:
+ rs = get_rs(inst);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+ kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ break;
+
+ case XOP_WRTEE:
+ rs = get_rs(inst);
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (vcpu->arch.gpr[rs] & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ case XOP_WRTEEI:
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (inst & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ case XOP_MFDCR:
+ dcrn = get_dcrn(inst);
+ rt = get_rt(inst);
+
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+ local_irq_enable();
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = 0;
+ run->dcr.is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case XOP_MTDCR:
+ dcrn = get_dcrn(inst);
+ rs = get_rs(inst);
+
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = vcpu->arch.gpr[rs];
+ run->dcr.is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case XOP_TLBWE:
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ ws = get_ws(inst);
+ emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
+ break;
+
+ case XOP_TLBSX:
+ rt = get_rt(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ rc = get_rc(inst);
+ emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
+ break;
+
+ case XOP_ICCCI:
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ switch (sprn) {
+ case SPRN_MMUCR:
+ vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+ case SPRN_PID:
+ kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+ case SPRN_CCR0:
+ vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_CCR1:
+ vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DEAR:
+ vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ case SPRN_ESR:
+ vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR0:
+ vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR1:
+ vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_TSR:
+ vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+ case SPRN_TCR:
+ vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ /* Note: SPRG4-7 are user-readable. These values are
+ * loaded into the real SPRGs when resuming the
+ * guest. */
+ case SPRN_SPRG4:
+ vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG5:
+ vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG6:
+ vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG7:
+ vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_IVPR:
+ vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR0:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+ return EMULATE_DONE;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ switch (sprn) {
+ /* 440 */
+ case SPRN_MMUCR:
+ vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+ case SPRN_CCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+ case SPRN_CCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+
+ /* Book E */
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+ case SPRN_IVPR:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ case SPRN_DEAR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ case SPRN_ESR:
+ vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ case SPRN_DBCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ case SPRN_DBCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+ case SPRN_IVOR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+ return EMULATE_DONE;
+}
+
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ad72c6f9811f..9a34b8edb9e2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,20 +22,103 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
#include <asm/mmu-44x.h>
#include <asm/kvm_ppc.h>
+#include <asm/kvm_44x.h>
+#include "timing.h"
#include "44x_tlb.h"
+#ifndef PPC44x_TLBE_SIZE
+#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
+#endif
+
+#define PAGE_SIZE_4K (1<<12)
+#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
+
+#define PPC44x_TLB_UATTR_MASK \
+ (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
-static unsigned int kvmppc_tlb_44x_pos;
+#ifdef DEBUG
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_44x_tlbe *tlbe;
+ int i;
+
+ printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+ printk("| %2s | %3s | %8s | %8s | %8s |\n",
+ "nr", "tid", "word0", "word1", "word2");
+
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+ tlbe = &vcpu_44x->guest_tlb[i];
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ printk(" G%2d | %02X | %08X | %08X | %08X |\n",
+ i, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
+ }
+}
+#endif
+
+static inline void kvmppc_44x_tlbie(unsigned int index)
+{
+ /* 0 <= index < 64, so the V bit is clear and we can use the index as
+ * word0. */
+ asm volatile(
+ "tlbwe %[index], %[index], 0\n"
+ :
+ : [index] "r"(index)
+ );
+}
+
+static inline void kvmppc_44x_tlbre(unsigned int index,
+ struct kvmppc_44x_tlbe *tlbe)
+{
+ asm volatile(
+ "tlbre %[word0], %[index], 0\n"
+ "mfspr %[tid], %[sprn_mmucr]\n"
+ "andi. %[tid], %[tid], 0xff\n"
+ "tlbre %[word1], %[index], 1\n"
+ "tlbre %[word2], %[index], 2\n"
+ : [word0] "=r"(tlbe->word0),
+ [word1] "=r"(tlbe->word1),
+ [word2] "=r"(tlbe->word2),
+ [tid] "=r"(tlbe->tid)
+ : [index] "r"(index),
+ [sprn_mmucr] "i"(SPRN_MMUCR)
+ : "cc"
+ );
+}
+
+static inline void kvmppc_44x_tlbwe(unsigned int index,
+ struct kvmppc_44x_tlbe *stlbe)
+{
+ unsigned long tmp;
+
+ asm volatile(
+ "mfspr %[tmp], %[sprn_mmucr]\n"
+ "rlwimi %[tmp], %[tid], 0, 0xff\n"
+ "mtspr %[sprn_mmucr], %[tmp]\n"
+ "tlbwe %[word0], %[index], 0\n"
+ "tlbwe %[word1], %[index], 1\n"
+ "tlbwe %[word2], %[index], 2\n"
+ : [tmp] "=&r"(tmp)
+ : [word0] "r"(stlbe->word0),
+ [word1] "r"(stlbe->word1),
+ [word2] "r"(stlbe->word2),
+ [tid] "r"(stlbe->tid),
+ [index] "r"(index),
+ [sprn_mmucr] "i"(SPRN_MMUCR)
+ );
+}
static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
{
- /* Mask off reserved bits. */
- attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+ /* We only care about the guest's permission and user bits. */
+ attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
if (!usermode) {
/* Guest is in supervisor mode, so we need to translate guest
@@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
/* Make sure host can always access this memory. */
attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
+ /* WIMGE = 0b00100 */
+ attrib |= PPC44x_TLB_M;
+
return attrib;
}
+/* Load shadow TLB back into hardware. */
+void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ int i;
+
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+ if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+ kvmppc_44x_tlbwe(i, stlbe);
+ }
+}
+
+static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int i)
+{
+ vcpu_44x->shadow_tlb_mod[i] = 1;
+}
+
+/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
+void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ int i;
+
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
+
+ if (vcpu_44x->shadow_tlb_mod[i])
+ kvmppc_44x_tlbre(i, stlbe);
+
+ if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
+ kvmppc_44x_tlbie(i);
+ }
+}
+
+
/* Search the guest TLB for a matching entry. */
int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
unsigned int as)
{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
/* XXX Replace loop with fancy data structures. */
- for (i = 0; i < PPC44x_TLB_SIZE; i++) {
- struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
+ struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
unsigned int tid;
if (eaddr < get_tlb_eaddr(tlbe))
@@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
return -1;
}
-struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
- unsigned int index;
- index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
- if (index == -1)
- return NULL;
- return &vcpu->arch.guest_tlb[index];
+ return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
-struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
- unsigned int index;
- index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
- if (index == -1)
- return NULL;
- return &vcpu->arch.guest_tlb[index];
+ return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
-static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
+ unsigned int stlb_index)
{
- return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
-}
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
-static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
- unsigned int index)
-{
- struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
- struct page *page = vcpu->arch.shadow_pages[index];
+ if (!ref->page)
+ return;
- if (get_tlb_v(stlbe)) {
- if (kvmppc_44x_tlbe_is_writable(stlbe))
- kvm_release_page_dirty(page);
- else
- kvm_release_page_clean(page);
- }
+ /* Discard from the TLB. */
+ /* Note: we could actually invalidate a host mapping, if the host overwrote
+ * this TLB entry since we inserted a guest mapping. */
+ kvmppc_44x_tlbie(stlb_index);
+
+ /* Now release the page. */
+ if (ref->writeable)
+ kvm_release_page_dirty(ref->page);
+ else
+ kvm_release_page_clean(ref->page);
+
+ ref->page = NULL;
+
+ /* XXX set tlb_44x_index to stlb_index? */
+
+ KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
}
void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
for (i = 0; i <= tlb_44x_hwater; i++)
- kvmppc_44x_shadow_release(vcpu, i);
-}
-
-void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
-{
- vcpu->arch.shadow_tlb_mod[i] = 1;
+ kvmppc_44x_shadow_release(vcpu_44x, i);
}
-/* Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB. */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
- u32 flags)
+/**
+ * kvmppc_mmu_map -- create a host mapping for guest memory
+ *
+ * If the guest wanted a larger page than the host supports, only the first
+ * host page is mapped here and the rest are demand faulted.
+ *
+ * If the guest wanted a smaller page than the host page size, we map only the
+ * guest-size page (i.e. not a full host page mapping).
+ *
+ * Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB.
+ */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
+ u32 flags, u32 max_bytes, unsigned int gtlb_index)
{
+ struct kvmppc_44x_tlbe stlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_shadow_ref *ref;
struct page *new_page;
- struct tlbe *stlbe;
hpa_t hpaddr;
+ gfn_t gfn;
unsigned int victim;
- /* Future optimization: don't overwrite the TLB entry containing the
- * current PC (or stack?). */
- victim = kvmppc_tlb_44x_pos++;
- if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
- kvmppc_tlb_44x_pos = 0;
- stlbe = &vcpu->arch.shadow_tlb[victim];
+ /* Select TLB entry to clobber. Indirectly guard against races with the TLB
+ * miss handler by disabling interrupts. */
+ local_irq_disable();
+ victim = ++tlb_44x_index;
+ if (victim > tlb_44x_hwater)
+ victim = 0;
+ tlb_44x_index = victim;
+ local_irq_enable();
/* Get reference to new page. */
+ gfn = gpaddr >> PAGE_SHIFT;
new_page = gfn_to_page(vcpu->kvm, gfn);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
}
hpaddr = page_to_phys(new_page);
- /* Drop reference to old page. */
- kvmppc_44x_shadow_release(vcpu, victim);
-
- vcpu->arch.shadow_pages[victim] = new_page;
+ /* Invalidate any previous shadow mappings. */
+ kvmppc_44x_shadow_release(vcpu_44x, victim);
/* XXX Make sure (va, size) doesn't overlap any other
* entries. 440x6 user manual says the result would be
@@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
/* XXX what about AS? */
- stlbe->tid = !(asid & 0xff);
-
/* Force TS=1 for all guest mappings. */
- /* For now we hardcode 4KB mappings, but it will be important to
- * use host large pages in the future. */
- stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
- | PPC44x_TLB_4K;
- stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
- stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
- vcpu->arch.msr & MSR_PR);
- kvmppc_tlbe_set_modified(vcpu, victim);
+ stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
+
+ if (max_bytes >= PAGE_SIZE) {
+ /* Guest mapping is larger than or equal to host page size. We can use
+ * a "native" host mapping. */
+ stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
+ } else {
+ /* Guest mapping is smaller than host page size. We must restrict the
+ * size of the mapping to be at most the smaller of the two, but for
+ * simplicity we fall back to a 4K mapping (this is probably what the
+ * guest is using anyways). */
+ stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
+
+ /* 'hpaddr' is a host page, which is larger than the mapping we're
+ * inserting here. To compensate, we must add the in-page offset to the
+ * sub-page. */
+ hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
+ }
- KVMTRACE_5D(STLB_WRITE, vcpu, victim,
- stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
- handler);
+ stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+ stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+ vcpu->arch.msr & MSR_PR);
+ stlbe.tid = !(asid & 0xff);
+
+ /* Keep track of the reference so we can properly release it later. */
+ ref = &vcpu_44x->shadow_refs[victim];
+ ref->page = new_page;
+ ref->gtlb_index = gtlb_index;
+ ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
+ ref->tid = stlbe.tid;
+
+ /* Insert shadow mapping into hardware TLB. */
+ kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
+ kvmppc_44x_tlbwe(victim, &stlbe);
+ KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
+ stlbe.word2, handler);
}
-void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
- gva_t eend, u32 asid)
+/* For a particular guest TLB entry, invalidate the corresponding host TLB
+ * mappings and release the host pages. */
+static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
+ unsigned int gtlb_index)
{
- unsigned int pid = !(asid & 0xff);
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i <= tlb_44x_hwater; i++) {
- struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
- unsigned int tid;
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+ if (ref->gtlb_index == gtlb_index)
+ kvmppc_44x_shadow_release(vcpu_44x, i);
+ }
+}
- if (!get_tlb_v(stlbe))
- continue;
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+ vcpu->arch.shadow_pid = !usermode;
+}
- if (eend < get_tlb_eaddr(stlbe))
- continue;
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ int i;
- if (eaddr > get_tlb_end(stlbe))
- continue;
+ if (unlikely(vcpu->arch.pid == new_pid))
+ return;
- tid = get_tlb_tid(stlbe);
- if (tid && (tid != pid))
- continue;
+ vcpu->arch.pid = new_pid;
- kvmppc_44x_shadow_release(vcpu, i);
- stlbe->word0 = 0;
- kvmppc_tlbe_set_modified(vcpu, i);
- KVMTRACE_5D(STLB_INVAL, vcpu, i,
- stlbe->tid, stlbe->word0, stlbe->word1,
- stlbe->word2, handler);
+ /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
+ * can't access guest kernel mappings (TID=1). When we switch to a new
+ * guest PID, which will also use host PID=0, we must discard the old guest
+ * userspace mappings. */
+ for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
+ struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
+
+ if (ref->tid == 0)
+ kvmppc_44x_shadow_release(vcpu_44x, i);
}
}
-/* Invalidate all mappings on the privilege switch after PID has been changed.
- * The guest always runs with PID=1, so we must clear the entire TLB when
- * switching address spaces. */
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct kvmppc_44x_tlbe *tlbe)
{
- int i;
+ gpa_t gpa;
- if (vcpu->arch.swap_pid) {
- /* XXX Replace loop with fancy data structures. */
- for (i = 0; i <= tlb_44x_hwater; i++) {
- struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
-
- /* Future optimization: clear only userspace mappings. */
- kvmppc_44x_shadow_release(vcpu, i);
- stlbe->word0 = 0;
- kvmppc_tlbe_set_modified(vcpu, i);
- KVMTRACE_5D(STLB_INVAL, vcpu, i,
- stlbe->tid, stlbe->word0, stlbe->word1,
- stlbe->word2, handler);
- }
- vcpu->arch.swap_pid = 0;
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ return 0;
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *tlbe;
+ unsigned int gtlb_index;
+
+ gtlb_index = vcpu->arch.gpr[ra];
+ if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
+ printk("%s: index %d\n", __func__, gtlb_index);
+ kvmppc_dump_vcpu(vcpu);
+ return EMULATE_FAIL;
}
- vcpu->arch.shadow_pid = !usermode;
+ tlbe = &vcpu_44x->guest_tlb[gtlb_index];
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ kvmppc_44x_invalidate(vcpu, gtlb_index);
+
+ switch (ws) {
+ case PPC44x_TLB_PAGEID:
+ tlbe->tid = get_mmucr_stid(vcpu);
+ tlbe->word0 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_XLAT:
+ tlbe->word1 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_ATTRIB:
+ tlbe->word2 = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (tlbe_is_host_safe(vcpu, tlbe)) {
+ u64 asid;
+ gva_t eaddr;
+ gpa_t gpaddr;
+ u32 flags;
+ u32 bytes;
+
+ eaddr = get_tlb_eaddr(tlbe);
+ gpaddr = get_tlb_raddr(tlbe);
+
+ /* Use the advertised page size to mask effective and real addrs. */
+ bytes = get_tlb_bytes(tlbe);
+ eaddr &= ~(bytes - 1);
+ gpaddr &= ~(bytes - 1);
+
+ asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+ flags = tlbe->word2 & 0xffff;
+
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+ }
+
+ KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
+ tlbe->word1, tlbe->word2, handler);
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
+ return EMULATE_DONE;
+}
+
+int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+{
+ u32 ea;
+ int gtlb_index;
+ unsigned int as = get_mmucr_sts(vcpu);
+ unsigned int pid = get_mmucr_stid(vcpu);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+ if (rc) {
+ if (gtlb_index < 0)
+ vcpu->arch.cr &= ~0x20000000;
+ else
+ vcpu->arch.cr |= 0x20000000;
+ }
+ vcpu->arch.gpr[rt] = gtlb_index;
+
+ kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
+ return EMULATE_DONE;
}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 2ccd46b6f6b7..772191f29e62 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,48 +25,52 @@
extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned int pid, unsigned int as);
-extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+
+extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
+ u8 rc);
+extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
/* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 4) & 0xf;
}
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
{
return tlbe->word0 & 0xfffffc00;
}
-static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
return 1 << 10 << (pgsize << 1);
}
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
{
return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
}
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
{
u64 word1 = tlbe->word1;
return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
}
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
{
return tlbe->tid & 0xff;
}
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 8) & 0x1;
}
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 9) & 0x1;
}
@@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
return (vcpu->arch.mmucr >> 16) & 0x1;
}
-static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
{
unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 53aaa66b25e5..6dbdc4817d80 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
- bool "Kernel-based Virtual Machine (KVM) support"
- depends on 44x && EXPERIMENTAL
+ bool
select PREEMPT_NOTIFIERS
select ANON_INODES
- # We can only run on Book E hosts so far
- select KVM_BOOKE_HOST
+
+config KVM_440
+ bool "KVM support for PowerPC 440 processors"
+ depends on EXPERIMENTAL && 44x
+ select KVM
---help---
- Support hosting virtualized guest machines. You will also
- need to select one or more of the processor modules below.
+ Support running unmodified 440 guest kernels in virtual machines on
+ 440 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
-config KVM_BOOKE_HOST
- bool "KVM host support for Book E PowerPC processors"
- depends on KVM && 44x
+config KVM_EXIT_TIMING
+ bool "Detailed exit timing"
+ depends on KVM
---help---
- Provides host support for KVM on Book E PowerPC processors. Currently
- this works on 440 processors only.
+ Calculate elapsed time for every exit/enter cycle. A per-vcpu
+ report is available in debugfs kvm/vm#_vcpu#_timing.
+ The overhead is relatively small, however it is not recommended for
+ production environments.
+
+ If unsure, say N.
config KVM_TRACE
bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2a5d4397ac4b..df7ba59e6d53 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
-kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
+kvm-objs := $(common-objs-y) powerpc.o emulate.o
+obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM) += kvm.o
AFLAGS_booke_interrupts.o := -I$(obj)
-kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
-obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
+kvm-440-objs := \
+ booke.o \
+ booke_interrupts.o \
+ 44x.o \
+ 44x_tlb.o \
+ 44x_emulate.o
+obj-$(CONFIG_KVM_440) += kvm-440.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c
index 7b2591e26bae..35485dd6927e 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke.c
@@ -24,21 +24,26 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
+#include "timing.h"
+#include <asm/cacheflush.h>
+#include <asm/kvm_44x.h>
+#include "booke.h"
#include "44x_tlb.h"
+unsigned long kvmppc_booke_handlers;
+
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "exits", VCPU_STAT(sum_exits) },
{ "mmio", VCPU_STAT(mmio_exits) },
{ "dcr", VCPU_STAT(dcr_exits) },
{ "sig", VCPU_STAT(signal_exits) },
- { "light", VCPU_STAT(light_exits) },
{ "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
{ "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
{ "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-static const u32 interrupt_msr_mask[16] = {
- [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
- [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
- [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
- [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
- [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
-};
-
-const unsigned char exception_priority[] = {
- [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
- [BOOKE_INTERRUPT_INST_STORAGE] = 1,
- [BOOKE_INTERRUPT_ALIGNMENT] = 2,
- [BOOKE_INTERRUPT_PROGRAM] = 3,
- [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
- [BOOKE_INTERRUPT_SYSCALL] = 5,
- [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
- [BOOKE_INTERRUPT_DTLB_MISS] = 7,
- [BOOKE_INTERRUPT_ITLB_MISS] = 8,
- [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
- [BOOKE_INTERRUPT_DEBUG] = 10,
- [BOOKE_INTERRUPT_CRITICAL] = 11,
- [BOOKE_INTERRUPT_WATCHDOG] = 12,
- [BOOKE_INTERRUPT_EXTERNAL] = 13,
- [BOOKE_INTERRUPT_FIT] = 14,
- [BOOKE_INTERRUPT_DECREMENTER] = 15,
-};
-
-const unsigned char priority_exception[] = {
- BOOKE_INTERRUPT_DATA_STORAGE,
- BOOKE_INTERRUPT_INST_STORAGE,
- BOOKE_INTERRUPT_ALIGNMENT,
- BOOKE_INTERRUPT_PROGRAM,
- BOOKE_INTERRUPT_FP_UNAVAIL,
- BOOKE_INTERRUPT_SYSCALL,
- BOOKE_INTERRUPT_AP_UNAVAIL,
- BOOKE_INTERRUPT_DTLB_MISS,
- BOOKE_INTERRUPT_ITLB_MISS,
- BOOKE_INTERRUPT_MACHINE_CHECK,
- BOOKE_INTERRUPT_DEBUG,
- BOOKE_INTERRUPT_CRITICAL,
- BOOKE_INTERRUPT_WATCHDOG,
- BOOKE_INTERRUPT_EXTERNAL,
- BOOKE_INTERRUPT_FIT,
- BOOKE_INTERRUPT_DECREMENTER,
-};
-
-
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
- struct tlbe *tlbe;
- int i;
-
- printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
- printk("| %2s | %3s | %8s | %8s | %8s |\n",
- "nr", "tid", "word0", "word1", "word2");
-
- for (i = 0; i < PPC44x_TLB_SIZE; i++) {
- tlbe = &vcpu->arch.guest_tlb[i];
- if (tlbe->word0 & PPC44x_TLB_VALID)
- printk(" G%2d | %02X | %08X | %08X | %08X |\n",
- i, tlbe->tid, tlbe->word0, tlbe->word1,
- tlbe->word2);
- }
-
- for (i = 0; i < PPC44x_TLB_SIZE; i++) {
- tlbe = &vcpu->arch.shadow_tlb[i];
- if (tlbe->word0 & PPC44x_TLB_VALID)
- printk(" S%2d | %02X | %08X | %08X | %08X |\n",
- i, tlbe->tid, tlbe->word0, tlbe->word1,
- tlbe->word2);
- }
-}
-
/* TODO: use vcpu_printf() */
void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
{
int i;
- printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
- printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
- printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+ printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
+ printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+ printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
for (i = 0; i < 32; i += 4) {
- printk("gpr%02d: %08x %08x %08x %08x\n", i,
+ printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
vcpu->arch.gpr[i],
vcpu->arch.gpr[i+1],
vcpu->arch.gpr[i+2],
@@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
}
}
-/* Check if we are ready to deliver the interrupt */
-static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
+ unsigned int priority)
{
- int r;
+ set_bit(priority, &vcpu->arch.pending_exceptions);
+}
- switch (interrupt) {
- case BOOKE_INTERRUPT_CRITICAL:
- r = vcpu->arch.msr & MSR_CE;
- break;
- case BOOKE_INTERRUPT_MACHINE_CHECK:
- r = vcpu->arch.msr & MSR_ME;
- break;
- case BOOKE_INTERRUPT_EXTERNAL:
- r = vcpu->arch.msr & MSR_EE;
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
+}
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+ return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
+}
+
+/* Deliver the interrupt of the corresponding priority, if possible. */
+static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
+ unsigned int priority)
+{
+ int allowed = 0;
+ ulong msr_mask;
+
+ switch (priority) {
+ case BOOKE_IRQPRIO_PROGRAM:
+ case BOOKE_IRQPRIO_DTLB_MISS:
+ case BOOKE_IRQPRIO_ITLB_MISS:
+ case BOOKE_IRQPRIO_SYSCALL:
+ case BOOKE_IRQPRIO_DATA_STORAGE:
+ case BOOKE_IRQPRIO_INST_STORAGE:
+ case BOOKE_IRQPRIO_FP_UNAVAIL:
+ case BOOKE_IRQPRIO_AP_UNAVAIL:
+ case BOOKE_IRQPRIO_ALIGNMENT:
+ allowed = 1;
+ msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
- case BOOKE_INTERRUPT_DECREMENTER:
- r = vcpu->arch.msr & MSR_EE;
+ case BOOKE_IRQPRIO_CRITICAL:
+ case BOOKE_IRQPRIO_WATCHDOG:
+ allowed = vcpu->arch.msr & MSR_CE;
+ msr_mask = MSR_ME;
break;
- case BOOKE_INTERRUPT_FIT:
- r = vcpu->arch.msr & MSR_EE;
+ case BOOKE_IRQPRIO_MACHINE_CHECK:
+ allowed = vcpu->arch.msr & MSR_ME;
+ msr_mask = 0;
break;
- case BOOKE_INTERRUPT_WATCHDOG:
- r = vcpu->arch.msr & MSR_CE;
+ case BOOKE_IRQPRIO_EXTERNAL:
+ case BOOKE_IRQPRIO_DECREMENTER:
+ case BOOKE_IRQPRIO_FIT:
+ allowed = vcpu->arch.msr & MSR_EE;
+ msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
- case BOOKE_INTERRUPT_DEBUG:
- r = vcpu->arch.msr & MSR_DE;
+ case BOOKE_IRQPRIO_DEBUG:
+ allowed = vcpu->arch.msr & MSR_DE;
+ msr_mask = MSR_ME;
break;
- default:
- r = 1;
}
- return r;
-}
+ if (allowed) {
+ vcpu->arch.srr0 = vcpu->arch.pc;
+ vcpu->arch.srr1 = vcpu->arch.msr;
+ vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+ kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
-static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
-{
- switch (interrupt) {
- case BOOKE_INTERRUPT_DECREMENTER:
- vcpu->arch.tsr |= TSR_DIS;
- break;
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
}
- vcpu->arch.srr0 = vcpu->arch.pc;
- vcpu->arch.srr1 = vcpu->arch.msr;
- vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
- kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+ return allowed;
}
/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
- unsigned int exception;
unsigned int priority;
- priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+ priority = __ffs(*pending);
while (priority <= BOOKE_MAX_INTERRUPT) {
- exception = priority_exception[priority];
- if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
- kvmppc_clear_exception(vcpu, exception);
- kvmppc_deliver_interrupt(vcpu, exception);
+ if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
- }
priority = find_next_bit(pending,
BITS_PER_BYTE * sizeof(*pending),
@@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
enum emulation_result er;
int r = RESUME_HOST;
+ /* update before a new last_exit_type is rewritten */
+ kvmppc_update_timing_stats(vcpu);
+
local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_EXTERNAL:
+ kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
+ if (need_resched())
+ cond_resched();
+ r = RESUME_GUEST;
+ break;
+
case BOOKE_INTERRUPT_DECREMENTER:
/* Since we switched IVPR back to the host's value, the host
* handled this interrupt the moment we enabled interrupts.
* Now we just offer it a chance to reschedule the guest. */
-
- /* XXX At this point the TLB still holds our shadow TLB, so if
- * we do reschedule the host will fault over it. Perhaps we
- * should politely restore the host's entries to minimize
- * misses before ceding control. */
+ kvmppc_account_exit(vcpu, DEC_EXITS);
if (need_resched())
cond_resched();
- if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
- vcpu->stat.dec_exits++;
- else
- vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
r = RESUME_GUEST;
+ kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
er = kvmppc_emulate_instruction(run, vcpu);
switch (er) {
case EMULATE_DONE:
+ /* don't overwrite subtypes, just account kvm_stats */
+ kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
/* Future optimization: only reload non-volatiles if
* they were actually modified by emulation. */
- vcpu->stat.emulated_inst_exits++;
r = RESUME_GUEST_NV;
break;
case EMULATE_DO_DCR:
@@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case EMULATE_FAIL:
/* XXX Deliver Program interrupt to guest. */
- printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
__func__, vcpu->arch.pc, vcpu->arch.last_inst);
/* For debugging, encode the failing instruction and
* report it to userspace. */
@@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
- kvmppc_queue_exception(vcpu, exit_nr);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+ kvmppc_account_exit(vcpu, FP_UNAVAIL);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.dsi_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+ kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_INST_STORAGE:
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.isi_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+ kvmppc_account_exit(vcpu, ISI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_SYSCALL:
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.syscall_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
+ kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
+ /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_DTLB_MISS: {
- struct tlbe *gtlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.fault_dear;
+ int gtlb_index;
gfn_t gfn;
/* Check the guest TLB. */
- gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
- if (!gtlbe) {
+ gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+ if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
- kvmppc_queue_exception(vcpu, exit_nr);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
- vcpu->stat.dtlb_real_miss_exits++;
+ kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
break;
}
+ gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
@@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
- gtlbe->word2);
- vcpu->stat.dtlb_virt_miss_exits++;
+ kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
+ gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
r = RESUME_GUEST;
} else {
/* Guest has mapped and accessed a page which is not
* actually RAM. */
r = kvmppc_emulate_mmio(run, vcpu);
+ kvmppc_account_exit(vcpu, MMIO_EXITS);
}
break;
}
+ /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_ITLB_MISS: {
- struct tlbe *gtlbe;
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.pc;
+ gpa_t gpaddr;
gfn_t gfn;
+ int gtlb_index;
r = RESUME_GUEST;
/* Check the guest TLB. */
- gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
- if (!gtlbe) {
+ gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+ if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
- kvmppc_queue_exception(vcpu, exit_nr);
- vcpu->stat.itlb_real_miss_exits++;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+ kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
break;
}
- vcpu->stat.itlb_virt_miss_exits++;
+ kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
- gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+ gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+ gpaddr = tlb_xlate(gtlbe, eaddr);
+ gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
/* The guest TLB had a mapping, but the shadow TLB
@@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
- gtlbe->word2);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
+ gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
} else {
/* Guest mapped and leaped at non-RAM! */
- kvmppc_queue_exception(vcpu,
- BOOKE_INTERRUPT_MACHINE_CHECK);
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
}
break;
@@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
mtspr(SPRN_DBSR, dbsr);
run->exit_reason = KVM_EXIT_DEBUG;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
r = RESUME_HOST;
break;
}
@@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_disable();
- kvmppc_check_and_deliver_interrupts(vcpu);
+ kvmppc_core_deliver_interrupts(vcpu);
- /* Do some exit accounting. */
- vcpu->stat.sum_exits++;
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-
- vcpu->stat.signal_exits++;
- } else {
- vcpu->stat.light_exits++;
- }
- } else {
- switch (run->exit_reason) {
- case KVM_EXIT_MMIO:
- vcpu->stat.mmio_exits++;
- break;
- case KVM_EXIT_DCR:
- vcpu->stat.dcr_exits++;
- break;
- case KVM_EXIT_INTR:
- vcpu->stat.signal_exits++;
- break;
+ kvmppc_account_exit(vcpu, SIGNAL_EXITS);
}
}
@@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
-
- tlbe->tid = 0;
- tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
- tlbe->word1 = 0;
- tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
- tlbe++;
- tlbe->tid = 0;
- tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
- tlbe->word1 = 0xef600000;
- tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
- | PPC44x_TLB_I | PPC44x_TLB_G;
-
vcpu->arch.pc = 0;
vcpu->arch.msr = 0;
vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
* before it's programmed its own IVPR. */
vcpu->arch.ivpr = 0x55550000;
- /* Since the guest can directly access the timebase, it must know the
- * real timebase frequency. Accordingly, it must see the state of
- * CCR1[TCS]. */
- vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+ kvmppc_init_timing_stats(vcpu);
- return 0;
+ return kvmppc_core_vcpu_setup(vcpu);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
vcpu->arch.xer = regs->xer;
- vcpu->arch.msr = regs->msr;
+ kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
vcpu->arch.sprg0 = regs->sprg0;
@@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
- struct tlbe *gtlbe;
- int index;
- gva_t eaddr;
- u8 pid;
- u8 as;
-
- eaddr = tr->linear_address;
- pid = (tr->linear_address >> 32) & 0xff;
- as = (tr->linear_address >> 40) & 0x1;
-
- index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
- if (index == -1) {
- tr->valid = 0;
- return 0;
- }
+ return kvmppc_core_vcpu_translate(vcpu, tr);
+}
- gtlbe = &vcpu->arch.guest_tlb[index];
+int kvmppc_booke_init(void)
+{
+ unsigned long ivor[16];
+ unsigned long max_ivor = 0;
+ int i;
- tr->physical_address = tlb_xlate(gtlbe, eaddr);
- /* XXX what does "writeable" and "usermode" even mean? */
- tr->valid = 1;
+ /* We install our own exception handlers by hijacking IVPR. IVPR must
+ * be 16-bit aligned, so we need a 64KB allocation. */
+ kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ VCPU_SIZE_ORDER);
+ if (!kvmppc_booke_handlers)
+ return -ENOMEM;
+
+ /* XXX make sure our handlers are smaller than Linux's */
+
+ /* Copy our interrupt handlers to match host IVORs. That way we don't
+ * have to swap the IVORs on every guest/host transition. */
+ ivor[0] = mfspr(SPRN_IVOR0);
+ ivor[1] = mfspr(SPRN_IVOR1);
+ ivor[2] = mfspr(SPRN_IVOR2);
+ ivor[3] = mfspr(SPRN_IVOR3);
+ ivor[4] = mfspr(SPRN_IVOR4);
+ ivor[5] = mfspr(SPRN_IVOR5);
+ ivor[6] = mfspr(SPRN_IVOR6);
+ ivor[7] = mfspr(SPRN_IVOR7);
+ ivor[8] = mfspr(SPRN_IVOR8);
+ ivor[9] = mfspr(SPRN_IVOR9);
+ ivor[10] = mfspr(SPRN_IVOR10);
+ ivor[11] = mfspr(SPRN_IVOR11);
+ ivor[12] = mfspr(SPRN_IVOR12);
+ ivor[13] = mfspr(SPRN_IVOR13);
+ ivor[14] = mfspr(SPRN_IVOR14);
+ ivor[15] = mfspr(SPRN_IVOR15);
+
+ for (i = 0; i < 16; i++) {
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + i * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
return 0;
}
+
+void __exit kvmppc_booke_exit(void)
+{
+ free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+ kvm_exit();
+}
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
new file mode 100644
index 000000000000..cf7c94ca24bf
--- /dev/null
+++ b/arch/powerpc/kvm/booke.h
@@ -0,0 +1,60 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_BOOKE_H__
+#define __KVM_BOOKE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include "timing.h"
+
+/* interrupt priortity ordering */
+#define BOOKE_IRQPRIO_DATA_STORAGE 0
+#define BOOKE_IRQPRIO_INST_STORAGE 1
+#define BOOKE_IRQPRIO_ALIGNMENT 2
+#define BOOKE_IRQPRIO_PROGRAM 3
+#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#define BOOKE_IRQPRIO_SYSCALL 5
+#define BOOKE_IRQPRIO_AP_UNAVAIL 6
+#define BOOKE_IRQPRIO_DTLB_MISS 7
+#define BOOKE_IRQPRIO_ITLB_MISS 8
+#define BOOKE_IRQPRIO_MACHINE_CHECK 9
+#define BOOKE_IRQPRIO_DEBUG 10
+#define BOOKE_IRQPRIO_CRITICAL 11
+#define BOOKE_IRQPRIO_WATCHDOG 12
+#define BOOKE_IRQPRIO_EXTERNAL 13
+#define BOOKE_IRQPRIO_FIT 14
+#define BOOKE_IRQPRIO_DECREMENTER 15
+
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
+static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+ if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+ kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+
+ vcpu->arch.msr = new_msr;
+
+ if (vcpu->arch.msr & MSR_WE) {
+ kvm_vcpu_block(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+ };
+}
+
+#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
deleted file mode 100644
index b480341bc31e..000000000000
--- a/arch/powerpc/kvm/booke_host.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <asm/cacheflush.h>
-#include <asm/kvm_ppc.h>
-
-unsigned long kvmppc_booke_handlers;
-
-static int kvmppc_booke_init(void)
-{
- unsigned long ivor[16];
- unsigned long max_ivor = 0;
- int i;
-
- /* We install our own exception handlers by hijacking IVPR. IVPR must
- * be 16-bit aligned, so we need a 64KB allocation. */
- kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- VCPU_SIZE_ORDER);
- if (!kvmppc_booke_handlers)
- return -ENOMEM;
-
- /* XXX make sure our handlers are smaller than Linux's */
-
- /* Copy our interrupt handlers to match host IVORs. That way we don't
- * have to swap the IVORs on every guest/host transition. */
- ivor[0] = mfspr(SPRN_IVOR0);
- ivor[1] = mfspr(SPRN_IVOR1);
- ivor[2] = mfspr(SPRN_IVOR2);
- ivor[3] = mfspr(SPRN_IVOR3);
- ivor[4] = mfspr(SPRN_IVOR4);
- ivor[5] = mfspr(SPRN_IVOR5);
- ivor[6] = mfspr(SPRN_IVOR6);
- ivor[7] = mfspr(SPRN_IVOR7);
- ivor[8] = mfspr(SPRN_IVOR8);
- ivor[9] = mfspr(SPRN_IVOR9);
- ivor[10] = mfspr(SPRN_IVOR10);
- ivor[11] = mfspr(SPRN_IVOR11);
- ivor[12] = mfspr(SPRN_IVOR12);
- ivor[13] = mfspr(SPRN_IVOR13);
- ivor[14] = mfspr(SPRN_IVOR14);
- ivor[15] = mfspr(SPRN_IVOR15);
-
- for (i = 0; i < 16; i++) {
- if (ivor[i] > max_ivor)
- max_ivor = ivor[i];
-
- memcpy((void *)kvmppc_booke_handlers + ivor[i],
- kvmppc_handlers_start + i * kvmppc_handler_len,
- kvmppc_handler_len);
- }
- flush_icache_range(kvmppc_booke_handlers,
- kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
-
- return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
-}
-
-static void __exit kvmppc_booke_exit(void)
-{
- free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
- kvm_exit();
-}
-
-module_init(kvmppc_booke_init)
-module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 95e165baf85f..084ebcd7dd83 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
li r6, 1
slw r6, r6, r5
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save exit time */
+1:
+ mfspr r7, SPRN_TBRU
+ mfspr r8, SPRN_TBRL
+ mfspr r9, SPRN_TBRU
+ cmpw r9, r7
+ bne 1b
+ stw r8, VCPU_TIMING_EXIT_TBL(r4)
+ stw r9, VCPU_TIMING_EXIT_TBU(r4)
+#endif
+
/* Save the faulting instruction and all GPRs for emulation. */
andi. r7, r6, NEED_INST_MASK
beq ..skip_inst_copy
@@ -335,54 +347,6 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
- /* Prevent all asynchronous TLB updates. */
- mfmsr r5
- lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
- ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
- andc r6, r5, r6
- mtmsr r6
-
- /* Load the guest mappings, leaving the host's "pinned" kernel mappings
- * in place. */
- mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
- li r5, PPC44x_TLB_SIZE
- lis r5, tlb_44x_hwater@ha
- lwz r5, tlb_44x_hwater@l(r5)
- mtctr r5
- addi r9, r4, VCPU_SHADOW_TLB
- addi r5, r4, VCPU_SHADOW_MOD
- li r3, 0
-1:
- lbzx r7, r3, r5
- cmpwi r7, 0
- beq 3f
-
- /* Load guest entry. */
- mulli r11, r3, TLBE_BYTES
- add r11, r11, r9
- lwz r7, 0(r11)
- mtspr SPRN_MMUCR, r7
- lwz r7, 4(r11)
- tlbwe r7, r3, PPC44x_TLB_PAGEID
- lwz r7, 8(r11)
- tlbwe r7, r3, PPC44x_TLB_XLAT
- lwz r7, 12(r11)
- tlbwe r7, r3, PPC44x_TLB_ATTRIB
-3:
- addi r3, r3, 1 /* Increment index. */
- bdnz 1b
-
- mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
-
- /* Clear bitmap of modified TLB entries */
- li r5, PPC44x_TLB_SIZE>>2
- mtctr r5
- addi r5, r4, VCPU_SHADOW_MOD - 4
- li r6, 0
-1:
- stwu r6, 4(r5)
- bdnz 1b
-
iccci 0, 0 /* XXX hack */
/* Load some guest volatiles. */
@@ -423,6 +387,18 @@ lightweight_exit:
lwz r3, VCPU_SPRG7(r4)
mtspr SPRN_SPRG7, r3
+#ifdef CONFIG_KVM_EXIT_TIMING
+ /* save enter time */
+1:
+ mfspr r6, SPRN_TBRU
+ mfspr r7, SPRN_TBRL
+ mfspr r8, SPRN_TBRU
+ cmpw r8, r6
+ bne 1b
+ stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
+ stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
+#endif
+
/* Finish loading guest volatiles and jump to guest. */
lwz r3, VCPU_CTR(r4)
mtctr r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 0fce4fbdc20d..d1d38daa93fb 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,161 +23,14 @@
#include <linux/string.h>
#include <linux/kvm_host.h>
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
+#include <asm/reg.h>
#include <asm/time.h>
#include <asm/byteorder.h>
#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include "timing.h"
-#include "44x_tlb.h"
-
-/* Instruction decoding */
-static inline unsigned int get_op(u32 inst)
-{
- return inst >> 26;
-}
-
-static inline unsigned int get_xop(u32 inst)
-{
- return (inst >> 1) & 0x3ff;
-}
-
-static inline unsigned int get_sprn(u32 inst)
-{
- return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_dcrn(u32 inst)
-{
- return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_rt(u32 inst)
-{
- return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_rs(u32 inst)
-{
- return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_ra(u32 inst)
-{
- return (inst >> 16) & 0x1f;
-}
-
-static inline unsigned int get_rb(u32 inst)
-{
- return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_rc(u32 inst)
-{
- return inst & 0x1;
-}
-
-static inline unsigned int get_ws(u32 inst)
-{
- return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_d(u32 inst)
-{
- return inst & 0xffff;
-}
-
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct tlbe *tlbe)
-{
- gpa_t gpa;
-
- if (!get_tlb_v(tlbe))
- return 0;
-
- /* Does it match current guest AS? */
- /* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
- return 0;
-
- gpa = get_tlb_raddr(tlbe);
- if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
- /* Mapping is not for RAM. */
- return 0;
-
- return 1;
-}
-
-static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
-{
- u64 eaddr;
- u64 raddr;
- u64 asid;
- u32 flags;
- struct tlbe *tlbe;
- unsigned int ra;
- unsigned int rs;
- unsigned int ws;
- unsigned int index;
-
- ra = get_ra(inst);
- rs = get_rs(inst);
- ws = get_ws(inst);
-
- index = vcpu->arch.gpr[ra];
- if (index > PPC44x_TLB_SIZE) {
- printk("%s: index %d\n", __func__, index);
- kvmppc_dump_vcpu(vcpu);
- return EMULATE_FAIL;
- }
-
- tlbe = &vcpu->arch.guest_tlb[index];
-
- /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
- if (tlbe->word0 & PPC44x_TLB_VALID) {
- eaddr = get_tlb_eaddr(tlbe);
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
- }
-
- switch (ws) {
- case PPC44x_TLB_PAGEID:
- tlbe->tid = vcpu->arch.mmucr & 0xff;
- tlbe->word0 = vcpu->arch.gpr[rs];
- break;
-
- case PPC44x_TLB_XLAT:
- tlbe->word1 = vcpu->arch.gpr[rs];
- break;
-
- case PPC44x_TLB_ATTRIB:
- tlbe->word2 = vcpu->arch.gpr[rs];
- break;
-
- default:
- return EMULATE_FAIL;
- }
-
- if (tlbe_is_host_safe(vcpu, tlbe)) {
- eaddr = get_tlb_eaddr(tlbe);
- raddr = get_tlb_raddr(tlbe);
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- flags = tlbe->word2 & 0xffff;
-
- /* Create a 4KB mapping on the host. If the guest wanted a
- * large page, only the first 4KB is mapped here and the rest
- * are mapped on the fly. */
- kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
- }
-
- KVMTRACE_5D(GTLB_WRITE, vcpu, index,
- tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
- handler);
-
- return EMULATE_DONE;
-}
-
-static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tcr & TCR_DIE) {
/* The decrementer ticks at the same rate as the timebase, so
@@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pc = vcpu->arch.srr0;
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
/* XXX to do:
* lhax
* lhaux
@@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
*
* XXX is_bigendian should depend on MMU mapping or MSR[LE]
*/
+/* XXX Should probably auto-generate instruction decoding for a particular core
+ * from opcode tables in the future. */
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = vcpu->arch.last_inst;
u32 ea;
int ra;
int rb;
- int rc;
int rs;
int rt;
int sprn;
- int dcrn;
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
+ /* this default type might be overwritten by subcategories */
+ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
+
switch (get_op(inst)) {
- case 3: /* trap */
- printk("trap!\n");
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+ case 3: /* trap */
+ vcpu->arch.esr |= ESR_PTR;
+ kvmppc_core_queue_program(vcpu);
advance = 0;
break;
- case 19:
- switch (get_xop(inst)) {
- case 50: /* rfi */
- kvmppc_emul_rfi(vcpu);
- advance = 0;
- break;
-
- default:
- emulated = EMULATE_FAIL;
- break;
- }
- break;
-
case 31:
switch (get_xop(inst)) {
@@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case 83: /* mfmsr */
- rt = get_rt(inst);
- vcpu->arch.gpr[rt] = vcpu->arch.msr;
- break;
-
case 87: /* lbzx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
- case 131: /* wrtee */
- rs = get_rs(inst);
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (vcpu->arch.gpr[rs] & MSR_EE);
- break;
-
- case 146: /* mtmsr */
- rs = get_rs(inst);
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
- break;
-
case 151: /* stwx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 1);
break;
- case 163: /* wrteei */
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (inst & MSR_EE);
- break;
-
case 215: /* stbx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 323: /* mfdcr */
- dcrn = get_dcrn(inst);
- rt = get_rt(inst);
-
- /* The guest may access CPR0 registers to determine the timebase
- * frequency, and it must know the real host frequency because it
- * can directly access the timebase registers.
- *
- * It would be possible to emulate those accesses in userspace,
- * but userspace can really only figure out the end frequency.
- * We could decompose that into the factors that compute it, but
- * that's tricky math, and it's easier to just report the real
- * CPR0 values.
- */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
- break;
- case DCRN_CPR0_CONFIG_DATA:
- local_irq_disable();
- mtdcr(DCRN_CPR0_CONFIG_ADDR,
- vcpu->arch.cpr0_cfgaddr);
- vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
- local_irq_enable();
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = 0;
- run->dcr.is_write = 0;
- vcpu->arch.io_gpr = rt;
- vcpu->arch.dcr_needed = 1;
- emulated = EMULATE_DO_DCR;
- }
-
- break;
-
case 339: /* mfspr */
sprn = get_sprn(inst);
rt = get_rt(inst);
@@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
- case SPRN_MMUCR:
- vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
- case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
- case SPRN_IVPR:
- vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
- case SPRN_CCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
- case SPRN_CCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
case SPRN_PVR:
vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
- case SPRN_DEAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
- case SPRN_ESR:
- vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
- case SPRN_DBCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
- case SPRN_DBCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
@@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
- case SPRN_IVOR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
- case SPRN_IVOR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
- case SPRN_IVOR2:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
- case SPRN_IVOR3:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
- case SPRN_IVOR4:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
- case SPRN_IVOR5:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
- case SPRN_IVOR6:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
- case SPRN_IVOR7:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
- case SPRN_IVOR8:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
- case SPRN_IVOR9:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
- case SPRN_IVOR10:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
- case SPRN_IVOR11:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
- case SPRN_IVOR12:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
- case SPRN_IVOR13:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
- case SPRN_IVOR14:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
- case SPRN_IVOR15:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
-
default:
- printk("mfspr: unknown spr %x\n", sprn);
- vcpu->arch.gpr[rt] = 0;
+ emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
+ if (emulated == EMULATE_FAIL) {
+ printk("mfspr: unknown spr %x\n", sprn);
+ vcpu->arch.gpr[rt] = 0;
+ }
break;
}
break;
@@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 451: /* mtdcr */
- dcrn = get_dcrn(inst);
- rs = get_rs(inst);
-
- /* emulate some access in kernel */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = vcpu->arch.gpr[rs];
- run->dcr.is_write = 1;
- vcpu->arch.dcr_needed = 1;
- emulated = EMULATE_DO_DCR;
- }
-
- break;
-
case 467: /* mtspr */
sprn = get_sprn(inst);
rs = get_rs(inst);
@@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
case SPRN_SRR1:
vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_MMUCR:
- vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
- case SPRN_PID:
- kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
- case SPRN_CCR0:
- vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_CCR1:
- vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_DEAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
- case SPRN_ESR:
- vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR0:
- vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR1:
- vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
@@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_emulate_dec(vcpu);
break;
- case SPRN_TSR:
- vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
-
- case SPRN_TCR:
- vcpu->arch.tcr = vcpu->arch.gpr[rs];
- kvmppc_emulate_dec(vcpu);
- break;
-
case SPRN_SPRG0:
vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG1:
@@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_SPRG3:
vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
- case SPRN_SPRG4:
- vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG5:
- vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG6:
- vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG7:
- vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
- case SPRN_IVPR:
- vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR0:
- vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR1:
- vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR2:
- vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR3:
- vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR4:
- vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR5:
- vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR6:
- vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR7:
- vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR8:
- vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR9:
- vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR10:
- vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR11:
- vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR12:
- vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR13:
- vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR14:
- vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
- case SPRN_IVOR15:
- vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
-
default:
- printk("mtspr: unknown spr %x\n", sprn);
- emulated = EMULATE_FAIL;
+ emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
+ if (emulated == EMULATE_FAIL)
+ printk("mtspr: unknown spr %x\n", sprn);
break;
}
break;
@@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 0);
break;
- case 978: /* tlbwe */
- emulated = kvmppc_emul_tlbwe(vcpu, inst);
- break;
-
- case 914: { /* tlbsx */
- int index;
- unsigned int as = get_mmucr_sts(vcpu);
- unsigned int pid = get_mmucr_stid(vcpu);
-
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
- rc = get_rc(inst);
-
- ea = vcpu->arch.gpr[rb];
- if (ra)
- ea += vcpu->arch.gpr[ra];
-
- index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
- if (rc) {
- if (index < 0)
- vcpu->arch.cr &= ~0x20000000;
- else
- vcpu->arch.cr |= 0x20000000;
- }
- vcpu->arch.gpr[rt] = index;
-
- }
- break;
-
case 790: /* lhbrx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
2, 0);
break;
- case 966: /* iccci */
- break;
-
default:
- printk("unknown: op %d xop %d\n", get_op(inst),
- get_xop(inst));
+ /* Attempt core-specific emulation below. */
emulated = EMULATE_FAIL;
- break;
}
break;
@@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
default:
- printk("unknown op %d\n", get_op(inst));
emulated = EMULATE_FAIL;
- break;
}
- KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
+ if (emulated == EMULATE_FAIL) {
+ emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+ if (emulated == EMULATE_FAIL) {
+ advance = 0;
+ printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
+ "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+ }
+ }
+
+ KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8bef0efcdfe1..2822c8ccfaaf 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,9 +28,9 @@
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
+#include "timing.h"
#include "../mm/mmu_decl.h"
-
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
return gfn;
@@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
void kvm_arch_check_processor_compat(void *rtn)
{
- int r;
-
- if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
- r = 0;
- else
- r = -ENOTSUPP;
-
- *(int *)rtn = r;
+ *(int *)rtn = kvmppc_core_check_processor_compat();
}
struct kvm *kvm_arch_create_vm(void)
@@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
- case KVM_CAP_USER_MEMORY:
- r = 1;
- break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
@@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
- int err;
-
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu) {
- err = -ENOMEM;
- goto out;
- }
-
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
+ vcpu = kvmppc_core_vcpu_create(kvm, id);
+ kvmppc_create_vcpu_debugfs(vcpu, id);
return vcpu;
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
+ kvmppc_remove_vcpu_debugfs(vcpu);
+ kvmppc_core_vcpu_free(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
-
- return test_bit(priority, &vcpu->arch.pending_exceptions);
+ return kvmppc_core_pending_dec(vcpu);
}
static void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+ kvmppc_core_queue_dec(vcpu);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
@@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kvmppc_core_destroy_mmu(vcpu);
}
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvmppc_disable_debug_interrupts(void)
-{
- mtmsr(mfmsr() & ~MSR_DE);
-}
-
-static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
-{
- kvmppc_disable_debug_interrupts();
-
- mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
- mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
- mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
- mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
- mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
- mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
- mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
- mtmsr(vcpu->arch.host_msr);
-}
-
-static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
- u32 dbcr0 = 0;
-
- vcpu->arch.host_msr = mfmsr();
- kvmppc_disable_debug_interrupts();
-
- /* Save host debug register state. */
- vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
- vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
- vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
- vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
- vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
- vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
- vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
- /* set registers up for guest */
-
- if (dbg->bp[0]) {
- mtspr(SPRN_IAC1, dbg->bp[0]);
- dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
- }
- if (dbg->bp[1]) {
- mtspr(SPRN_IAC2, dbg->bp[1]);
- dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
- }
- if (dbg->bp[2]) {
- mtspr(SPRN_IAC3, dbg->bp[2]);
- dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
- }
- if (dbg->bp[3]) {
- mtspr(SPRN_IAC4, dbg->bp[3]);
- dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
- }
-
- mtspr(SPRN_DBCR0, dbcr0);
- mtspr(SPRN_DBCR1, 0);
- mtspr(SPRN_DBCR2, 0);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- int i;
-
if (vcpu->guest_debug.enabled)
- kvmppc_load_guest_debug_registers(vcpu);
+ kvmppc_core_load_guest_debugstate(vcpu);
- /* Mark every guest entry in the shadow TLB entry modified, so that they
- * will all be reloaded on the next vcpu run (instead of being
- * demand-faulted). */
- for (i = 0; i <= tlb_44x_hwater; i++)
- kvmppc_tlbe_set_modified(vcpu, i);
+ kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_debug.enabled)
- kvmppc_restore_host_debug_state(vcpu);
+ kvmppc_core_load_host_debugstate(vcpu);
/* Don't leave guest TLB entries resident when being de-scheduled. */
/* XXX It would be nice to differentiate between heavyweight exit and
* sched_out here, since we could avoid the TLB flush for heavyweight
* exits. */
_tlbil_all();
+ kvmppc_core_vcpu_put(vcpu);
}
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
*gpr = run->dcr.data;
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
if (run->mmio.len > sizeof(*gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->arch.dcr_needed = 0;
}
- kvmppc_check_and_deliver_interrupts(vcpu);
+ kvmppc_core_deliver_interrupts(vcpu);
local_irq_disable();
kvm_guest_enter();
@@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
- kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+ kvmppc_core_queue_external(vcpu, irq);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
new file mode 100644
index 000000000000..47ee603f558e
--- /dev/null
+++ b/arch/powerpc/kvm/timing.c
@@ -0,0 +1,239 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <asm/time.h>
+#include <asm-generic/div64.h>
+
+#include "timing.h"
+
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ /* pause guest execution to avoid concurrent updates */
+ local_irq_disable();
+ mutex_lock(&vcpu->mutex);
+
+ vcpu->arch.last_exit_type = 0xDEAD;
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+ vcpu->arch.timing_count_type[i] = 0;
+ vcpu->arch.timing_max_duration[i] = 0;
+ vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
+ vcpu->arch.timing_sum_duration[i] = 0;
+ vcpu->arch.timing_sum_quad_duration[i] = 0;
+ }
+ vcpu->arch.timing_last_exit = 0;
+ vcpu->arch.timing_exit.tv64 = 0;
+ vcpu->arch.timing_last_enter.tv64 = 0;
+
+ mutex_unlock(&vcpu->mutex);
+ local_irq_enable();
+}
+
+static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
+{
+ u64 old;
+
+ do_div(duration, tb_ticks_per_usec);
+ if (unlikely(duration > 0xFFFFFFFF)) {
+ printk(KERN_ERR"%s - duration too big -> overflow"
+ " duration %lld type %d exit #%d\n",
+ __func__, duration, type,
+ vcpu->arch.timing_count_type[type]);
+ return;
+ }
+
+ vcpu->arch.timing_count_type[type]++;
+
+ /* sum */
+ old = vcpu->arch.timing_sum_duration[type];
+ vcpu->arch.timing_sum_duration[type] += duration;
+ if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
+ printk(KERN_ERR"%s - wrap adding sum of durations"
+ " old %lld new %lld type %d exit # of type %d\n",
+ __func__, old, vcpu->arch.timing_sum_duration[type],
+ type, vcpu->arch.timing_count_type[type]);
+ }
+
+ /* square sum */
+ old = vcpu->arch.timing_sum_quad_duration[type];
+ vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
+ if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
+ printk(KERN_ERR"%s - wrap adding sum of squared durations"
+ " old %lld new %lld type %d exit # of type %d\n",
+ __func__, old,
+ vcpu->arch.timing_sum_quad_duration[type],
+ type, vcpu->arch.timing_count_type[type]);
+ }
+
+ /* set min/max */
+ if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
+ vcpu->arch.timing_min_duration[type] = duration;
+ if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
+ vcpu->arch.timing_max_duration[type] = duration;
+}
+
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
+{
+ u64 exit = vcpu->arch.timing_last_exit;
+ u64 enter = vcpu->arch.timing_last_enter.tv64;
+
+ /* save exit time, used next exit when the reenter time is known */
+ vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
+
+ if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
+ return; /* skip incomplete cycle (e.g. after reset) */
+
+ /* update statistics for average and standard deviation */
+ add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
+ /* enter -> timing_last_exit is time spent in guest - log this too */
+ add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
+ TIMEINGUEST);
+}
+
+static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
+ [MMIO_EXITS] = "MMIO",
+ [DCR_EXITS] = "DCR",
+ [SIGNAL_EXITS] = "SIGNAL",
+ [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
+ [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
+ [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
+ [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
+ [SYSCALL_EXITS] = "SYSCALL",
+ [ISI_EXITS] = "ISI",
+ [DSI_EXITS] = "DSI",
+ [EMULATED_INST_EXITS] = "EMULINST",
+ [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
+ [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
+ [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
+ [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
+ [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
+ [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
+ [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
+ [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
+ [EMULATED_RFI_EXITS] = "EMUL_RFI",
+ [DEC_EXITS] = "DEC",
+ [EXT_INTR_EXITS] = "EXTINT",
+ [HALT_WAKEUP] = "HALT",
+ [USR_PR_INST] = "USR_PR_INST",
+ [FP_UNAVAIL] = "FP_UNAVAIL",
+ [DEBUG_EXITS] = "DEBUG",
+ [TIMEINGUEST] = "TIMEINGUEST"
+};
+
+static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
+{
+ struct kvm_vcpu *vcpu = m->private;
+ int i;
+
+ seq_printf(m, "%s", "type count min max sum sum_squared\n");
+
+ for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+ seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
+ kvm_exit_names[i],
+ vcpu->arch.timing_count_type[i],
+ vcpu->arch.timing_min_duration[i],
+ vcpu->arch.timing_max_duration[i],
+ vcpu->arch.timing_sum_duration[i],
+ vcpu->arch.timing_sum_quad_duration[i]);
+ }
+ return 0;
+}
+
+/* Write 'c' to clear the timing statistics. */
+static ssize_t kvmppc_exit_timing_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int err = -EINVAL;
+ char c;
+
+ if (count > 1) {
+ goto done;
+ }
+
+ if (get_user(c, user_buf)) {
+ err = -EFAULT;
+ goto done;
+ }
+
+ if (c == 'c') {
+ struct seq_file *seqf = (struct seq_file *)file->private_data;
+ struct kvm_vcpu *vcpu = seqf->private;
+ /* Write does not affect our buffers previously generated with
+ * show. seq_file is locked here to prevent races of init with
+ * a show call */
+ mutex_lock(&seqf->lock);
+ kvmppc_init_timing_stats(vcpu);
+ mutex_unlock(&seqf->lock);
+ err = count;
+ }
+
+done:
+ return err;
+}
+
+static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, kvmppc_exit_timing_show, inode->i_private);
+}
+
+static struct file_operations kvmppc_exit_timing_fops = {
+ .owner = THIS_MODULE,
+ .open = kvmppc_exit_timing_open,
+ .read = seq_read,
+ .write = kvmppc_exit_timing_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
+{
+ static char dbg_fname[50];
+ struct dentry *debugfs_file;
+
+ snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
+ current->pid, id);
+ debugfs_file = debugfs_create_file(dbg_fname, 0666,
+ kvm_debugfs_dir, vcpu,
+ &kvmppc_exit_timing_fops);
+
+ if (!debugfs_file) {
+ printk(KERN_ERR"%s: error creating debugfs file %s\n",
+ __func__, dbg_fname);
+ return;
+ }
+
+ vcpu->arch.debugfs_exit_timing = debugfs_file;
+}
+
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.debugfs_exit_timing) {
+ debugfs_remove(vcpu->arch.debugfs_exit_timing);
+ vcpu->arch.debugfs_exit_timing = NULL;
+ }
+}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
new file mode 100644
index 000000000000..bb13b1f3cd5a
--- /dev/null
+++ b/arch/powerpc/kvm/timing.h
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#ifndef __POWERPC_KVM_EXITTIMING_H__
+#define __POWERPC_KVM_EXITTIMING_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_host.h>
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
+void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
+void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
+{
+ vcpu->arch.last_exit_type = type;
+}
+
+#else
+/* if exit timing is not configured there is no need to build the c file */
+static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
+ unsigned int id) {}
+static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
+#endif /* CONFIG_KVM_EXIT_TIMING */
+
+/* account the exit in kvm_stats */
+static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
+{
+ /* type has to be known at build time for optimization */
+ BUILD_BUG_ON(__builtin_constant_p(type));
+ switch (type) {
+ case EXT_INTR_EXITS:
+ vcpu->stat.ext_intr_exits++;
+ break;
+ case DEC_EXITS:
+ vcpu->stat.dec_exits++;
+ break;
+ case EMULATED_INST_EXITS:
+ vcpu->stat.emulated_inst_exits++;
+ break;
+ case DCR_EXITS:
+ vcpu->stat.dcr_exits++;
+ break;
+ case DSI_EXITS:
+ vcpu->stat.dsi_exits++;
+ break;
+ case ISI_EXITS:
+ vcpu->stat.isi_exits++;
+ break;
+ case SYSCALL_EXITS:
+ vcpu->stat.syscall_exits++;
+ break;
+ case DTLB_REAL_MISS_EXITS:
+ vcpu->stat.dtlb_real_miss_exits++;
+ break;
+ case DTLB_VIRT_MISS_EXITS:
+ vcpu->stat.dtlb_virt_miss_exits++;
+ break;
+ case MMIO_EXITS:
+ vcpu->stat.mmio_exits++;
+ break;
+ case ITLB_REAL_MISS_EXITS:
+ vcpu->stat.itlb_real_miss_exits++;
+ break;
+ case ITLB_VIRT_MISS_EXITS:
+ vcpu->stat.itlb_virt_miss_exits++;
+ break;
+ case SIGNAL_EXITS:
+ vcpu->stat.signal_exits++;
+ break;
+ }
+}
+
+/* wrapper to set exit time and account for it in kvm_stats */
+static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
+{
+ kvmppc_set_exit_type(vcpu, type);
+ kvmppc_account_exit_stat(vcpu, type);
+}
+
+#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 2949126d28d1..6b793aeda72e 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
{
unsigned long cookie;
- if (path->dentry->d_cookie)
+ if (path->dentry->d_flags & DCACHE_COOKIE)
return (unsigned long)path->dentry;
get_dcookie(path, &cookie);
return cookie;
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
index 906a0a2a9fe1..1410443731eb 100644
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu)
u64 route;
if (nr_cpus_node(spu->node)) {
- cpumask_t spumask = node_to_cpumask(spu->node);
- cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu));
+ const struct cpumask *spumask = cpumask_of_node(spu->node),
+ *cpumask = cpumask_of_node(cpu_to_node(cpu));
- if (!cpus_intersects(spumask, cpumask))
+ if (!cpumask_intersects(spumask, cpumask))
return;
}
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 6296bfd9cb0b..e309ef70a531 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -97,7 +97,6 @@ spufs_new_inode(struct super_block *sb, int mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
out:
return inode;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 2ad914c47493..6a0ad196aeb3 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx)
static int __node_allowed(struct spu_context *ctx, int node)
{
if (nr_cpus_node(node)) {
- cpumask_t mask = node_to_cpumask(node);
+ const struct cpumask *mask = cpumask_of_node(node);
- if (cpus_intersects(mask, ctx->cpus_allowed))
+ if (cpumask_intersects(mask, &ctx->cpus_allowed))
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index f7a69021b7bf..84e058f1e1cc 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq)
lpar_xirr_info_set((0xff << 24) | irq);
}
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
{
unsigned int irq;
int status;
@@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void)
/* Reset affinity to all cpus */
irq_desc[virq].affinity = CPU_MASK_ALL;
- desc->chip->set_affinity(virq, CPU_MASK_ALL);
+ desc->chip->set_affinity(virq, cpu_all_mask);
unlock:
spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index c82babb70074..3e0d89dcdba2 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq)
#endif /* CONFIG_SMP */
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
struct mpic *mpic = mpic_from_irq(irq);
unsigned int src = mpic_irq_to_hw(irq);
@@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
} else {
cpumask_t tmp;
- cpus_and(tmp, cpumask, cpu_online_map);
+ cpumask_and(&tmp, cpumask, cpu_online_mask);
mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62a426d..3cef2af10f42 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
#endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8152fefc97b9..19577aeffd7b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -83,6 +83,7 @@ config S390
select HAVE_KRETPROBES
select HAVE_KVM if 64BIT
select HAVE_ARCH_TRACEHOOK
+ select INIT_ALL_POSSIBLE
source "init/Kconfig"
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 9d4f8e6c0800..5a805df216bb 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -106,7 +106,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
ret->i_mode = mode;
ret->i_uid = hypfs_info->uid;
ret->i_gid = hypfs_info->gid;
- ret->i_blocks = 0;
ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
if (mode & S_IFDIR)
ret->i_nlink = 2;
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9ba3adf..d60a2eefb17b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
struct s390_idle_data {
spinlock_t lock;
- unsigned int in_idle;
unsigned long long idle_count;
unsigned long long idle_enter;
unsigned long long idle_time;
@@ -22,12 +21,12 @@ struct s390_idle_data {
DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
-void s390_idle_leave(void);
+void vtime_start_cpu(void);
static inline void s390_idle_check(void)
{
- if ((&__get_cpu_var(s390_idle))->in_idle)
- s390_idle_leave();
+ if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
+ vtime_start_cpu();
}
#endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 133ce054fc89..521726430afa 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,7 +11,7 @@
#include <asm/div64.h>
-/* We want to use micro-second resolution. */
+/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long cputime_t;
typedef unsigned long long cputime64_t;
@@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base)
#define cputime_ge(__a, __b) ((__a) >= (__b))
#define cputime_lt(__a, __b) ((__a) < (__b))
#define cputime_le(__a, __b) ((__a) <= (__b))
-#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ))
+#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ))
#define cputime_to_scaled(__ct) (__ct)
-#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ))
+#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ))
#define cputime64_zero (0ULL)
#define cputime64_add(__a, __b) ((__a) + (__b))
@@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base)
static inline u64
cputime64_to_jiffies64(cputime64_t cputime)
{
- do_div(cputime, 1000000 / HZ);
+ do_div(cputime, 4096000000ULL / HZ);
return cputime;
}
@@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime)
static inline unsigned int
cputime_to_msecs(const cputime_t cputime)
{
- return __div(cputime, 1000);
+ return __div(cputime, 4096000);
}
static inline cputime_t
msecs_to_cputime(const unsigned int m)
{
- return (cputime_t) m * 1000;
+ return (cputime_t) m * 4096000;
}
/*
@@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m)
static inline unsigned int
cputime_to_secs(const cputime_t cputime)
{
- return __div(cputime, 1000000);
+ return __div(cputime, 2048000000) >> 1;
}
static inline cputime_t
secs_to_cputime(const unsigned int s)
{
- return (cputime_t) s * 1000000;
+ return (cputime_t) s * 4096000000ULL;
}
/*
@@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s)
static inline cputime_t
timespec_to_cputime(const struct timespec *value)
{
- return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000;
+ return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
}
static inline void
@@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
register_pair rp;
rp.pair = cputime >> 1;
- asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
- value->tv_nsec = rp.subreg.even * 1000;
+ asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+ value->tv_nsec = rp.subreg.even * 1000 / 4096;
value->tv_sec = rp.subreg.odd;
#else
- value->tv_nsec = (cputime % 1000000) * 1000;
- value->tv_sec = cputime / 1000000;
+ value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
+ value->tv_sec = cputime / 4096000000ULL;
#endif
}
@@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
static inline cputime_t
timeval_to_cputime(const struct timeval *value)
{
- return value->tv_usec + (u64) value->tv_sec * 1000000;
+ return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
}
static inline void
@@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
register_pair rp;
rp.pair = cputime >> 1;
- asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
- value->tv_usec = rp.subreg.even;
+ asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+ value->tv_usec = rp.subreg.even / 4096;
value->tv_sec = rp.subreg.odd;
#else
- value->tv_usec = cputime % 1000000;
- value->tv_sec = cputime / 1000000;
+ value->tv_usec = cputime % 4096000000ULL;
+ value->tv_sec = cputime / 4096000000ULL;
#endif
}
@@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
static inline clock_t
cputime_to_clock_t(cputime_t cputime)
{
- return __div(cputime, 1000000 / USER_HZ);
+ return __div(cputime, 4096000000ULL / USER_HZ);
}
static inline cputime_t
clock_t_to_cputime(unsigned long x)
{
- return (cputime_t) x * (1000000 / USER_HZ);
+ return (cputime_t) x * (4096000000ULL / USER_HZ);
}
/*
@@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x)
static inline clock_t
cputime64_to_clock_t(cputime64_t cputime)
{
- return __div(cputime, 1000000 / USER_HZ);
+ return __div(cputime, 4096000000ULL / USER_HZ);
}
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 0bc51d52a899..ffdef5fe8587 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,11 +67,11 @@
#define __LC_SYNC_ENTER_TIMER 0x248
#define __LC_ASYNC_ENTER_TIMER 0x250
#define __LC_EXIT_TIMER 0x258
-#define __LC_LAST_UPDATE_TIMER 0x260
-#define __LC_USER_TIMER 0x268
-#define __LC_SYSTEM_TIMER 0x270
-#define __LC_LAST_UPDATE_CLOCK 0x278
-#define __LC_STEAL_CLOCK 0x280
+#define __LC_USER_TIMER 0x260
+#define __LC_SYSTEM_TIMER 0x268
+#define __LC_STEAL_TIMER 0x270
+#define __LC_LAST_UPDATE_TIMER 0x278
+#define __LC_LAST_UPDATE_CLOCK 0x280
#define __LC_RETURN_MCCK_PSW 0x288
#define __LC_KERNEL_STACK 0xC40
#define __LC_THREAD_INFO 0xC44
@@ -89,11 +89,11 @@
#define __LC_SYNC_ENTER_TIMER 0x250
#define __LC_ASYNC_ENTER_TIMER 0x258
#define __LC_EXIT_TIMER 0x260
-#define __LC_LAST_UPDATE_TIMER 0x268
-#define __LC_USER_TIMER 0x270
-#define __LC_SYSTEM_TIMER 0x278
-#define __LC_LAST_UPDATE_CLOCK 0x280
-#define __LC_STEAL_CLOCK 0x288
+#define __LC_USER_TIMER 0x268
+#define __LC_SYSTEM_TIMER 0x270
+#define __LC_STEAL_TIMER 0x278
+#define __LC_LAST_UPDATE_TIMER 0x280
+#define __LC_LAST_UPDATE_CLOCK 0x288
#define __LC_RETURN_MCCK_PSW 0x290
#define __LC_KERNEL_STACK 0xD40
#define __LC_THREAD_INFO 0xD48
@@ -106,8 +106,10 @@
#define __LC_IPLDEV 0xDB8
#define __LC_CURRENT 0xDD8
#define __LC_INT_CLOCK 0xDE8
+#define __LC_VDSO_PER_CPU 0xE38
#endif /* __s390x__ */
+#define __LC_PASTE 0xE40
#define __LC_PANIC_MAGIC 0xE00
#ifndef __s390x__
@@ -252,11 +254,11 @@ struct _lowcore
__u64 sync_enter_timer; /* 0x248 */
__u64 async_enter_timer; /* 0x250 */
__u64 exit_timer; /* 0x258 */
- __u64 last_update_timer; /* 0x260 */
- __u64 user_timer; /* 0x268 */
- __u64 system_timer; /* 0x270 */
- __u64 last_update_clock; /* 0x278 */
- __u64 steal_clock; /* 0x280 */
+ __u64 user_timer; /* 0x260 */
+ __u64 system_timer; /* 0x268 */
+ __u64 steal_timer; /* 0x270 */
+ __u64 last_update_timer; /* 0x278 */
+ __u64 last_update_clock; /* 0x280 */
psw_t return_mcck_psw; /* 0x288 */
__u8 pad8[0xc00-0x290]; /* 0x290 */
@@ -343,11 +345,11 @@ struct _lowcore
__u64 sync_enter_timer; /* 0x250 */
__u64 async_enter_timer; /* 0x258 */
__u64 exit_timer; /* 0x260 */
- __u64 last_update_timer; /* 0x268 */
- __u64 user_timer; /* 0x270 */
- __u64 system_timer; /* 0x278 */
- __u64 last_update_clock; /* 0x280 */
- __u64 steal_clock; /* 0x288 */
+ __u64 user_timer; /* 0x268 */
+ __u64 system_timer; /* 0x270 */
+ __u64 steal_timer; /* 0x278 */
+ __u64 last_update_timer; /* 0x280 */
+ __u64 last_update_clock; /* 0x288 */
psw_t return_mcck_psw; /* 0x290 */
__u8 pad8[0xc00-0x2a0]; /* 0x2a0 */
/* System info area */
@@ -381,7 +383,12 @@ struct _lowcore
/* whether the kernel died with panic() or not */
__u32 panic_magic; /* 0xe00 */
- __u8 pad13[0x11b8-0xe04]; /* 0xe04 */
+ /* Per cpu primary space access list */
+ __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */
+ __u32 vdso_per_cpu_data; /* 0xe3c */
+ __u32 paste[16]; /* 0xe40 */
+
+ __u8 pad13[0x11b8-0xe80]; /* 0xe80 */
/* 64 bit extparam used for pfault, diag 250 etc */
__u64 ext_params2; /* 0x11B8 */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 024ef42ed6d7..3a8b26eb1f2e 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs)
prev = __switch_to(prev,next); \
} while (0)
-extern void account_vtime(struct task_struct *);
+extern void account_vtime(struct task_struct *, struct task_struct *);
extern void account_tick_vtime(struct task_struct *);
extern void account_system_vtime(struct task_struct *);
@@ -121,7 +121,7 @@ static inline void cmma_init(void) { }
#define finish_arch_switch(prev) do { \
set_fs(current->thread.mm_segment); \
- account_vtime(prev); \
+ account_vtime(prev, current); \
} while (0)
#define nop() asm volatile("nop")
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c1eaf9604da7..c544aa524535 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -47,6 +47,8 @@ struct thread_info {
unsigned int cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
struct restart_block restart_block;
+ __u64 user_timer;
+ __u64 system_timer;
};
/*
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60f995..e4bcab739c19 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
__u64 expires;
__u64 interval;
- spinlock_t lock;
- unsigned long magic;
-
void (*function)(unsigned long);
unsigned long data;
};
-/* the offset value will wrap after ca. 71 years */
+/* the vtimer value will wrap after ca. 71 years */
struct vtimer_queue {
struct list_head list;
spinlock_t lock;
- __u64 to_expire; /* current event expire time */
- __u64 offset; /* list offset to zero */
- __u64 idle; /* temp var for idle */
+ __u64 timer; /* last programmed timer */
+ __u64 elapsed; /* elapsed time of timer expire values */
+ __u64 idle; /* temp var for idle */
+ int do_spt; /* =1: reprogram cpu timer in idle */
};
extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
extern void init_cpu_vtimer(void);
extern void vtime_init(void);
-extern void vtime_start_cpu_timer(void);
-extern void vtime_stop_cpu_timer(void);
+extern void vtime_stop_cpu(void);
+extern void vtime_start_leave(void);
#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index d96c91643458..c93eb50e1d09 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -6,10 +6,12 @@
#define mc_capable() (1)
cpumask_t cpu_coregroup_map(unsigned int cpu);
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
extern cpumask_t cpu_core_map[NR_CPUS];
#define topology_core_siblings(cpu) (cpu_core_map[cpu])
+#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
int topology_set_cpu_management(int fc);
void topology_schedule_update(void);
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a44f4fe16a35..7bdd7c8ebc91 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
#ifndef __ASSEMBLY__
/*
- * Note about this structure:
+ * Note about the vdso_data and vdso_per_cpu_data structures:
*
- * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
* structure is supposed to be known only to the function in the vdso
* itself and may change without notice.
*/
@@ -28,10 +28,21 @@ struct vdso_data {
__u64 wtom_clock_nsec; /* 0x28 */
__u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
__u32 tz_dsttime; /* Type of dst correction 0x34 */
+ __u32 ectg_available;
+};
+
+struct vdso_per_cpu_data {
+ __u64 ectg_timer_base;
+ __u64 ectg_user_time;
};
extern struct vdso_data *vdso_data;
+#ifdef CONFIG_64BIT
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60bac99..67a60016babb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+ DEFINE(__VDSO_ECTG_BASE,
+ offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+ DEFINE(__VDSO_ECTG_USER,
+ offsetof(struct vdso_per_cpu_data, ectg_user_time));
/* constants used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521aef77..1268aa2991bf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
.globl io_int_handler
io_int_handler:
- stpt __LC_ASYNC_ENTER_TIMER
stck __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
SAVE_ALL_BASE __LC_SAVE_AREA+16
SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
.globl ext_int_handler
ext_int_handler:
- stpt __LC_ASYNC_ENTER_TIMER
stck __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
SAVE_ALL_BASE __LC_SAVE_AREA+16
SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
.globl mcck_int_handler
mcck_int_handler:
+ stck __LC_INT_CLOCK
spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd1a403..c6fbde13971a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
.if !\sync
ni \psworg+1,0xfd # clear wait state bit
.endif
- lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user
lpswe \psworg # back to caller
.endm
@@ -559,8 +562,8 @@ kernel_per:
*/
.globl io_int_handler
io_int_handler:
- stpt __LC_ASYNC_ENTER_TIMER
stck __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
SAVE_ALL_BASE __LC_SAVE_AREA+32
SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +724,8 @@ io_notify_resume:
*/
.globl ext_int_handler
ext_int_handler:
- stpt __LC_ASYNC_ENTER_TIMER
stck __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
SAVE_ALL_BASE __LC_SAVE_AREA+32
SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +749,7 @@ __critical_end:
*/
.globl mcck_int_handler
mcck_int_handler:
+ stck __LC_INT_CLOCK
la %r1,4095 # revalidate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
@@ -979,23 +983,23 @@ cleanup_sysc_return:
cleanup_sysc_leave:
clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
- je 2f
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ je 3f
clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
- je 2f
- mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
+ jhe 0f
+ mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
cghi %r12,__LC_MCK_OLD_PSW
- jne 0f
+ jne 1f
mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
- j 1f
-0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
-1: lmg %r0,%r11,SP_R0(%r15)
+ j 2f
+1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
+2: lmg %r0,%r11,SP_R0(%r15)
lg %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+3: la %r12,__LC_RETURN_PSW
br %r14
cleanup_sysc_leave_insn:
.quad sysc_done - 4
- .quad sysc_done - 8
+ .quad sysc_done - 16
cleanup_io_return:
mvc __LC_RETURN_PSW(8),0(%r12)
@@ -1005,23 +1009,23 @@ cleanup_io_return:
cleanup_io_leave:
clc 8(8,%r12),BASED(cleanup_io_leave_insn)
- je 2f
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ je 3f
clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
- je 2f
- mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
+ jhe 0f
+ mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
cghi %r12,__LC_MCK_OLD_PSW
- jne 0f
+ jne 1f
mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
- j 1f
-0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
-1: lmg %r0,%r11,SP_R0(%r15)
+ j 2f
+1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
+2: lmg %r0,%r11,SP_R0(%r15)
lg %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+3: la %r12,__LC_RETURN_PSW
br %r14
cleanup_io_leave_insn:
.quad io_done - 4
- .quad io_done - 8
+ .quad io_done - 16
/*
* Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b24b8f..f9f70aa15244 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+ lghi %r0,__LC_PASTE
+ stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index e80716843619..7db95c0b8693 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -16,7 +16,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67a6101..b6110bdf8dc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
#include <linux/utsname.h>
#include <linux/tick.h>
#include <linux/elfcore.h>
+#include <linux/kernel_stat.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -45,7 +46,6 @@
#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/timer.h>
-#include <asm/cpu.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
- .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
-};
-
-static int s390_idle_enter(void)
-{
- struct s390_idle_data *idle;
-
- idle = &__get_cpu_var(s390_idle);
- spin_lock(&idle->lock);
- idle->idle_count++;
- idle->in_idle = 1;
- idle->idle_enter = get_clock();
- spin_unlock(&idle->lock);
- vtime_stop_cpu_timer();
- return NOTIFY_OK;
-}
-
-void s390_idle_leave(void)
-{
- struct s390_idle_data *idle;
-
- vtime_start_cpu_timer();
- idle = &__get_cpu_var(s390_idle);
- spin_lock(&idle->lock);
- idle->idle_time += get_clock() - idle->idle_enter;
- idle->in_idle = 0;
- spin_unlock(&idle->lock);
-}
-
extern void s390_handle_mcck(void);
/*
* The idle loop on a S390...
@@ -117,10 +87,6 @@ static void default_idle(void)
local_irq_enable();
return;
}
- if (s390_idle_enter() == NOTIFY_BAD) {
- local_irq_enable();
- return;
- }
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_is_offline(smp_processor_id())) {
preempt_enable_no_resched();
@@ -130,7 +96,6 @@ static void default_idle(void)
local_mcck_disable();
if (test_thread_flag(TIF_MCCK_PENDING)) {
local_mcck_enable();
- s390_idle_leave();
local_irq_enable();
s390_handle_mcck();
return;
@@ -138,9 +103,9 @@ static void default_idle(void)
trace_hardirqs_on();
/* Don't trace preempt off for idle. */
stop_critical_timings();
- /* Wait for external, I/O or machine check interrupt. */
- __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
- PSW_MASK_IO | PSW_MASK_EXT);
+ /* Stop virtual timer and halt the cpu. */
+ vtime_stop_cpu();
+ /* Reenable preemption tracer. */
start_critical_timings();
}
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b419efc6..a0d2d55d7fb3 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
struct pt_regs *old_regs;
old_regs = set_irq_regs(regs);
- irq_enter();
s390_idle_check();
+ irq_enter();
if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
/* Serve timer interrupts first. */
clock_comparator_work();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd5522c..d825f4950e4e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
/* enable extended save area */
__ctl_set_bit(14, 29);
}
+#else
+ lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
set_prefix((u32)(unsigned long) lc);
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fc78541dc57..9c0ccb532a45 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/cpu.h>
+#include <asm/vdso.h>
#include "entry.h"
/*
@@ -55,12 +56,6 @@
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-
static struct task_struct *current_set[NR_CPUS];
static u8 smp_cpu_type;
@@ -506,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
goto out;
lowcore->extended_save_area_addr = (u32) save_area;
}
+#else
+ if (vdso_alloc_per_cpu(cpu, lowcore))
+ goto out;
#endif
lowcore_ptr[cpu] = lowcore;
return 0;
@@ -528,6 +526,8 @@ static void smp_free_lowcore(int cpu)
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
free_page((unsigned long) lowcore->extended_save_area_addr);
+#else
+ vdso_free_per_cpu(cpu, lowcore);
#endif
free_page(lowcore->panic_stack - PAGE_SIZE);
free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -670,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
panic_stack = __get_free_page(GFP_KERNEL);
async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ BUG_ON(!lowcore || !panic_stack || !async_stack);
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
save_area = get_zeroed_page(GFP_KERNEL);
@@ -683,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
lowcore->extended_save_area_addr = (u32) save_area;
+#else
+ BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
#endif
set_prefix((u32)(unsigned long) lowcore);
local_mcck_enable();
@@ -851,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
unsigned long long idle_count;
idle = &per_cpu(s390_idle, dev->id);
- spin_lock_irq(&idle->lock);
+ spin_lock(&idle->lock);
idle_count = idle->idle_count;
- spin_unlock_irq(&idle->lock);
+ if (idle->idle_enter)
+ idle_count++;
+ spin_unlock(&idle->lock);
return sprintf(buf, "%llu\n", idle_count);
}
static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -862,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct s390_idle_data *idle;
- unsigned long long new_time;
+ unsigned long long now, idle_time, idle_enter;
idle = &per_cpu(s390_idle, dev->id);
- spin_lock_irq(&idle->lock);
- if (idle->in_idle) {
- new_time = get_clock();
- idle->idle_time += new_time - idle->idle_enter;
- idle->idle_enter = new_time;
- }
- new_time = idle->idle_time;
- spin_unlock_irq(&idle->lock);
- return sprintf(buf, "%llu\n", new_time >> 12);
+ spin_lock(&idle->lock);
+ now = get_clock();
+ idle_time = idle->idle_time;
+ idle_enter = idle->idle_enter;
+ if (idle_enter != 0ULL && idle_enter < now)
+ idle_time += now - idle_enter;
+ spin_unlock(&idle->lock);
+ return sprintf(buf, "%llu\n", idle_time >> 12);
}
static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 5be981a36c3e..d649600df5b9 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -160,7 +160,7 @@ void init_cpu_timer(void)
cd->min_delta_ns = 1;
cd->max_delta_ns = LONG_MAX;
cd->rating = 400;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = s390_next_event;
cd->set_mode = s390_set_mode;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 90e9ba11eba1..cc362c9ea8f1 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -97,6 +97,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu)
return mask;
}
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+ return &cpu_core_map[cpu];
+}
+
static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
{
unsigned int cpu;
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6ccef4412..25a6a82f1c02 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
#include <asm/sections.h>
#include <asm/vdso.h>
-/* Max supported size for symbol names */
-#define MAX_SYMNAME 64
-
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
struct vdso_data *vdso_data = &vdso_data_store.data;
/*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+ unsigned int facility_list;
+
+ facility_list = stfl();
+ vd->ectg_available = switch_amode && (facility_list & 1);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Setup per cpu vdso data page.
+ */
+static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
+{
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#ifdef CONFIG_64BIT
+#define SEGMENT_ORDER 2
+#else
+#define SEGMENT_ORDER 1
+#endif
+
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+ int i;
+
+ lowcore->vdso_per_cpu_data = __LC_PASTE;
+
+ if (!switch_amode || !vdso_enabled)
+ return 0;
+
+ segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+ page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ page_frame = get_zeroed_page(GFP_KERNEL);
+ if (!segment_table || !page_table || !page_frame)
+ goto out;
+
+ clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+ PAGE_SIZE << SEGMENT_ORDER);
+ clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+ 256*sizeof(unsigned long));
+
+ *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+ *(unsigned long *) page_table = _PAGE_RO + page_frame;
+
+ psal = (u32 *) (page_table + 256*sizeof(unsigned long));
+ aste = psal + 32;
+
+ for (i = 4; i < 32; i += 4)
+ psal[i] = 0x80000000;
+
+ lowcore->paste[4] = (u32)(addr_t) psal;
+ psal[0] = 0x20000000;
+ psal[2] = (u32)(addr_t) aste;
+ *(unsigned long *) (aste + 2) = segment_table +
+ _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+ aste[4] = (u32)(addr_t) psal;
+ lowcore->vdso_per_cpu_data = page_frame;
+
+ vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
+ return 0;
+
+out:
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+
+ if (!switch_amode || !vdso_enabled)
+ return;
+
+ psal = (u32 *)(addr_t) lowcore->paste[4];
+ aste = (u32 *)(addr_t) psal[2];
+ segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+ page_table = *(unsigned long *) segment_table;
+ page_frame = *(unsigned long *) page_table;
+
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void __vdso_init_cr5(void *dummy)
+{
+ unsigned long cr5;
+
+ cr5 = offsetof(struct _lowcore, paste);
+ __ctl_load(cr5, 5, 5);
+}
+
+static void vdso_init_cr5(void)
+{
+ if (switch_amode && vdso_enabled)
+ on_each_cpu(__vdso_init_cr5, NULL, 1);
+}
+#endif /* CONFIG_64BIT */
+
+/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
{
int i;
+ if (!vdso_enabled)
+ return 0;
+ vdso_init_data(vdso_data);
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
/* Calculate the size of the 32 bit vDSO */
vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
}
vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
vdso64_pagelist[vdso64_pages] = NULL;
+#ifndef CONFIG_SMP
+ BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore));
+#endif
+ vdso_init_cr5();
#endif /* CONFIG_64BIT */
get_page(virt_to_page(vdso_data));
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a3c0e7..9ce8caafdb4e 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
cghi %r2,CLOCK_REALTIME
je 0f
cghi %r2,CLOCK_MONOTONIC
+ je 0f
+ cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
jne 2f
+ larl %r5,_vdso_data
+ icm %r0,15,__LC_ECTG_OK(%r5)
+ jz 2f
0: ltgr %r3,%r3
jz 1f /* res == NULL */
larl %r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410b7eb2..79dbfee831ec 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
larl %r5,_vdso_data
cghi %r2,CLOCK_REALTIME
je 4f
+ cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ je 9f
cghi %r2,CLOCK_MONOTONIC
- jne 9f
+ jne 12f
/* CLOCK_MONOTONIC */
ltgr %r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
alg %r0,__VDSO_WTOM_SEC(%r5)
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 0b
- larl %r5,10f
+ larl %r5,13f
1: clg %r1,0(%r5)
jl 2f
slg %r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
lg %r0,__VDSO_XTIME_SEC(%r5)
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 5b
- larl %r5,10f
+ larl %r5,13f
6: clg %r1,0(%r5)
jl 7f
slg %r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
8: lghi %r2,0
br %r14
+ /* CLOCK_THREAD_CPUTIME_ID for this thread */
+9: icm %r0,15,__VDSO_ECTG_OK(%r5)
+ jz 12f
+ ear %r2,%a4
+ llilh %r4,0x0100
+ sar %a4,%r4
+ lghi %r4,0
+ sacf 512 /* Magic ectg instruction */
+ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+ sacf 0
+ sar %a4,%r2
+ algr %r1,%r0 /* r1 = cputime as TOD value */
+ mghi %r1,1000 /* convert to nanoseconds */
+ srlg %r1,%r1,12 /* r1 = cputime in nanosec */
+ lgr %r4,%r1
+ larl %r5,13f
+ srlg %r1,%r1,9 /* divide by 1000000000 */
+ mlg %r0,8(%r5)
+ srlg %r0,%r0,11 /* r0 = tv_sec */
+ stg %r0,0(%r3)
+ msg %r0,0(%r5) /* calculate tv_nsec */
+ slgr %r4,%r0 /* r4 = tv_nsec */
+ stg %r4,8(%r3)
+ lghi %r2,0
+ br %r14
+
/* Fallback to system call */
-9: lghi %r1,__NR_clock_gettime
+12: lghi %r1,__NR_clock_gettime
svc 0
br %r14
-10: .quad 1000000000
+13: .quad 1000000000
+14: .quad 19342813113834067
.cfi_endproc
.size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62ea973..2fb36e462194 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,19 +23,43 @@
#include <asm/s390_ext.h>
#include <asm/timer.h>
#include <asm/irq_regs.h>
+#include <asm/cpu.h>
static ext_int_info_t ext_int_info_timer;
+
static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
+ .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
+};
+
+static inline __u64 get_vtimer(void)
+{
+ __u64 timer;
+
+ asm volatile("STPT %0" : "=m" (timer));
+ return timer;
+}
+
+static inline void set_vtimer(__u64 expires)
+{
+ __u64 timer;
+
+ asm volatile (" STPT %0\n" /* Store current cpu timer value */
+ " SPT %1" /* Set new value immediatly afterwards */
+ : "=m" (timer) : "m" (expires) );
+ S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+ S390_lowcore.last_update_timer = expires;
+}
+
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void account_process_tick(struct task_struct *tsk, int user_tick)
+static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
{
- cputime_t cputime;
- __u64 timer, clock;
- int rcu_user_flag;
+ struct thread_info *ti = task_thread_info(tsk);
+ __u64 timer, clock, user, system, steal;
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
@@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
: "=m" (S390_lowcore.last_update_timer),
"=m" (S390_lowcore.last_update_clock) );
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock;
-
- cputime = S390_lowcore.user_timer >> 12;
- rcu_user_flag = cputime != 0;
- S390_lowcore.user_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_user_time(tsk, cputime);
-
- cputime = S390_lowcore.system_timer >> 12;
- S390_lowcore.system_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_system_time(tsk, HARDIRQ_OFFSET, cputime);
-
- cputime = S390_lowcore.steal_clock;
- if ((__s64) cputime > 0) {
- cputime >>= 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_steal_time(tsk, cputime);
+ S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
+
+ user = S390_lowcore.user_timer - ti->user_timer;
+ S390_lowcore.steal_timer -= user;
+ ti->user_timer = S390_lowcore.user_timer;
+ account_user_time(tsk, user, user);
+
+ system = S390_lowcore.system_timer - ti->system_timer;
+ S390_lowcore.steal_timer -= system;
+ ti->system_timer = S390_lowcore.system_timer;
+ account_system_time(tsk, hardirq_offset, system, system);
+
+ steal = S390_lowcore.steal_timer;
+ if ((s64) steal > 0) {
+ S390_lowcore.steal_timer = 0;
+ account_steal_time(steal);
}
}
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
-void account_vtime(struct task_struct *tsk)
+void account_vtime(struct task_struct *prev, struct task_struct *next)
{
- cputime_t cputime;
- __u64 timer;
-
- timer = S390_lowcore.last_update_timer;
- asm volatile (" STPT %0" /* Store current cpu timer value */
- : "=m" (S390_lowcore.last_update_timer) );
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-
- cputime = S390_lowcore.user_timer >> 12;
- S390_lowcore.user_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_user_time(tsk, cputime);
+ struct thread_info *ti;
+
+ do_account_vtime(prev, 0);
+ ti = task_thread_info(prev);
+ ti->user_timer = S390_lowcore.user_timer;
+ ti->system_timer = S390_lowcore.system_timer;
+ ti = task_thread_info(next);
+ S390_lowcore.user_timer = ti->user_timer;
+ S390_lowcore.system_timer = ti->system_timer;
+}
- cputime = S390_lowcore.system_timer >> 12;
- S390_lowcore.system_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_system_time(tsk, 0, cputime);
+void account_process_tick(struct task_struct *tsk, int user_tick)
+{
+ do_account_vtime(tsk, HARDIRQ_OFFSET);
}
/*
@@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk)
*/
void account_system_vtime(struct task_struct *tsk)
{
- cputime_t cputime;
- __u64 timer;
+ struct thread_info *ti = task_thread_info(tsk);
+ __u64 timer, system;
timer = S390_lowcore.last_update_timer;
- asm volatile (" STPT %0" /* Store current cpu timer value */
- : "=m" (S390_lowcore.last_update_timer) );
+ S390_lowcore.last_update_timer = get_vtimer();
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- cputime = S390_lowcore.system_timer >> 12;
- S390_lowcore.system_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_system_time(tsk, 0, cputime);
+ system = S390_lowcore.system_timer - ti->system_timer;
+ S390_lowcore.steal_timer -= system;
+ ti->system_timer = S390_lowcore.system_timer;
+ account_system_time(tsk, 0, system, system);
}
EXPORT_SYMBOL_GPL(account_system_vtime);
-static inline void set_vtimer(__u64 expires)
-{
- __u64 timer;
-
- asm volatile (" STPT %0\n" /* Store current cpu timer value */
- " SPT %1" /* Set new value immediatly afterwards */
- : "=m" (timer) : "m" (expires) );
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
-
- /* store expire time for this CPU timer */
- __get_cpu_var(virt_cpu_timer).to_expire = expires;
-}
-
-void vtime_start_cpu_timer(void)
+void vtime_start_cpu(void)
{
- struct vtimer_queue *vt_list;
-
- vt_list = &__get_cpu_var(virt_cpu_timer);
-
- /* CPU timer interrupt is pending, don't reprogramm it */
- if (vt_list->idle & 1LL<<63)
- return;
+ struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+ struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
+ __u64 idle_time, expires;
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ idle_time = S390_lowcore.int_clock - idle->idle_enter;
+ account_idle_time(idle_time);
+ S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+
+ /* Account system time spent going idle. */
+ S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
+ S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
+
+ /* Restart vtime CPU timer */
+ if (vq->do_spt) {
+ /* Program old expire value but first save progress. */
+ expires = vq->idle - S390_lowcore.async_enter_timer;
+ expires += get_vtimer();
+ set_vtimer(expires);
+ } else {
+ /* Don't account the CPU timer delta while the cpu was idle. */
+ vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
+ }
- if (!list_empty(&vt_list->list))
- set_vtimer(vt_list->idle);
+ spin_lock(&idle->lock);
+ idle->idle_time += idle_time;
+ idle->idle_enter = 0ULL;
+ idle->idle_count++;
+ spin_unlock(&idle->lock);
}
-void vtime_stop_cpu_timer(void)
+void vtime_stop_cpu(void)
{
- struct vtimer_queue *vt_list;
-
- vt_list = &__get_cpu_var(virt_cpu_timer);
-
- /* nothing to do */
- if (list_empty(&vt_list->list)) {
- vt_list->idle = VTIMER_MAX_SLICE;
- goto fire;
+ struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+ struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
+ psw_t psw;
+
+ /* Wait for external, I/O or machine check interrupt. */
+ psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
+
+ /* Check if the CPU timer needs to be reprogrammed. */
+ if (vq->do_spt) {
+ __u64 vmax = VTIMER_MAX_SLICE;
+ /*
+ * The inline assembly is equivalent to
+ * vq->idle = get_cpu_timer();
+ * set_cpu_timer(VTIMER_MAX_SLICE);
+ * idle->idle_enter = get_clock();
+ * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+ * PSW_MASK_IO | PSW_MASK_EXT);
+ * The difference is that the inline assembly makes sure that
+ * the last three instruction are stpt, stck and lpsw in that
+ * order. This is done to increase the precision.
+ */
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " basr 1,0\n"
+ "0: ahi 1,1f-0b\n"
+ " st 1,4(%2)\n"
+#else /* CONFIG_64BIT */
+ " larl 1,1f\n"
+ " stg 1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+ " stpt 0(%4)\n"
+ " spt 0(%5)\n"
+ " stck 0(%3)\n"
+#ifndef CONFIG_64BIT
+ " lpsw 0(%2)\n"
+#else /* CONFIG_64BIT */
+ " lpswe 0(%2)\n"
+#endif /* CONFIG_64BIT */
+ "1:"
+ : "=m" (idle->idle_enter), "=m" (vq->idle)
+ : "a" (&psw), "a" (&idle->idle_enter),
+ "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
+ : "memory", "cc", "1");
+ } else {
+ /*
+ * The inline assembly is equivalent to
+ * vq->idle = get_cpu_timer();
+ * idle->idle_enter = get_clock();
+ * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+ * PSW_MASK_IO | PSW_MASK_EXT);
+ * The difference is that the inline assembly makes sure that
+ * the last three instruction are stpt, stck and lpsw in that
+ * order. This is done to increase the precision.
+ */
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " basr 1,0\n"
+ "0: ahi 1,1f-0b\n"
+ " st 1,4(%2)\n"
+#else /* CONFIG_64BIT */
+ " larl 1,1f\n"
+ " stg 1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+ " stpt 0(%4)\n"
+ " stck 0(%3)\n"
+#ifndef CONFIG_64BIT
+ " lpsw 0(%2)\n"
+#else /* CONFIG_64BIT */
+ " lpswe 0(%2)\n"
+#endif /* CONFIG_64BIT */
+ "1:"
+ : "=m" (idle->idle_enter), "=m" (vq->idle)
+ : "a" (&psw), "a" (&idle->idle_enter),
+ "a" (&vq->idle), "m" (psw)
+ : "memory", "cc", "1");
}
-
- /* store the actual expire value */
- asm volatile ("STPT %0" : "=m" (vt_list->idle));
-
- /*
- * If the CPU timer is negative we don't reprogramm
- * it because we will get instantly an interrupt.
- */
- if (vt_list->idle & 1LL<<63)
- return;
-
- vt_list->offset += vt_list->to_expire - vt_list->idle;
-
- /*
- * We cannot halt the CPU timer, we just write a value that
- * nearly never expires (only after 71 years) and re-write
- * the stored expire value if we continue the timer
- */
- fire:
- set_vtimer(VTIMER_MAX_SLICE);
}
/*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
*/
static void do_callbacks(struct list_head *cb_list)
{
- struct vtimer_queue *vt_list;
+ struct vtimer_queue *vq;
struct vtimer_list *event, *tmp;
- void (*fn)(unsigned long);
- unsigned long data;
if (list_empty(cb_list))
return;
- vt_list = &__get_cpu_var(virt_cpu_timer);
+ vq = &__get_cpu_var(virt_cpu_timer);
list_for_each_entry_safe(event, tmp, cb_list, entry) {
- fn = event->function;
- data = event->data;
- fn(data);
-
- if (!event->interval)
- /* delete one shot timer */
- list_del_init(&event->entry);
- else {
- /* move interval timer back to list */
- spin_lock(&vt_list->lock);
- list_del_init(&event->entry);
- list_add_sorted(event, &vt_list->list);
- spin_unlock(&vt_list->lock);
+ list_del_init(&event->entry);
+ (event->function)(event->data);
+ if (event->interval) {
+ /* Recharge interval timer */
+ event->expires = event->interval + vq->elapsed;
+ spin_lock(&vq->lock);
+ list_add_sorted(event, &vq->list);
+ spin_unlock(&vq->lock);
}
}
}
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
*/
static void do_cpu_timer_interrupt(__u16 error_code)
{
- __u64 next, delta;
- struct vtimer_queue *vt_list;
+ struct vtimer_queue *vq;
struct vtimer_list *event, *tmp;
- struct list_head *ptr;
- /* the callback queue */
- struct list_head cb_list;
+ struct list_head cb_list; /* the callback queue */
+ __u64 elapsed, next;
INIT_LIST_HEAD(&cb_list);
- vt_list = &__get_cpu_var(virt_cpu_timer);
+ vq = &__get_cpu_var(virt_cpu_timer);
/* walk timer list, fire all expired events */
- spin_lock(&vt_list->lock);
-
- if (vt_list->to_expire < VTIMER_MAX_SLICE)
- vt_list->offset += vt_list->to_expire;
-
- list_for_each_entry_safe(event, tmp, &vt_list->list, entry) {
- if (event->expires > vt_list->offset)
- /* found first unexpired event, leave */
- break;
-
- /* re-charge interval timer, we have to add the offset */
- if (event->interval)
- event->expires = event->interval + vt_list->offset;
-
- /* move expired timer to the callback queue */
- list_move_tail(&event->entry, &cb_list);
+ spin_lock(&vq->lock);
+
+ elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
+ BUG_ON((s64) elapsed < 0);
+ vq->elapsed = 0;
+ list_for_each_entry_safe(event, tmp, &vq->list, entry) {
+ if (event->expires < elapsed)
+ /* move expired timer to the callback queue */
+ list_move_tail(&event->entry, &cb_list);
+ else
+ event->expires -= elapsed;
}
- spin_unlock(&vt_list->lock);
+ spin_unlock(&vq->lock);
+
+ vq->do_spt = list_empty(&cb_list);
do_callbacks(&cb_list);
/* next event is first in list */
- spin_lock(&vt_list->lock);
- if (!list_empty(&vt_list->list)) {
- ptr = vt_list->list.next;
- event = list_entry(ptr, struct vtimer_list, entry);
- next = event->expires - vt_list->offset;
-
- /* add the expired time from this interrupt handler
- * and the callback functions
- */
- asm volatile ("STPT %0" : "=m" (delta));
- delta = 0xffffffffffffffffLL - delta + 1;
- vt_list->offset += delta;
- next -= delta;
- } else {
- vt_list->offset = 0;
- next = VTIMER_MAX_SLICE;
- }
- spin_unlock(&vt_list->lock);
- set_vtimer(next);
+ next = VTIMER_MAX_SLICE;
+ spin_lock(&vq->lock);
+ if (!list_empty(&vq->list)) {
+ event = list_first_entry(&vq->list, struct vtimer_list, entry);
+ next = event->expires;
+ } else
+ vq->do_spt = 0;
+ spin_unlock(&vq->lock);
+ /*
+ * To improve precision add the time spent by the
+ * interrupt handler to the elapsed time.
+ * Note: CPU timer counts down and we got an interrupt,
+ * the current content is negative
+ */
+ elapsed = S390_lowcore.async_enter_timer - get_vtimer();
+ set_vtimer(next - elapsed);
+ vq->timer = next - elapsed;
+ vq->elapsed = elapsed;
}
void init_virt_timer(struct vtimer_list *timer)
{
timer->function = NULL;
INIT_LIST_HEAD(&timer->entry);
- spin_lock_init(&timer->lock);
}
EXPORT_SYMBOL(init_virt_timer);
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
*/
static void internal_add_vtimer(struct vtimer_list *timer)
{
+ struct vtimer_queue *vq;
unsigned long flags;
- __u64 done;
- struct vtimer_list *event;
- struct vtimer_queue *vt_list;
+ __u64 left, expires;
- vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
- spin_lock_irqsave(&vt_list->lock, flags);
+ vq = &per_cpu(virt_cpu_timer, timer->cpu);
+ spin_lock_irqsave(&vq->lock, flags);
BUG_ON(timer->cpu != smp_processor_id());
- /* if list is empty we only have to set the timer */
- if (list_empty(&vt_list->list)) {
- /* reset the offset, this may happen if the last timer was
- * just deleted by mod_virt_timer and the interrupt
- * didn't happen until here
- */
- vt_list->offset = 0;
- goto fire;
+ if (list_empty(&vq->list)) {
+ /* First timer on this cpu, just program it. */
+ list_add(&timer->entry, &vq->list);
+ set_vtimer(timer->expires);
+ vq->timer = timer->expires;
+ vq->elapsed = 0;
+ } else {
+ /* Check progress of old timers. */
+ expires = timer->expires;
+ left = get_vtimer();
+ if (likely((s64) expires < (s64) left)) {
+ /* The new timer expires before the current timer. */
+ set_vtimer(expires);
+ vq->elapsed += vq->timer - left;
+ vq->timer = expires;
+ } else {
+ vq->elapsed += vq->timer - left;
+ vq->timer = left;
+ }
+ /* Insert new timer into per cpu list. */
+ timer->expires += vq->elapsed;
+ list_add_sorted(timer, &vq->list);
}
- /* save progress */
- asm volatile ("STPT %0" : "=m" (done));
-
- /* calculate completed work */
- done = vt_list->to_expire - done + vt_list->offset;
- vt_list->offset = 0;
-
- list_for_each_entry(event, &vt_list->list, entry)
- event->expires -= done;
-
- fire:
- list_add_sorted(timer, &vt_list->list);
-
- /* get first element, which is the next vtimer slice */
- event = list_entry(vt_list->list.next, struct vtimer_list, entry);
-
- set_vtimer(event->expires);
- spin_unlock_irqrestore(&vt_list->lock, flags);
+ spin_unlock_irqrestore(&vq->lock, flags);
/* release CPU acquired in prepare_vtimer or mod_virt_timer() */
put_cpu();
}
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
* If we change a pending timer the function must be called on the CPU
* where the timer is running on, e.g. by smp_call_function_single()
*
- * The original mod_timer adds the timer if it is not pending. For compatibility
- * we do the same. The timer will be added on the current CPU as a oneshot timer.
+ * The original mod_timer adds the timer if it is not pending. For
+ * compatibility we do the same. The timer will be added on the current
+ * CPU as a oneshot timer.
*
* returns whether it has modified a pending timer (1) or not (0)
*/
int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
{
- struct vtimer_queue *vt_list;
+ struct vtimer_queue *vq;
unsigned long flags;
int cpu;
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
return 1;
cpu = get_cpu();
- vt_list = &per_cpu(virt_cpu_timer, cpu);
+ vq = &per_cpu(virt_cpu_timer, cpu);
/* check if we run on the right CPU */
BUG_ON(timer->cpu != cpu);
/* disable interrupts before test if timer is pending */
- spin_lock_irqsave(&vt_list->lock, flags);
+ spin_lock_irqsave(&vq->lock, flags);
/* if timer isn't pending add it on the current CPU */
if (!vtimer_pending(timer)) {
- spin_unlock_irqrestore(&vt_list->lock, flags);
+ spin_unlock_irqrestore(&vq->lock, flags);
/* we do not activate an interval timer with mod_virt_timer */
timer->interval = 0;
timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
timer->interval = expires;
/* the timer can't expire anymore so we can release the lock */
- spin_unlock_irqrestore(&vt_list->lock, flags);
+ spin_unlock_irqrestore(&vq->lock, flags);
internal_add_vtimer(timer);
return 1;
}
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
int del_virt_timer(struct vtimer_list *timer)
{
unsigned long flags;
- struct vtimer_queue *vt_list;
+ struct vtimer_queue *vq;
/* check if timer is pending */
if (!vtimer_pending(timer))
return 0;
- vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
- spin_lock_irqsave(&vt_list->lock, flags);
+ vq = &per_cpu(virt_cpu_timer, timer->cpu);
+ spin_lock_irqsave(&vq->lock, flags);
/* we don't interrupt a running timer, just let it expire! */
list_del_init(&timer->entry);
- /* last timer removed */
- if (list_empty(&vt_list->list)) {
- vt_list->to_expire = 0;
- vt_list->offset = 0;
- }
-
- spin_unlock_irqrestore(&vt_list->lock, flags);
+ spin_unlock_irqrestore(&vq->lock, flags);
return 1;
}
EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
*/
void init_cpu_vtimer(void)
{
- struct vtimer_queue *vt_list;
+ struct vtimer_queue *vq;
/* kick the virtual timer */
- S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
- S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
- asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
+ asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
+
+ /* initialize per cpu vtimer structure */
+ vq = &__get_cpu_var(virt_cpu_timer);
+ INIT_LIST_HEAD(&vq->list);
+ spin_lock_init(&vq->lock);
/* enable cpu timer interrupts */
__ctl_set_bit(0,10);
-
- vt_list = &__get_cpu_var(virt_cpu_timer);
- INIT_LIST_HEAD(&vt_list->list);
- spin_lock_init(&vt_list->lock);
- vt_list->to_expire = 0;
- vt_list->offset = 0;
- vt_list->idle = 0;
-
}
void __init vtime_init(void)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8b00eb2ddf57..be8497186b96 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_dev_ioctl_check_extension(long ext)
{
switch (ext) {
- case KVM_CAP_USER_MEMORY:
- return 1;
default:
return 0;
}
@@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void)
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
- try_module_get(THIS_MODULE);
-
return kvm;
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +192,33 @@ out_nokvm:
return ERR_PTR(rc);
}
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+ free_page((unsigned long)(vcpu->arch.sie_block));
+ kvm_vcpu_uninit(vcpu);
+ kfree(vcpu);
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_arch_vcpu_destroy(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
+ }
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- debug_unregister(kvm->arch.dbf);
+ kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
+ debug_unregister(kvm->arch.dbf);
kfree(kvm);
- module_put(THIS_MODULE);
}
/* Section: vcpu related */
@@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- /* kvm common code refers to this, but does'nt call it */
- BUG();
+ /* Nothing todo */
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
- try_module_get(THIS_MODULE);
-
return vcpu;
out_free_cpu:
kfree(vcpu);
@@ -317,14 +330,6 @@ out_nomem:
return ERR_PTR(rc);
}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
- free_page((unsigned long)(vcpu->arch.sie_block));
- kfree(vcpu);
- module_put(THIS_MODULE);
-}
-
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 85b660c17eb0..c24e9c6a1736 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -31,7 +31,7 @@ enum {
};
void smp_message_recv(unsigned int msg);
-void smp_timer_broadcast(cpumask_t mask);
+void smp_timer_broadcast(const struct cpumask *mask);
void local_timer_interrupt(void);
void local_timer_setup(unsigned int cpu);
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 95f0085e098a..066f0fba590e 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -5,7 +5,6 @@
/* sched_domains SD_NODE_INIT for sh machines */
#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
@@ -33,6 +32,7 @@
#define parent_node(node) ((void)(node),0)
#define node_to_cpumask(node) ((void)node, cpu_online_map)
+#define cpumask_of_node(node) ((void)node, cpu_online_mask)
#define node_to_first_cpu(node) ((void)(node),0)
#define pcibus_to_node(bus) ((void)(bus), -1)
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index b151a25cb14d..80c35ff71d56 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -7,7 +7,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct pt_regs fake_swapper_regs;
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 3c5ad1660bbc..8f4027412614 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -31,12 +31,6 @@
int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-
static inline void __init smp_store_cpu_info(unsigned int cpu)
{
struct sh_cpuinfo *c = cpu_data + cpu;
@@ -190,11 +184,11 @@ void arch_send_call_function_single_ipi(int cpu)
plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
}
-void smp_timer_broadcast(cpumask_t mask)
+void smp_timer_broadcast(const struct cpumask *mask)
{
int cpu;
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, mask)
plat_send_ipi(cpu, SMP_MSG_TIMER);
}
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
index c2317635230f..96e8eaea1e62 100644
--- a/arch/sh/kernel/timers/timer-broadcast.c
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
clk->mult = 1;
clk->set_mode = dummy_timer_set_mode;
clk->broadcast = smp_timer_broadcast;
- clk->cpumask = cpumask_of_cpu(cpu);
+ clk->cpumask = cpumask_of(cpu);
clockevents_register_device(clk);
}
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 3c61ddd4d43e..0db3f9510336 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -263,7 +263,7 @@ static int tmu_timer_init(void)
tmu0_clockevent.min_delta_ns =
clockevent_delta2ns(1, &tmu0_clockevent);
- tmu0_clockevent.cpumask = cpumask_of_cpu(0);
+ tmu0_clockevent.cpumask = cpumask_of(0);
clockevents_register_device(&tmu0_clockevent);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 0a94d9c9cde1..de58c02633b4 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -4,6 +4,17 @@
mainmenu "Linux/SPARC Kernel Configuration"
+config 64BIT
+ bool "64-bit kernel" if ARCH = "sparc"
+ default ARCH = "sparc64"
+ help
+ SPARC is a family of RISC microprocessors designed and marketed by
+ Sun Microsystems, incorporated. They are very widely found in Sun
+ workstations and clones.
+
+ Say yes to build a 64-bit kernel - formerly known as sparc64
+ Say no to build a 32-bit kernel - formerly known as sparc
+
config SPARC
bool
default y
@@ -15,22 +26,11 @@ config SPARC
select RTC_CLASS
select RTC_DRV_M48T59
-# Identify this as a Sparc32 build
config SPARC32
- bool
- default y if ARCH = "sparc"
- help
- SPARC is a family of RISC microprocessors designed and marketed by
- Sun Microsystems, incorporated. They are very widely found in Sun
- workstations and clones. This port covers the original 32-bit SPARC;
- it is old and stable and usually considered one of the "big three"
- along with the Intel and Alpha ports. The UltraLinux project
- maintains both the SPARC32 and SPARC64 ports; its web page is
- available at <http://www.ultralinux.org/>.
+ def_bool !64BIT
config SPARC64
- bool
- default y if ARCH = "sparc64"
+ def_bool 64BIT
select ARCH_SUPPORTS_MSI
select HAVE_FUNCTION_TRACER
select HAVE_KRETPROBES
@@ -53,9 +53,6 @@ config BITS
default 32 if SPARC32
default 64 if SPARC64
-config 64BIT
- def_bool y if SPARC64
-
config GENERIC_TIME
bool
default y if SPARC64
@@ -188,14 +185,6 @@ config ARCH_MAY_HAVE_PC_FDC
bool
default y
-config ARCH_HAS_ILOG2_U32
- bool
- default n
-
-config ARCH_HAS_ILOG2_U64
- bool
- default n
-
config EMULATED_CMPXCHG
bool
default y if SPARC32
@@ -442,26 +431,6 @@ config SERIAL_CONSOLE
endmenu
menu "Bus options (PCI etc.)"
-config ISA
- bool
- help
- ISA is found on Espresso only and is not supported currently.
-
-config ISAPNP
- bool
- help
- ISAPNP is not supported
-
-config EISA
- bool
- help
- EISA is not supported.
-
-config MCA
- bool
- help
- MCA is not supported.
-
config SBUS
bool
default y
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 05d19a3e590f..cde19ae78f5a 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -1,27 +1,27 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.28-rc4
-# Mon Nov 10 12:35:09 2008
+# Linux kernel version: 2.6.28
+# Fri Jan 2 18:14:26 2009
#
CONFIG_SPARC=y
CONFIG_SPARC64=y
+CONFIG_ARCH_DEFCONFIG="arch/sparc/configs/sparc64_defconfig"
+CONFIG_BITS=64
+CONFIG_64BIT=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_64BIT=y
-CONFIG_MMU=y
CONFIG_IOMMU_HELPER=y
CONFIG_QUICKLIST=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_AUDIT_ARCH=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_MMU=y
CONFIG_ARCH_NO_VIRT_TO_BUS=y
CONFIG_OF=y
-CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
#
@@ -86,6 +86,7 @@ CONFIG_SLUB_DEBUG=y
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
# CONFIG_MARKERS is not set
CONFIG_OPROFILE=m
CONFIG_HAVE_OPROFILE=y
@@ -127,34 +128,40 @@ CONFIG_DEFAULT_AS=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="anticipatory"
CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
# CONFIG_FREEZER is not set
#
# Processor type and features
#
-CONFIG_SPARC64_PAGE_SIZE_8KB=y
-# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
-CONFIG_SECCOMP=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=64
CONFIG_HZ_100=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
CONFIG_SCHED_HRTICK=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_SPARC64_SMP=y
+CONFIG_SPARC64_PAGE_SIZE_8KB=y
+# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
+CONFIG_SECCOMP=y
CONFIG_HOTPLUG_CPU=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=64
# CONFIG_CPU_FREQ is not set
CONFIG_US3_MC=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_HUGETLB_PAGE_SIZE_4MB=y
# CONFIG_HUGETLB_PAGE_SIZE_512K is not set
# CONFIG_HUGETLB_PAGE_SIZE_64K is not set
@@ -183,10 +190,18 @@ CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_ZONE_DMA_FLAG=0
CONFIG_NR_QUICK=1
CONFIG_UNEVICTABLE_LRU=y
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+# CONFIG_PREEMPT_NONE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_PREEMPT is not set
+# CONFIG_CMDLINE_BOOL is not set
+
+#
+# Bus options (PCI etc.)
+#
CONFIG_SBUS=y
CONFIG_SBUSCHAR=y
-CONFIG_SUN_AUXIO=y
-CONFIG_SUN_IO=y
CONFIG_SUN_LDOMS=y
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
@@ -195,7 +210,9 @@ CONFIG_ARCH_SUPPORTS_MSI=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_LEGACY is not set
# CONFIG_PCI_DEBUG is not set
+# CONFIG_PCCARD is not set
CONFIG_SUN_OPENPROMFS=m
+CONFIG_SPARC64_PCI=y
#
# Executable file formats
@@ -207,17 +224,13 @@ CONFIG_COMPAT_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
CONFIG_COMPAT=y
CONFIG_SYSVIPC_COMPAT=y
-CONFIG_SCHED_SMT=y
-CONFIG_SCHED_MC=y
-# CONFIG_PREEMPT_NONE is not set
-CONFIG_PREEMPT_VOLUNTARY=y
-# CONFIG_PREEMPT is not set
-# CONFIG_CMDLINE_BOOL is not set
CONFIG_NET=y
#
# Networking options
#
+# CONFIG_NET_NS is not set
+CONFIG_COMPAT_NET_DEV_OPS=y
CONFIG_PACKET=y
CONFIG_PACKET_MMAP=y
CONFIG_UNIX=y
@@ -314,6 +327,7 @@ CONFIG_VLAN_8021Q=m
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
#
# Network testing
@@ -330,8 +344,8 @@ CONFIG_WIRELESS=y
# CONFIG_CFG80211 is not set
CONFIG_WIRELESS_OLD_REGULATORY=y
# CONFIG_WIRELESS_EXT is not set
+# CONFIG_LIB80211 is not set
# CONFIG_MAC80211 is not set
-# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
@@ -378,8 +392,10 @@ CONFIG_MISC_DEVICES=y
# CONFIG_EEPROM_93CX6 is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
+# CONFIG_C2PORT is not set
CONFIG_HAVE_IDE=y
CONFIG_IDE=y
@@ -387,6 +403,7 @@ CONFIG_IDE=y
# Please see Documentation/ide/ide.txt for help/info on IDE drives
#
CONFIG_IDE_TIMINGS=y
+CONFIG_IDE_ATAPI=y
# CONFIG_BLK_DEV_IDE_SATA is not set
CONFIG_IDE_GD=y
CONFIG_IDE_GD_ATA=y
@@ -394,7 +411,6 @@ CONFIG_IDE_GD_ATA=y
CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y
# CONFIG_BLK_DEV_IDETAPE is not set
-# CONFIG_BLK_DEV_IDESCSI is not set
# CONFIG_IDE_TASK_IOCTL is not set
CONFIG_IDE_PROC_FS=y
@@ -477,6 +493,7 @@ CONFIG_SCSI_FC_ATTRS=y
# CONFIG_SCSI_SRP_ATTRS is not set
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_ISCSI_TCP is not set
+# CONFIG_SCSI_CXGB3_ISCSI is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
@@ -490,6 +507,8 @@ CONFIG_SCSI_LOWLEVEL=y
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_LIBFC is not set
+# CONFIG_FCOE is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_IPS is not set
@@ -564,6 +583,9 @@ CONFIG_PHYLIB=m
# CONFIG_BROADCOM_PHY is not set
# CONFIG_ICPLUS_PHY is not set
# CONFIG_REALTEK_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_LSI_ET1011C_PHY is not set
# CONFIG_MDIO_BITBANG is not set
CONFIG_NET_ETHERNET=y
CONFIG_MII=m
@@ -590,7 +612,6 @@ CONFIG_NET_PCI=y
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_B44 is not set
# CONFIG_FORCEDETH is not set
-# CONFIG_EEPRO100 is not set
# CONFIG_E100 is not set
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
@@ -600,6 +621,7 @@ CONFIG_NET_PCI=y
# CONFIG_R6040 is not set
# CONFIG_SIS900 is not set
# CONFIG_EPIC100 is not set
+# CONFIG_SMSC9420 is not set
# CONFIG_SUNDANCE is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
@@ -629,6 +651,7 @@ CONFIG_BNX2=m
# CONFIG_JME is not set
CONFIG_NETDEV_10000=y
# CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3_DEPENDS=y
# CONFIG_CHELSIO_T3 is not set
# CONFIG_ENIC is not set
# CONFIG_IXGBE is not set
@@ -778,6 +801,7 @@ CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=m
@@ -870,6 +894,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_ADM1029 is not set
# CONFIG_SENSORS_ADM1031 is not set
# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7462 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_ADT7473 is not set
# CONFIG_SENSORS_ATXP1 is not set
@@ -919,11 +944,11 @@ CONFIG_HWMON=y
# CONFIG_THERMAL is not set
# CONFIG_THERMAL_HWMON is not set
# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
#
# Sonics Silicon Backplane
#
-CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set
#
@@ -1071,6 +1096,7 @@ CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
CONFIG_SND_PCM_OSS_PLUGINS=y
CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_HRTIMER is not set
# CONFIG_SND_DYNAMIC_MINORS is not set
CONFIG_SND_SUPPORT_OLD_API=y
CONFIG_SND_VERBOSE_PROCFS=y
@@ -1242,11 +1268,11 @@ CONFIG_USB_UHCI_HCD=m
# CONFIG_USB_TMC is not set
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
#
#
-# may also be needed; see USB_STORAGE Help for more information
+# see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=m
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1337,6 +1363,7 @@ CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
#
# SPI RTC drivers
@@ -1365,7 +1392,6 @@ CONFIG_RTC_DRV_STARFIRE=y
# CONFIG_DMADEVICES is not set
# CONFIG_UIO is not set
# CONFIG_STAGING is not set
-CONFIG_STAGING_EXCLUDE_BUILD=y
#
# Misc Linux/SPARC drivers
@@ -1544,6 +1570,7 @@ CONFIG_SCHEDSTATS=y
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
@@ -1552,6 +1579,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
# CONFIG_BOOT_PRINTK_DELAY is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
@@ -1560,8 +1588,12 @@ CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_NOP_TRACER=y
CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
#
# Tracers
@@ -1571,7 +1603,9 @@ CONFIG_HAVE_FUNCTION_TRACER=y
# CONFIG_SCHED_TRACER is not set
# CONFIG_CONTEXT_SWITCH_TRACER is not set
# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
# CONFIG_STACK_TRACER is not set
+# CONFIG_FTRACE_STARTUP_TEST is not set
# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
@@ -1600,11 +1634,16 @@ CONFIG_CRYPTO=y
#
# CONFIG_CRYPTO_FIPS is not set
CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_RNG=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
CONFIG_CRYPTO_GF128MUL=m
CONFIG_CRYPTO_NULL=m
# CONFIG_CRYPTO_CRYPTD is not set
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 89c260aab45c..deeb0fba8029 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -1,21 +1,6 @@
# User exported sparc header files
include include/asm-generic/Kbuild.asm
-header-y += ipcbuf_32.h
-header-y += ipcbuf_64.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += ptrace_32.h
-header-y += ptrace_64.h
-header-y += sigcontext_32.h
-header-y += sigcontext_64.h
-header-y += siginfo_32.h
-header-y += siginfo_64.h
-header-y += signal_32.h
-header-y += signal_64.h
-header-y += stat_32.h
-header-y += stat_64.h
-
header-y += apc.h
header-y += asi.h
header-y += display7seg.h
@@ -23,16 +8,11 @@ header-y += envctrl.h
header-y += fbio.h
header-y += jsflash.h
header-y += openprom.h
-header-y += openprom_32.h
-header-y += openprom_64.h
header-y += openpromio.h
header-y += perfctr.h
header-y += psrcompat.h
header-y += psr.h
header-y += pstate.h
-header-y += reg.h
-header-y += reg_32.h
-header-y += reg_64.h
header-y += traps.h
header-y += uctx.h
header-y += utrap.h
diff --git a/arch/sparc/include/asm/byteorder.h b/arch/sparc/include/asm/byteorder.h
index 5a70f137f1f7..738414b26558 100644
--- a/arch/sparc/include/asm/byteorder.h
+++ b/arch/sparc/include/asm/byteorder.h
@@ -1,16 +1,12 @@
#ifndef _SPARC_BYTEORDER_H
#define _SPARC_BYTEORDER_H
-#include <asm/types.h>
+#include <linux/types.h>
#include <asm/asi.h>
#define __BIG_ENDIAN
-#ifdef CONFIG_SPARC32
-#define __SWAB_64_THRU_32__
-#endif
-
-#ifdef CONFIG_SPARC64
+#if defined(__sparc__) && defined(__arch64__)
static inline __u16 __arch_swab16p(const __u16 *addr)
{
__u16 ret;
@@ -44,7 +40,9 @@ static inline __u64 __arch_swab64p(const __u64 *addr)
}
#define __arch_swab64p __arch_swab64p
-#endif /* CONFIG_SPARC64 */
+#else
+#define __SWAB_64_THRU_32__
+#endif /* defined(__sparc__) && defined(__arch64__) */
#include <linux/byteorder.h>
diff --git a/arch/sparc/include/asm/ipcbuf.h b/arch/sparc/include/asm/ipcbuf.h
index 17d6ef7b23a4..66013b4fe10d 100644
--- a/arch/sparc/include/asm/ipcbuf.h
+++ b/arch/sparc/include/asm/ipcbuf.h
@@ -1,8 +1,32 @@
-#ifndef ___ASM_SPARC_IPCBUF_H
-#define ___ASM_SPARC_IPCBUF_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/ipcbuf_64.h>
-#else
-#include <asm/ipcbuf_32.h>
-#endif
+#ifndef __SPARC_IPCBUF_H
+#define __SPARC_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for sparc/sparc64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit seq
+ * - on sparc for 32 bit mode (it is 32 bit on sparc64)
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct ipc64_perm
+{
+ __kernel_key_t key;
+ __kernel_uid_t uid;
+ __kernel_gid_t gid;
+ __kernel_uid_t cuid;
+ __kernel_gid_t cgid;
+#ifndef __arch64__
+ unsigned short __pad0;
#endif
+ __kernel_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned long long __unused1;
+ unsigned long long __unused2;
+};
+
+#endif /* __SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/asm/ipcbuf_32.h b/arch/sparc/include/asm/ipcbuf_32.h
deleted file mode 100644
index 6387209518f2..000000000000
--- a/arch/sparc/include/asm/ipcbuf_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _SPARC_IPCBUF_H
-#define _SPARC_IPCBUF_H
-
-/*
- * The ipc64_perm structure for sparc architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit mode
- * - 32-bit seq
- * - 2 miscellaneous 64-bit values (so that this structure matches
- * sparc64 ipc64_perm)
- */
-
-struct ipc64_perm
-{
- __kernel_key_t key;
- __kernel_uid32_t uid;
- __kernel_gid32_t gid;
- __kernel_uid32_t cuid;
- __kernel_gid32_t cgid;
- unsigned short __pad1;
- __kernel_mode_t mode;
- unsigned short __pad2;
- unsigned short seq;
- unsigned long long __unused1;
- unsigned long long __unused2;
-};
-
-#endif /* _SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/asm/ipcbuf_64.h b/arch/sparc/include/asm/ipcbuf_64.h
deleted file mode 100644
index a44b855b98db..000000000000
--- a/arch/sparc/include/asm/ipcbuf_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _SPARC64_IPCBUF_H
-#define _SPARC64_IPCBUF_H
-
-/*
- * The ipc64_perm structure for sparc64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit seq
- * - 2 miscellaneous 64-bit values
- */
-
-struct ipc64_perm
-{
- __kernel_key_t key;
- __kernel_uid_t uid;
- __kernel_gid_t gid;
- __kernel_uid_t cuid;
- __kernel_gid_t cgid;
- __kernel_mode_t mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned long __unused1;
- unsigned long __unused2;
-};
-
-#endif /* _SPARC64_IPCBUF_H */
diff --git a/arch/sparc/include/asm/jsflash.h b/arch/sparc/include/asm/jsflash.h
index 3457f29bd73b..0717d9e39d2d 100644
--- a/arch/sparc/include/asm/jsflash.h
+++ b/arch/sparc/include/asm/jsflash.h
@@ -8,7 +8,7 @@
#define _SPARC_JSFLASH_H
#ifndef _SPARC_TYPES_H
-#include <asm/types.h>
+#include <linux/types.h>
#endif
/*
diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h
index aaeae905ed3f..963e1a45c35f 100644
--- a/arch/sparc/include/asm/openprom.h
+++ b/arch/sparc/include/asm/openprom.h
@@ -1,8 +1,277 @@
-#ifndef ___ASM_SPARC_OPENPROM_H
-#define ___ASM_SPARC_OPENPROM_H
+#ifndef __SPARC_OPENPROM_H
+#define __SPARC_OPENPROM_H
+
+/* openprom.h: Prom structures and defines for access to the OPENBOOT
+ * prom routines and data areas.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+/* Empirical constants... */
+#define LINUX_OPPROM_MAGIC 0x10010407
+
+#ifndef __ASSEMBLY__
+/* V0 prom device operations. */
+struct linux_dev_v0_funcs {
+ int (*v0_devopen)(char *device_str);
+ int (*v0_devclose)(int dev_desc);
+ int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
+ int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
+ int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
+ int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
+ int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
+ int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
+ int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
+};
+
+/* V2 and later prom device operations. */
+struct linux_dev_v2_funcs {
+ int (*v2_inst2pkg)(int d); /* Convert ihandle to phandle */
+ char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
+ void (*v2_dumb_mem_free)(char *va, unsigned sz);
+
+ /* To map devices into virtual I/O space. */
+ char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
+ void (*v2_dumb_munmap)(char *virta, unsigned size);
+
+ int (*v2_dev_open)(char *devpath);
+ void (*v2_dev_close)(int d);
+ int (*v2_dev_read)(int d, char *buf, int nbytes);
+ int (*v2_dev_write)(int d, char *buf, int nbytes);
+ int (*v2_dev_seek)(int d, int hi, int lo);
+
+ /* Never issued (multistage load support) */
+ void (*v2_wheee2)(void);
+ void (*v2_wheee3)(void);
+};
+
+struct linux_mlist_v0 {
+ struct linux_mlist_v0 *theres_more;
+ unsigned int start_adr;
+ unsigned num_bytes;
+};
+
+struct linux_mem_v0 {
+ struct linux_mlist_v0 **v0_totphys;
+ struct linux_mlist_v0 **v0_prommap;
+ struct linux_mlist_v0 **v0_available; /* What we can use */
+};
+
+/* Arguments sent to the kernel from the boot prompt. */
+struct linux_arguments_v0 {
+ char *argv[8];
+ char args[100];
+ char boot_dev[2];
+ int boot_dev_ctrl;
+ int boot_dev_unit;
+ int dev_partition;
+ char *kernel_file_name;
+ void *aieee1; /* XXX */
+};
+
+/* V2 and up boot things. */
+struct linux_bootargs_v2 {
+ char **bootpath;
+ char **bootargs;
+ int *fd_stdin;
+ int *fd_stdout;
+};
+
+/* The top level PROM vector. */
+struct linux_romvec {
+ /* Version numbers. */
+ unsigned int pv_magic_cookie;
+ unsigned int pv_romvers;
+ unsigned int pv_plugin_revision;
+ unsigned int pv_printrev;
+
+ /* Version 0 memory descriptors. */
+ struct linux_mem_v0 pv_v0mem;
+
+ /* Node operations. */
+ struct linux_nodeops *pv_nodeops;
+
+ char **pv_bootstr;
+ struct linux_dev_v0_funcs pv_v0devops;
+
+ char *pv_stdin;
+ char *pv_stdout;
+#define PROMDEV_KBD 0 /* input from keyboard */
+#define PROMDEV_SCREEN 0 /* output to screen */
+#define PROMDEV_TTYA 1 /* in/out to ttya */
+#define PROMDEV_TTYB 2 /* in/out to ttyb */
+
+ /* Blocking getchar/putchar. NOT REENTRANT! (grr) */
+ int (*pv_getchar)(void);
+ void (*pv_putchar)(int ch);
+
+ /* Non-blocking variants. */
+ int (*pv_nbgetchar)(void);
+ int (*pv_nbputchar)(int ch);
+
+ void (*pv_putstr)(char *str, int len);
+
+ /* Miscellany. */
+ void (*pv_reboot)(char *bootstr);
+ void (*pv_printf)(__const__ char *fmt, ...);
+ void (*pv_abort)(void);
+ __volatile__ int *pv_ticks;
+ void (*pv_halt)(void);
+ void (**pv_synchook)(void);
+
+ /* Evaluate a forth string, not different proto for V0 and V2->up. */
+ union {
+ void (*v0_eval)(int len, char *str);
+ void (*v2_eval)(char *str);
+ } pv_fortheval;
+
+ struct linux_arguments_v0 **pv_v0bootargs;
+
+ /* Get ether address. */
+ unsigned int (*pv_enaddr)(int d, char *enaddr);
+
+ struct linux_bootargs_v2 pv_v2bootargs;
+ struct linux_dev_v2_funcs pv_v2devops;
+
+ int filler[15];
+
+ /* This one is sun4c/sun4 only. */
+ void (*pv_setctxt)(int ctxt, char *va, int pmeg);
+
+ /* Prom version 3 Multiprocessor routines. This stuff is crazy.
+ * No joke. Calling these when there is only one cpu probably
+ * crashes the machine, have to test this. :-)
+ */
+
+ /* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
+ * 'thiscontext' executing at address 'prog_counter'
+ */
+ int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
+ int thiscontext, char *prog_counter);
+
+ /* v3_cpustop() will cause cpu 'whichcpu' to stop executing
+ * until a resume cpu call is made.
+ */
+ int (*v3_cpustop)(unsigned int whichcpu);
+
+ /* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
+ * resume cpu call is made.
+ */
+ int (*v3_cpuidle)(unsigned int whichcpu);
+
+ /* v3_cpuresume() will resume processor 'whichcpu' executing
+ * starting with whatever 'pc' and 'npc' were left at the
+ * last 'idle' or 'stop' call.
+ */
+ int (*v3_cpuresume)(unsigned int whichcpu);
+};
+
+/* Routines for traversing the prom device tree. */
+struct linux_nodeops {
+ int (*no_nextnode)(int node);
+ int (*no_child)(int node);
+ int (*no_proplen)(int node, const char *name);
+ int (*no_getprop)(int node, const char *name, char *val);
+ int (*no_setprop)(int node, const char *name, char *val, int len);
+ char * (*no_nextprop)(int node, char *name);
+};
+
+/* More fun PROM structures for device probing. */
#if defined(__sparc__) && defined(__arch64__)
-#include <asm/openprom_64.h>
+#define PROMREG_MAX 24
+#define PROMVADDR_MAX 16
+#define PROMINTR_MAX 32
#else
-#include <asm/openprom_32.h>
+#define PROMREG_MAX 16
+#define PROMVADDR_MAX 16
+#define PROMINTR_MAX 15
#endif
+
+struct linux_prom_registers {
+ unsigned int which_io; /* hi part of physical address */
+ unsigned int phys_addr; /* The physical address of this register */
+ unsigned int reg_size; /* How many bytes does this register take up? */
+};
+
+struct linux_prom64_registers {
+ unsigned long phys_addr;
+ unsigned long reg_size;
+};
+
+struct linux_prom_irqs {
+ int pri; /* IRQ priority */
+ int vector; /* This is foobar, what does it do? */
+};
+
+/* Element of the "ranges" vector */
+struct linux_prom_ranges {
+ unsigned int ot_child_space;
+ unsigned int ot_child_base; /* Bus feels this */
+ unsigned int ot_parent_space;
+ unsigned int ot_parent_base; /* CPU looks from here */
+ unsigned int or_size;
+};
+
+/*
+ * Ranges and reg properties are a bit different for PCI.
+ */
+#if defined(__sparc__) && defined(__arch64__)
+struct linux_prom_pci_registers {
+ unsigned int phys_hi;
+ unsigned int phys_mid;
+ unsigned int phys_lo;
+
+ unsigned int size_hi;
+ unsigned int size_lo;
+};
+#else
+struct linux_prom_pci_registers {
+ /*
+ * We don't know what information this field contain.
+ * We guess, PCI device function is in bits 15:8
+ * So, ...
+ */
+ unsigned int which_io; /* Let it be which_io */
+
+ unsigned int phys_hi;
+ unsigned int phys_lo;
+
+ unsigned int size_hi;
+ unsigned int size_lo;
+};
+
#endif
+
+struct linux_prom_pci_ranges {
+ unsigned int child_phys_hi; /* Only certain bits are encoded here. */
+ unsigned int child_phys_mid;
+ unsigned int child_phys_lo;
+
+ unsigned int parent_phys_hi;
+ unsigned int parent_phys_lo;
+
+ unsigned int size_hi;
+ unsigned int size_lo;
+};
+
+struct linux_prom_pci_intmap {
+ unsigned int phys_hi;
+ unsigned int phys_mid;
+ unsigned int phys_lo;
+
+ unsigned int interrupt;
+
+ int cnode;
+ unsigned int cinterrupt;
+};
+
+struct linux_prom_pci_intmask {
+ unsigned int phys_hi;
+ unsigned int phys_mid;
+ unsigned int phys_lo;
+ unsigned int interrupt;
+};
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_OPENPROM_H) */
diff --git a/arch/sparc/include/asm/openprom_32.h b/arch/sparc/include/asm/openprom_32.h
deleted file mode 100644
index 875da3552d80..000000000000
--- a/arch/sparc/include/asm/openprom_32.h
+++ /dev/null
@@ -1,255 +0,0 @@
-#ifndef __SPARC_OPENPROM_H
-#define __SPARC_OPENPROM_H
-
-/* openprom.h: Prom structures and defines for access to the OPENBOOT
- * prom routines and data areas.
- *
- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
- */
-
-/* Empirical constants... */
-#define LINUX_OPPROM_MAGIC 0x10010407
-
-#ifndef __ASSEMBLY__
-/* V0 prom device operations. */
-struct linux_dev_v0_funcs {
- int (*v0_devopen)(char *device_str);
- int (*v0_devclose)(int dev_desc);
- int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
- int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
- int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
- int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
- int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
- int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
- int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
-};
-
-/* V2 and later prom device operations. */
-struct linux_dev_v2_funcs {
- int (*v2_inst2pkg)(int d); /* Convert ihandle to phandle */
- char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
- void (*v2_dumb_mem_free)(char *va, unsigned sz);
-
- /* To map devices into virtual I/O space. */
- char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
- void (*v2_dumb_munmap)(char *virta, unsigned size);
-
- int (*v2_dev_open)(char *devpath);
- void (*v2_dev_close)(int d);
- int (*v2_dev_read)(int d, char *buf, int nbytes);
- int (*v2_dev_write)(int d, char *buf, int nbytes);
- int (*v2_dev_seek)(int d, int hi, int lo);
-
- /* Never issued (multistage load support) */
- void (*v2_wheee2)(void);
- void (*v2_wheee3)(void);
-};
-
-struct linux_mlist_v0 {
- struct linux_mlist_v0 *theres_more;
- char *start_adr;
- unsigned num_bytes;
-};
-
-struct linux_mem_v0 {
- struct linux_mlist_v0 **v0_totphys;
- struct linux_mlist_v0 **v0_prommap;
- struct linux_mlist_v0 **v0_available; /* What we can use */
-};
-
-/* Arguments sent to the kernel from the boot prompt. */
-struct linux_arguments_v0 {
- char *argv[8];
- char args[100];
- char boot_dev[2];
- int boot_dev_ctrl;
- int boot_dev_unit;
- int dev_partition;
- char *kernel_file_name;
- void *aieee1; /* XXX */
-};
-
-/* V2 and up boot things. */
-struct linux_bootargs_v2 {
- char **bootpath;
- char **bootargs;
- int *fd_stdin;
- int *fd_stdout;
-};
-
-/* The top level PROM vector. */
-struct linux_romvec {
- /* Version numbers. */
- unsigned int pv_magic_cookie;
- unsigned int pv_romvers;
- unsigned int pv_plugin_revision;
- unsigned int pv_printrev;
-
- /* Version 0 memory descriptors. */
- struct linux_mem_v0 pv_v0mem;
-
- /* Node operations. */
- struct linux_nodeops *pv_nodeops;
-
- char **pv_bootstr;
- struct linux_dev_v0_funcs pv_v0devops;
-
- char *pv_stdin;
- char *pv_stdout;
-#define PROMDEV_KBD 0 /* input from keyboard */
-#define PROMDEV_SCREEN 0 /* output to screen */
-#define PROMDEV_TTYA 1 /* in/out to ttya */
-#define PROMDEV_TTYB 2 /* in/out to ttyb */
-
- /* Blocking getchar/putchar. NOT REENTRANT! (grr) */
- int (*pv_getchar)(void);
- void (*pv_putchar)(int ch);
-
- /* Non-blocking variants. */
- int (*pv_nbgetchar)(void);
- int (*pv_nbputchar)(int ch);
-
- void (*pv_putstr)(char *str, int len);
-
- /* Miscellany. */
- void (*pv_reboot)(char *bootstr);
- void (*pv_printf)(__const__ char *fmt, ...);
- void (*pv_abort)(void);
- __volatile__ int *pv_ticks;
- void (*pv_halt)(void);
- void (**pv_synchook)(void);
-
- /* Evaluate a forth string, not different proto for V0 and V2->up. */
- union {
- void (*v0_eval)(int len, char *str);
- void (*v2_eval)(char *str);
- } pv_fortheval;
-
- struct linux_arguments_v0 **pv_v0bootargs;
-
- /* Get ether address. */
- unsigned int (*pv_enaddr)(int d, char *enaddr);
-
- struct linux_bootargs_v2 pv_v2bootargs;
- struct linux_dev_v2_funcs pv_v2devops;
-
- int filler[15];
-
- /* This one is sun4c/sun4 only. */
- void (*pv_setctxt)(int ctxt, char *va, int pmeg);
-
- /* Prom version 3 Multiprocessor routines. This stuff is crazy.
- * No joke. Calling these when there is only one cpu probably
- * crashes the machine, have to test this. :-)
- */
-
- /* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
- * 'thiscontext' executing at address 'prog_counter'
- */
- int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
- int thiscontext, char *prog_counter);
-
- /* v3_cpustop() will cause cpu 'whichcpu' to stop executing
- * until a resume cpu call is made.
- */
- int (*v3_cpustop)(unsigned int whichcpu);
-
- /* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
- * resume cpu call is made.
- */
- int (*v3_cpuidle)(unsigned int whichcpu);
-
- /* v3_cpuresume() will resume processor 'whichcpu' executing
- * starting with whatever 'pc' and 'npc' were left at the
- * last 'idle' or 'stop' call.
- */
- int (*v3_cpuresume)(unsigned int whichcpu);
-};
-
-/* Routines for traversing the prom device tree. */
-struct linux_nodeops {
- int (*no_nextnode)(int node);
- int (*no_child)(int node);
- int (*no_proplen)(int node, const char *name);
- int (*no_getprop)(int node, const char *name, char *val);
- int (*no_setprop)(int node, const char *name, char *val, int len);
- char * (*no_nextprop)(int node, char *name);
-};
-
-/* More fun PROM structures for device probing. */
-#define PROMREG_MAX 16
-#define PROMVADDR_MAX 16
-#define PROMINTR_MAX 15
-
-struct linux_prom_registers {
- unsigned int which_io; /* is this in OBIO space? */
- unsigned int phys_addr; /* The physical address of this register */
- unsigned int reg_size; /* How many bytes does this register take up? */
-};
-
-struct linux_prom_irqs {
- int pri; /* IRQ priority */
- int vector; /* This is foobar, what does it do? */
-};
-
-/* Element of the "ranges" vector */
-struct linux_prom_ranges {
- unsigned int ot_child_space;
- unsigned int ot_child_base; /* Bus feels this */
- unsigned int ot_parent_space;
- unsigned int ot_parent_base; /* CPU looks from here */
- unsigned int or_size;
-};
-
-/* Ranges and reg properties are a bit different for PCI. */
-struct linux_prom_pci_registers {
- /*
- * We don't know what information this field contain.
- * We guess, PCI device function is in bits 15:8
- * So, ...
- */
- unsigned int which_io; /* Let it be which_io */
-
- unsigned int phys_hi;
- unsigned int phys_lo;
-
- unsigned int size_hi;
- unsigned int size_lo;
-};
-
-struct linux_prom_pci_ranges {
- unsigned int child_phys_hi; /* Only certain bits are encoded here. */
- unsigned int child_phys_mid;
- unsigned int child_phys_lo;
-
- unsigned int parent_phys_hi;
- unsigned int parent_phys_lo;
-
- unsigned int size_hi;
- unsigned int size_lo;
-};
-
-struct linux_prom_pci_assigned_addresses {
- unsigned int which_io;
-
- unsigned int phys_hi;
- unsigned int phys_lo;
-
- unsigned int size_hi;
- unsigned int size_lo;
-};
-
-struct linux_prom_ebus_ranges {
- unsigned int child_phys_hi;
- unsigned int child_phys_lo;
-
- unsigned int parent_phys_hi;
- unsigned int parent_phys_mid;
- unsigned int parent_phys_lo;
-
- unsigned int size;
-};
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC_OPENPROM_H) */
diff --git a/arch/sparc/include/asm/openprom_64.h b/arch/sparc/include/asm/openprom_64.h
deleted file mode 100644
index b69e4a8c9170..000000000000
--- a/arch/sparc/include/asm/openprom_64.h
+++ /dev/null
@@ -1,280 +0,0 @@
-#ifndef __SPARC64_OPENPROM_H
-#define __SPARC64_OPENPROM_H
-
-/* openprom.h: Prom structures and defines for access to the OPENBOOT
- * prom routines and data areas.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#ifndef __ASSEMBLY__
-/* V0 prom device operations. */
-struct linux_dev_v0_funcs {
- int (*v0_devopen)(char *device_str);
- int (*v0_devclose)(int dev_desc);
- int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
- int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
- int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
- int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
- int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
- int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
- int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
-};
-
-/* V2 and later prom device operations. */
-struct linux_dev_v2_funcs {
- int (*v2_inst2pkg)(int d); /* Convert ihandle to phandle */
- char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
- void (*v2_dumb_mem_free)(char *va, unsigned sz);
-
- /* To map devices into virtual I/O space. */
- char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
- void (*v2_dumb_munmap)(char *virta, unsigned size);
-
- int (*v2_dev_open)(char *devpath);
- void (*v2_dev_close)(int d);
- int (*v2_dev_read)(int d, char *buf, int nbytes);
- int (*v2_dev_write)(int d, char *buf, int nbytes);
- int (*v2_dev_seek)(int d, int hi, int lo);
-
- /* Never issued (multistage load support) */
- void (*v2_wheee2)(void);
- void (*v2_wheee3)(void);
-};
-
-struct linux_mlist_v0 {
- struct linux_mlist_v0 *theres_more;
- unsigned start_adr;
- unsigned num_bytes;
-};
-
-struct linux_mem_v0 {
- struct linux_mlist_v0 **v0_totphys;
- struct linux_mlist_v0 **v0_prommap;
- struct linux_mlist_v0 **v0_available; /* What we can use */
-};
-
-/* Arguments sent to the kernel from the boot prompt. */
-struct linux_arguments_v0 {
- char *argv[8];
- char args[100];
- char boot_dev[2];
- int boot_dev_ctrl;
- int boot_dev_unit;
- int dev_partition;
- char *kernel_file_name;
- void *aieee1; /* XXX */
-};
-
-/* V2 and up boot things. */
-struct linux_bootargs_v2 {
- char **bootpath;
- char **bootargs;
- int *fd_stdin;
- int *fd_stdout;
-};
-
-/* The top level PROM vector. */
-struct linux_romvec {
- /* Version numbers. */
- unsigned int pv_magic_cookie;
- unsigned int pv_romvers;
- unsigned int pv_plugin_revision;
- unsigned int pv_printrev;
-
- /* Version 0 memory descriptors. */
- struct linux_mem_v0 pv_v0mem;
-
- /* Node operations. */
- struct linux_nodeops *pv_nodeops;
-
- char **pv_bootstr;
- struct linux_dev_v0_funcs pv_v0devops;
-
- char *pv_stdin;
- char *pv_stdout;
-#define PROMDEV_KBD 0 /* input from keyboard */
-#define PROMDEV_SCREEN 0 /* output to screen */
-#define PROMDEV_TTYA 1 /* in/out to ttya */
-#define PROMDEV_TTYB 2 /* in/out to ttyb */
-
- /* Blocking getchar/putchar. NOT REENTRANT! (grr) */
- int (*pv_getchar)(void);
- void (*pv_putchar)(int ch);
-
- /* Non-blocking variants. */
- int (*pv_nbgetchar)(void);
- int (*pv_nbputchar)(int ch);
-
- void (*pv_putstr)(char *str, int len);
-
- /* Miscellany. */
- void (*pv_reboot)(char *bootstr);
- void (*pv_printf)(__const__ char *fmt, ...);
- void (*pv_abort)(void);
- __volatile__ int *pv_ticks;
- void (*pv_halt)(void);
- void (**pv_synchook)(void);
-
- /* Evaluate a forth string, not different proto for V0 and V2->up. */
- union {
- void (*v0_eval)(int len, char *str);
- void (*v2_eval)(char *str);
- } pv_fortheval;
-
- struct linux_arguments_v0 **pv_v0bootargs;
-
- /* Get ether address. */
- unsigned int (*pv_enaddr)(int d, char *enaddr);
-
- struct linux_bootargs_v2 pv_v2bootargs;
- struct linux_dev_v2_funcs pv_v2devops;
-
- int filler[15];
-
- /* This one is sun4c/sun4 only. */
- void (*pv_setctxt)(int ctxt, char *va, int pmeg);
-
- /* Prom version 3 Multiprocessor routines. This stuff is crazy.
- * No joke. Calling these when there is only one cpu probably
- * crashes the machine, have to test this. :-)
- */
-
- /* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
- * 'thiscontext' executing at address 'prog_counter'
- */
- int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
- int thiscontext, char *prog_counter);
-
- /* v3_cpustop() will cause cpu 'whichcpu' to stop executing
- * until a resume cpu call is made.
- */
- int (*v3_cpustop)(unsigned int whichcpu);
-
- /* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
- * resume cpu call is made.
- */
- int (*v3_cpuidle)(unsigned int whichcpu);
-
- /* v3_cpuresume() will resume processor 'whichcpu' executing
- * starting with whatever 'pc' and 'npc' were left at the
- * last 'idle' or 'stop' call.
- */
- int (*v3_cpuresume)(unsigned int whichcpu);
-};
-
-/* Routines for traversing the prom device tree. */
-struct linux_nodeops {
- int (*no_nextnode)(int node);
- int (*no_child)(int node);
- int (*no_proplen)(int node, char *name);
- int (*no_getprop)(int node, char *name, char *val);
- int (*no_setprop)(int node, char *name, char *val, int len);
- char * (*no_nextprop)(int node, char *name);
-};
-
-/* More fun PROM structures for device probing. */
-#define PROMREG_MAX 24
-#define PROMVADDR_MAX 16
-#define PROMINTR_MAX 32
-
-struct linux_prom_registers {
- unsigned which_io; /* hi part of physical address */
- unsigned phys_addr; /* The physical address of this register */
- int reg_size; /* How many bytes does this register take up? */
-};
-
-struct linux_prom64_registers {
- unsigned long phys_addr;
- unsigned long reg_size;
-};
-
-struct linux_prom_irqs {
- int pri; /* IRQ priority */
- int vector; /* This is foobar, what does it do? */
-};
-
-/* Element of the "ranges" vector */
-struct linux_prom_ranges {
- unsigned int ot_child_space;
- unsigned int ot_child_base; /* Bus feels this */
- unsigned int ot_parent_space;
- unsigned int ot_parent_base; /* CPU looks from here */
- unsigned int or_size;
-};
-
-struct linux_prom64_ranges {
- unsigned long ot_child_base; /* Bus feels this */
- unsigned long ot_parent_base; /* CPU looks from here */
- unsigned long or_size;
-};
-
-/* Ranges and reg properties are a bit different for PCI. */
-struct linux_prom_pci_registers {
- unsigned int phys_hi;
- unsigned int phys_mid;
- unsigned int phys_lo;
-
- unsigned int size_hi;
- unsigned int size_lo;
-};
-
-struct linux_prom_pci_ranges {
- unsigned int child_phys_hi; /* Only certain bits are encoded here. */
- unsigned int child_phys_mid;
- unsigned int child_phys_lo;
-
- unsigned int parent_phys_hi;
- unsigned int parent_phys_lo;
-
- unsigned int size_hi;
- unsigned int size_lo;
-};
-
-struct linux_prom_pci_intmap {
- unsigned int phys_hi;
- unsigned int phys_mid;
- unsigned int phys_lo;
-
- unsigned int interrupt;
-
- int cnode;
- unsigned int cinterrupt;
-};
-
-struct linux_prom_pci_intmask {
- unsigned int phys_hi;
- unsigned int phys_mid;
- unsigned int phys_lo;
- unsigned int interrupt;
-};
-
-struct linux_prom_ebus_ranges {
- unsigned int child_phys_hi;
- unsigned int child_phys_lo;
-
- unsigned int parent_phys_hi;
- unsigned int parent_phys_mid;
- unsigned int parent_phys_lo;
-
- unsigned int size;
-};
-
-struct linux_prom_ebus_intmap {
- unsigned int phys_hi;
- unsigned int phys_lo;
-
- unsigned int interrupt;
-
- int cnode;
- unsigned int cinterrupt;
-};
-
-struct linux_prom_ebus_intmask {
- unsigned int phys_hi;
- unsigned int phys_lo;
- unsigned int interrupt;
-};
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC64_OPENPROM_H) */
diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h
index 03a0e091a884..98d6ebb922fb 100644
--- a/arch/sparc/include/asm/posix_types.h
+++ b/arch/sparc/include/asm/posix_types.h
@@ -1,8 +1,155 @@
-#ifndef ___ASM_SPARC_POSIX_TYPES_H
-#define ___ASM_SPARC_POSIX_TYPES_H
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc. Also, we cannot
+ * assume GCC is being used.
+ */
+
+#ifndef __SPARC_POSIX_TYPES_H
+#define __SPARC_POSIX_TYPES_H
+
#if defined(__sparc__) && defined(__arch64__)
-#include <asm/posix_types_64.h>
+/* sparc 64 bit */
+typedef unsigned long __kernel_size_t;
+typedef long __kernel_ssize_t;
+typedef long __kernel_ptrdiff_t;
+typedef long __kernel_time_t;
+typedef long __kernel_clock_t;
+typedef int __kernel_pid_t;
+typedef int __kernel_ipc_pid_t;
+typedef unsigned int __kernel_uid_t;
+typedef unsigned int __kernel_gid_t;
+typedef unsigned long __kernel_ino_t;
+typedef unsigned int __kernel_mode_t;
+typedef unsigned short __kernel_umode_t;
+typedef unsigned int __kernel_nlink_t;
+typedef int __kernel_daddr_t;
+typedef long __kernel_off_t;
+typedef char * __kernel_caddr_t;
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+typedef int __kernel_clockid_t;
+typedef int __kernel_timer_t;
+
+typedef unsigned short __kernel_old_uid_t;
+typedef unsigned short __kernel_old_gid_t;
+typedef __kernel_uid_t __kernel_uid32_t;
+typedef __kernel_gid_t __kernel_gid32_t;
+
+typedef unsigned int __kernel_old_dev_t;
+
+/* Note this piece of asymmetry from the v9 ABI. */
+typedef int __kernel_suseconds_t;
+
#else
-#include <asm/posix_types_32.h>
-#endif
+/* sparc 32 bit */
+
+typedef unsigned int __kernel_size_t;
+typedef int __kernel_ssize_t;
+typedef long int __kernel_ptrdiff_t;
+typedef long __kernel_time_t;
+typedef long __kernel_suseconds_t;
+typedef long __kernel_clock_t;
+typedef int __kernel_pid_t;
+typedef unsigned short __kernel_ipc_pid_t;
+typedef unsigned short __kernel_uid_t;
+typedef unsigned short __kernel_gid_t;
+typedef unsigned long __kernel_ino_t;
+typedef unsigned short __kernel_mode_t;
+typedef unsigned short __kernel_umode_t;
+typedef short __kernel_nlink_t;
+typedef long __kernel_daddr_t;
+typedef long __kernel_off_t;
+typedef char * __kernel_caddr_t;
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+typedef unsigned int __kernel_uid32_t;
+typedef unsigned int __kernel_gid32_t;
+typedef unsigned short __kernel_old_uid_t;
+typedef unsigned short __kernel_old_gid_t;
+typedef unsigned short __kernel_old_dev_t;
+typedef int __kernel_clockid_t;
+typedef int __kernel_timer_t;
+
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+#ifdef __GNUC__
+typedef long long __kernel_loff_t;
#endif
+
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+
+#ifdef __KERNEL__
+
+#undef __FD_SET
+static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+ unsigned long _tmp = fd / __NFDBITS;
+ unsigned long _rem = fd % __NFDBITS;
+ fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
+}
+
+#undef __FD_CLR
+static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+ unsigned long _tmp = fd / __NFDBITS;
+ unsigned long _rem = fd % __NFDBITS;
+ fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
+}
+
+#undef __FD_ISSET
+static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
+{
+ unsigned long _tmp = fd / __NFDBITS;
+ unsigned long _rem = fd % __NFDBITS;
+ return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant cases (8 or 32 longs,
+ * for 256 and 1024-bit fd_sets respectively)
+ */
+#undef __FD_ZERO
+static inline void __FD_ZERO(__kernel_fd_set *p)
+{
+ unsigned long *tmp = p->fds_bits;
+ int i;
+
+ if (__builtin_constant_p(__FDSET_LONGS)) {
+ switch (__FDSET_LONGS) {
+ case 32:
+ tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+ tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+ tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+ tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+ tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
+ tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
+ tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
+ tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
+ return;
+ case 16:
+ tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+ tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+ tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+ tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+ return;
+ case 8:
+ tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+ tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+ return;
+ case 4:
+ tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+ return;
+ }
+ }
+ i = __FDSET_LONGS;
+ while (i) {
+ i--;
+ *tmp = 0;
+ tmp++;
+ }
+}
+
+#endif /* __KERNEL__ */
+#endif /* __SPARC_POSIX_TYPES_H */
diff --git a/arch/sparc/include/asm/posix_types_32.h b/arch/sparc/include/asm/posix_types_32.h
deleted file mode 100644
index 6bb6eb1ca0f2..000000000000
--- a/arch/sparc/include/asm/posix_types_32.h
+++ /dev/null
@@ -1,118 +0,0 @@
-#ifndef __ARCH_SPARC_POSIX_TYPES_H
-#define __ARCH_SPARC_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc. Also, we cannot
- * assume GCC is being used.
- */
-
-typedef unsigned int __kernel_size_t;
-typedef int __kernel_ssize_t;
-typedef long int __kernel_ptrdiff_t;
-typedef long __kernel_time_t;
-typedef long __kernel_suseconds_t;
-typedef long __kernel_clock_t;
-typedef int __kernel_pid_t;
-typedef unsigned short __kernel_ipc_pid_t;
-typedef unsigned short __kernel_uid_t;
-typedef unsigned short __kernel_gid_t;
-typedef unsigned long __kernel_ino_t;
-typedef unsigned short __kernel_mode_t;
-typedef unsigned short __kernel_umode_t;
-typedef short __kernel_nlink_t;
-typedef long __kernel_daddr_t;
-typedef long __kernel_off_t;
-typedef char * __kernel_caddr_t;
-typedef unsigned short __kernel_uid16_t;
-typedef unsigned short __kernel_gid16_t;
-typedef unsigned int __kernel_uid32_t;
-typedef unsigned int __kernel_gid32_t;
-typedef unsigned short __kernel_old_uid_t;
-typedef unsigned short __kernel_old_gid_t;
-typedef unsigned short __kernel_old_dev_t;
-typedef int __kernel_clockid_t;
-typedef int __kernel_timer_t;
-
-#ifdef __GNUC__
-typedef long long __kernel_loff_t;
-#endif
-
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-
-#if defined(__KERNEL__)
-
-#undef __FD_SET
-static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
-}
-
-#undef __FD_CLR
-static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
-}
-
-#undef __FD_ISSET
-static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
-}
-
-/*
- * This will unroll the loop for the normal constant cases (8 or 32 longs,
- * for 256 and 1024-bit fd_sets respectively)
- */
-#undef __FD_ZERO
-static inline void __FD_ZERO(__kernel_fd_set *p)
-{
- unsigned long *tmp = p->fds_bits;
- int i;
-
- if (__builtin_constant_p(__FDSET_LONGS)) {
- switch (__FDSET_LONGS) {
- case 32:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
- tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
- tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
- tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
- tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
- tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
- tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
- return;
- case 16:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
- tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
- tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
- return;
- case 8:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
- return;
- case 4:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- return;
- }
- }
- i = __FDSET_LONGS;
- while (i) {
- i--;
- *tmp = 0;
- tmp++;
- }
-}
-
-#endif /* defined(__KERNEL__) */
-
-#endif /* !(__ARCH_SPARC_POSIX_TYPES_H) */
diff --git a/arch/sparc/include/asm/posix_types_64.h b/arch/sparc/include/asm/posix_types_64.h
deleted file mode 100644
index ba8f93295763..000000000000
--- a/arch/sparc/include/asm/posix_types_64.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#ifndef __ARCH_SPARC64_POSIX_TYPES_H
-#define __ARCH_SPARC64_POSIX_TYPES_H
-
-/*
- * This file is generally used by user-level software, so you need to
- * be a little careful about namespace pollution etc. Also, we cannot
- * assume GCC is being used.
- */
-
-typedef unsigned long __kernel_size_t;
-typedef long __kernel_ssize_t;
-typedef long __kernel_ptrdiff_t;
-typedef long __kernel_time_t;
-typedef long __kernel_clock_t;
-typedef int __kernel_pid_t;
-typedef int __kernel_ipc_pid_t;
-typedef unsigned int __kernel_uid_t;
-typedef unsigned int __kernel_gid_t;
-typedef unsigned long __kernel_ino_t;
-typedef unsigned int __kernel_mode_t;
-typedef unsigned short __kernel_umode_t;
-typedef unsigned int __kernel_nlink_t;
-typedef int __kernel_daddr_t;
-typedef long __kernel_off_t;
-typedef char * __kernel_caddr_t;
-typedef unsigned short __kernel_uid16_t;
-typedef unsigned short __kernel_gid16_t;
-typedef int __kernel_clockid_t;
-typedef int __kernel_timer_t;
-
-typedef unsigned short __kernel_old_uid_t;
-typedef unsigned short __kernel_old_gid_t;
-typedef __kernel_uid_t __kernel_uid32_t;
-typedef __kernel_gid_t __kernel_gid32_t;
-
-typedef unsigned int __kernel_old_dev_t;
-
-/* Note this piece of asymmetry from the v9 ABI. */
-typedef int __kernel_suseconds_t;
-
-#ifdef __GNUC__
-typedef long long __kernel_loff_t;
-#endif
-
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-
-#if defined(__KERNEL__)
-
-#undef __FD_SET
-static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
-}
-
-#undef __FD_CLR
-static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
-}
-
-#undef __FD_ISSET
-static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
-}
-
-/*
- * This will unroll the loop for the normal constant cases (8 or 32 longs,
- * for 256 and 1024-bit fd_sets respectively)
- */
-#undef __FD_ZERO
-static inline void __FD_ZERO(__kernel_fd_set *p)
-{
- unsigned long *tmp = p->fds_bits;
- int i;
-
- if (__builtin_constant_p(__FDSET_LONGS)) {
- switch (__FDSET_LONGS) {
- case 32:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
- tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
- tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
- tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
- tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
- tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
- tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
- return;
- case 16:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
- tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
- tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
- return;
- case 8:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
- return;
- case 4:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- return;
- }
- }
- i = __FDSET_LONGS;
- while (i) {
- i--;
- *tmp = 0;
- tmp++;
- }
-}
-
-#endif /* defined(__KERNEL__) */
-
-#endif /* !(__ARCH_SPARC64_POSIX_TYPES_H) */
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index 2ae67a2e7f3a..09521c6a5edb 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -99,7 +99,7 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
"st\t%%g0, [%0 + %3 + 0x3c]"
: /* no outputs */
: "r" (regs),
- "r" (sp - sizeof(struct reg_window)),
+ "r" (sp - sizeof(struct reg_window32)),
"r" (zero),
"i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))
: "memory");
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index 6dcbe2eed2e2..30b0b797dc0c 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -1,8 +1,448 @@
-#ifndef ___ASM_SPARC_PTRACE_H
-#define ___ASM_SPARC_PTRACE_H
+#ifndef __SPARC_PTRACE_H
+#define __SPARC_PTRACE_H
+
#if defined(__sparc__) && defined(__arch64__)
-#include <asm/ptrace_64.h>
+/* 64 bit sparc */
+#include <asm/pstate.h>
+
+/* This struct defines the way the registers are stored on the
+ * stack during a system call and basically all traps.
+ */
+
+/* This magic value must have the low 9 bits clear,
+ * as that is where we encode the %tt value, see below.
+ */
+#define PT_REGS_MAGIC 0x57ac6c00
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct pt_regs {
+ unsigned long u_regs[16]; /* globals and ins */
+ unsigned long tstate;
+ unsigned long tpc;
+ unsigned long tnpc;
+ unsigned int y;
+
+ /* We encode a magic number, PT_REGS_MAGIC, along
+ * with the %tt (trap type) register value at trap
+ * entry time. The magic number allows us to identify
+ * accurately a trap stack frame in the stack
+ * unwinder, and the %tt value allows us to test
+ * things like "in a system call" etc. for an arbitray
+ * process.
+ *
+ * The PT_REGS_MAGIC is choosen such that it can be
+ * loaded completely using just a sethi instruction.
+ */
+ unsigned int magic;
+};
+
+struct pt_regs32 {
+ unsigned int psr;
+ unsigned int pc;
+ unsigned int npc;
+ unsigned int y;
+ unsigned int u_regs[16]; /* globals and ins */
+};
+
+/* A V9 register window */
+struct reg_window {
+ unsigned long locals[8];
+ unsigned long ins[8];
+};
+
+/* A 32-bit register window. */
+struct reg_window32 {
+ unsigned int locals[8];
+ unsigned int ins[8];
+};
+
+/* A V9 Sparc stack frame */
+struct sparc_stackf {
+ unsigned long locals[8];
+ unsigned long ins[6];
+ struct sparc_stackf *fp;
+ unsigned long callers_pc;
+ char *structptr;
+ unsigned long xargs[6];
+ unsigned long xxargs[1];
+};
+
+/* A 32-bit Sparc stack frame */
+struct sparc_stackf32 {
+ unsigned int locals[8];
+ unsigned int ins[6];
+ unsigned int fp;
+ unsigned int callers_pc;
+ unsigned int structptr;
+ unsigned int xargs[6];
+ unsigned int xxargs[1];
+};
+
+struct sparc_trapf {
+ unsigned long locals[8];
+ unsigned long ins[8];
+ unsigned long _unused;
+ struct pt_regs *regs;
+};
+#endif /* (!__ASSEMBLY__) */
#else
-#include <asm/ptrace_32.h>
+/* 32 bit sparc */
+
+#include <asm/psr.h>
+
+/* This struct defines the way the registers are stored on the
+ * stack during a system call and basically all traps.
+ */
+#ifndef __ASSEMBLY__
+
+struct pt_regs {
+ unsigned long psr;
+ unsigned long pc;
+ unsigned long npc;
+ unsigned long y;
+ unsigned long u_regs[16]; /* globals and ins */
+};
+
+/* A 32-bit register window. */
+struct reg_window32 {
+ unsigned long locals[8];
+ unsigned long ins[8];
+};
+
+/* A Sparc stack frame */
+struct sparc_stackf {
+ unsigned long locals[8];
+ unsigned long ins[6];
+ struct sparc_stackf *fp;
+ unsigned long callers_pc;
+ char *structptr;
+ unsigned long xargs[6];
+ unsigned long xxargs[1];
+};
+#endif /* (!__ASSEMBLY__) */
+
+#endif /* (defined(__sparc__) && defined(__arch64__))*/
+
+#ifndef __ASSEMBLY__
+
+#define TRACEREG_SZ sizeof(struct pt_regs)
+#define STACKFRAME_SZ sizeof(struct sparc_stackf)
+
+#define TRACEREG32_SZ sizeof(struct pt_regs32)
+#define STACKFRAME32_SZ sizeof(struct sparc_stackf32)
+
+#endif /* (!__ASSEMBLY__) */
+
+#define UREG_G0 0
+#define UREG_G1 1
+#define UREG_G2 2
+#define UREG_G3 3
+#define UREG_G4 4
+#define UREG_G5 5
+#define UREG_G6 6
+#define UREG_G7 7
+#define UREG_I0 8
+#define UREG_I1 9
+#define UREG_I2 10
+#define UREG_I3 11
+#define UREG_I4 12
+#define UREG_I5 13
+#define UREG_I6 14
+#define UREG_I7 15
+#define UREG_FP UREG_I6
+#define UREG_RETPC UREG_I7
+
+#if defined(__sparc__) && defined(__arch64__)
+/* 64 bit sparc */
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+
+#include <linux/threads.h>
+#include <asm/system.h>
+
+static inline int pt_regs_trap_type(struct pt_regs *regs)
+{
+ return regs->magic & 0x1ff;
+}
+
+static inline bool pt_regs_is_syscall(struct pt_regs *regs)
+{
+ return (regs->tstate & TSTATE_SYSCALL);
+}
+
+static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
+{
+ return (regs->tstate &= ~TSTATE_SYSCALL);
+}
+
+#define arch_ptrace_stop_needed(exit_code, info) \
+({ flush_user_windows(); \
+ get_thread_wsaved() != 0; \
+})
+
+#define arch_ptrace_stop(exit_code, info) \
+ synchronize_user_stack()
+
+struct global_reg_snapshot {
+ unsigned long tstate;
+ unsigned long tpc;
+ unsigned long tnpc;
+ unsigned long o7;
+ unsigned long i7;
+ unsigned long rpc;
+ struct thread_info *thread;
+ unsigned long pad1;
+};
+extern struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
+
+#define force_successful_syscall_return() \
+do { current_thread_info()->syscall_noerror = 1; \
+} while (0)
+#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
+#define instruction_pointer(regs) ((regs)->tpc)
+#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
+#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
+#ifdef CONFIG_SMP
+extern unsigned long profile_pc(struct pt_regs *);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
#endif
+extern void show_regs(struct pt_regs *);
+#endif /* (__KERNEL__) */
+
+#else /* __ASSEMBLY__ */
+/* For assembly code. */
+#define TRACEREG_SZ 0xa0
+#define STACKFRAME_SZ 0xc0
+
+#define TRACEREG32_SZ 0x50
+#define STACKFRAME32_SZ 0x60
+#endif /* __ASSEMBLY__ */
+
+#else /* (defined(__sparc__) && defined(__arch64__)) */
+
+/* 32 bit sparc */
+
+#ifndef __ASSEMBLY__
+
+#ifdef __KERNEL__
+
+#include <asm/system.h>
+
+static inline bool pt_regs_is_syscall(struct pt_regs *regs)
+{
+ return (regs->psr & PSR_SYSCALL);
+}
+
+static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
+{
+ return (regs->psr &= ~PSR_SYSCALL);
+}
+
+#define arch_ptrace_stop_needed(exit_code, info) \
+({ flush_user_windows(); \
+ current_thread_info()->w_saved != 0; \
+})
+
+#define arch_ptrace_stop(exit_code, info) \
+ synchronize_user_stack()
+
+#define user_mode(regs) (!((regs)->psr & PSR_PS))
+#define instruction_pointer(regs) ((regs)->pc)
+#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
+unsigned long profile_pc(struct pt_regs *);
+extern void show_regs(struct pt_regs *);
+#endif /* (__KERNEL__) */
+
+#else /* (!__ASSEMBLY__) */
+/* For assembly code. */
+#define TRACEREG_SZ 0x50
+#define STACKFRAME_SZ 0x60
+#endif /* (!__ASSEMBLY__) */
+
+#endif /* (defined(__sparc__) && defined(__arch64__)) */
+
+#ifdef __KERNEL__
+#define STACK_BIAS 2047
#endif
+
+/* These are for pt_regs. */
+#define PT_V9_G0 0x00
+#define PT_V9_G1 0x08
+#define PT_V9_G2 0x10
+#define PT_V9_G3 0x18
+#define PT_V9_G4 0x20
+#define PT_V9_G5 0x28
+#define PT_V9_G6 0x30
+#define PT_V9_G7 0x38
+#define PT_V9_I0 0x40
+#define PT_V9_I1 0x48
+#define PT_V9_I2 0x50
+#define PT_V9_I3 0x58
+#define PT_V9_I4 0x60
+#define PT_V9_I5 0x68
+#define PT_V9_I6 0x70
+#define PT_V9_FP PT_V9_I6
+#define PT_V9_I7 0x78
+#define PT_V9_TSTATE 0x80
+#define PT_V9_TPC 0x88
+#define PT_V9_TNPC 0x90
+#define PT_V9_Y 0x98
+#define PT_V9_MAGIC 0x9c
+#define PT_TSTATE PT_V9_TSTATE
+#define PT_TPC PT_V9_TPC
+#define PT_TNPC PT_V9_TNPC
+
+/* These for pt_regs32. */
+#define PT_PSR 0x0
+#define PT_PC 0x4
+#define PT_NPC 0x8
+#define PT_Y 0xc
+#define PT_G0 0x10
+#define PT_WIM PT_G0
+#define PT_G1 0x14
+#define PT_G2 0x18
+#define PT_G3 0x1c
+#define PT_G4 0x20
+#define PT_G5 0x24
+#define PT_G6 0x28
+#define PT_G7 0x2c
+#define PT_I0 0x30
+#define PT_I1 0x34
+#define PT_I2 0x38
+#define PT_I3 0x3c
+#define PT_I4 0x40
+#define PT_I5 0x44
+#define PT_I6 0x48
+#define PT_FP PT_I6
+#define PT_I7 0x4c
+
+/* Reg_window offsets */
+#define RW_V9_L0 0x00
+#define RW_V9_L1 0x08
+#define RW_V9_L2 0x10
+#define RW_V9_L3 0x18
+#define RW_V9_L4 0x20
+#define RW_V9_L5 0x28
+#define RW_V9_L6 0x30
+#define RW_V9_L7 0x38
+#define RW_V9_I0 0x40
+#define RW_V9_I1 0x48
+#define RW_V9_I2 0x50
+#define RW_V9_I3 0x58
+#define RW_V9_I4 0x60
+#define RW_V9_I5 0x68
+#define RW_V9_I6 0x70
+#define RW_V9_I7 0x78
+
+#define RW_L0 0x00
+#define RW_L1 0x04
+#define RW_L2 0x08
+#define RW_L3 0x0c
+#define RW_L4 0x10
+#define RW_L5 0x14
+#define RW_L6 0x18
+#define RW_L7 0x1c
+#define RW_I0 0x20
+#define RW_I1 0x24
+#define RW_I2 0x28
+#define RW_I3 0x2c
+#define RW_I4 0x30
+#define RW_I5 0x34
+#define RW_I6 0x38
+#define RW_I7 0x3c
+
+/* Stack_frame offsets */
+#define SF_V9_L0 0x00
+#define SF_V9_L1 0x08
+#define SF_V9_L2 0x10
+#define SF_V9_L3 0x18
+#define SF_V9_L4 0x20
+#define SF_V9_L5 0x28
+#define SF_V9_L6 0x30
+#define SF_V9_L7 0x38
+#define SF_V9_I0 0x40
+#define SF_V9_I1 0x48
+#define SF_V9_I2 0x50
+#define SF_V9_I3 0x58
+#define SF_V9_I4 0x60
+#define SF_V9_I5 0x68
+#define SF_V9_FP 0x70
+#define SF_V9_PC 0x78
+#define SF_V9_RETP 0x80
+#define SF_V9_XARG0 0x88
+#define SF_V9_XARG1 0x90
+#define SF_V9_XARG2 0x98
+#define SF_V9_XARG3 0xa0
+#define SF_V9_XARG4 0xa8
+#define SF_V9_XARG5 0xb0
+#define SF_V9_XXARG 0xb8
+
+#define SF_L0 0x00
+#define SF_L1 0x04
+#define SF_L2 0x08
+#define SF_L3 0x0c
+#define SF_L4 0x10
+#define SF_L5 0x14
+#define SF_L6 0x18
+#define SF_L7 0x1c
+#define SF_I0 0x20
+#define SF_I1 0x24
+#define SF_I2 0x28
+#define SF_I3 0x2c
+#define SF_I4 0x30
+#define SF_I5 0x34
+#define SF_FP 0x38
+#define SF_PC 0x3c
+#define SF_RETP 0x40
+#define SF_XARG0 0x44
+#define SF_XARG1 0x48
+#define SF_XARG2 0x4c
+#define SF_XARG3 0x50
+#define SF_XARG4 0x54
+#define SF_XARG5 0x58
+#define SF_XXARG 0x5c
+
+#ifdef __KERNEL__
+
+/* global_reg_snapshot offsets */
+#define GR_SNAP_TSTATE 0x00
+#define GR_SNAP_TPC 0x08
+#define GR_SNAP_TNPC 0x10
+#define GR_SNAP_O7 0x18
+#define GR_SNAP_I7 0x20
+#define GR_SNAP_RPC 0x28
+#define GR_SNAP_THREAD 0x30
+#define GR_SNAP_PAD1 0x38
+
+#endif /* __KERNEL__ */
+
+/* Stuff for the ptrace system call */
+#define PTRACE_SPARC_DETACH 11
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_READDATA 16
+#define PTRACE_WRITEDATA 17
+#define PTRACE_READTEXT 18
+#define PTRACE_WRITETEXT 19
+#define PTRACE_GETFPAREGS 20
+#define PTRACE_SETFPAREGS 21
+
+/* There are for debugging 64-bit processes, either from a 32 or 64 bit
+ * parent. Thus their complements are for debugging 32-bit processes only.
+ */
+
+#define PTRACE_GETREGS64 22
+#define PTRACE_SETREGS64 23
+/* PTRACE_SYSCALL is 24 */
+#define PTRACE_GETFPREGS64 25
+#define PTRACE_SETFPREGS64 26
+
+#endif /* !(__SPARC_PTRACE_H) */
diff --git a/arch/sparc/include/asm/ptrace_32.h b/arch/sparc/include/asm/ptrace_32.h
deleted file mode 100644
index 4cef450167dd..000000000000
--- a/arch/sparc/include/asm/ptrace_32.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef _SPARC_PTRACE_H
-#define _SPARC_PTRACE_H
-
-#include <asm/psr.h>
-
-/* This struct defines the way the registers are stored on the
- * stack during a system call and basically all traps.
- */
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-struct pt_regs {
- unsigned long psr;
- unsigned long pc;
- unsigned long npc;
- unsigned long y;
- unsigned long u_regs[16]; /* globals and ins */
-};
-
-#define UREG_G0 0
-#define UREG_G1 1
-#define UREG_G2 2
-#define UREG_G3 3
-#define UREG_G4 4
-#define UREG_G5 5
-#define UREG_G6 6
-#define UREG_G7 7
-#define UREG_I0 8
-#define UREG_I1 9
-#define UREG_I2 10
-#define UREG_I3 11
-#define UREG_I4 12
-#define UREG_I5 13
-#define UREG_I6 14
-#define UREG_I7 15
-#define UREG_WIM UREG_G0
-#define UREG_FADDR UREG_G0
-#define UREG_FP UREG_I6
-#define UREG_RETPC UREG_I7
-
-/* A register window */
-struct reg_window {
- unsigned long locals[8];
- unsigned long ins[8];
-};
-
-/* A Sparc stack frame */
-struct sparc_stackf {
- unsigned long locals[8];
- unsigned long ins[6];
- struct sparc_stackf *fp;
- unsigned long callers_pc;
- char *structptr;
- unsigned long xargs[6];
- unsigned long xxargs[1];
-};
-
-#define TRACEREG_SZ sizeof(struct pt_regs)
-#define STACKFRAME_SZ sizeof(struct sparc_stackf)
-
-#ifdef __KERNEL__
-
-#include <asm/system.h>
-
-static inline bool pt_regs_is_syscall(struct pt_regs *regs)
-{
- return (regs->psr & PSR_SYSCALL);
-}
-
-static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
-{
- return (regs->psr &= ~PSR_SYSCALL);
-}
-
-#define arch_ptrace_stop_needed(exit_code, info) \
-({ flush_user_windows(); \
- current_thread_info()->w_saved != 0; \
-})
-
-#define arch_ptrace_stop(exit_code, info) \
- synchronize_user_stack()
-
-#define user_mode(regs) (!((regs)->psr & PSR_PS))
-#define instruction_pointer(regs) ((regs)->pc)
-#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-unsigned long profile_pc(struct pt_regs *);
-extern void show_regs(struct pt_regs *);
-#endif
-
-#else /* __ASSEMBLY__ */
-/* For assembly code. */
-#define TRACEREG_SZ 0x50
-#define STACKFRAME_SZ 0x60
-#endif
-
-/*
- * The asm-offsets.h is a generated file, so we cannot include it.
- * It may be OK for glibc headers, but it's utterly pointless for C code.
- * The assembly code using those offsets has to include it explicitly.
- */
-/* #include <asm/asm-offsets.h> */
-
-/* These are for pt_regs. */
-#define PT_PSR 0x0
-#define PT_PC 0x4
-#define PT_NPC 0x8
-#define PT_Y 0xc
-#define PT_G0 0x10
-#define PT_WIM PT_G0
-#define PT_G1 0x14
-#define PT_G2 0x18
-#define PT_G3 0x1c
-#define PT_G4 0x20
-#define PT_G5 0x24
-#define PT_G6 0x28
-#define PT_G7 0x2c
-#define PT_I0 0x30
-#define PT_I1 0x34
-#define PT_I2 0x38
-#define PT_I3 0x3c
-#define PT_I4 0x40
-#define PT_I5 0x44
-#define PT_I6 0x48
-#define PT_FP PT_I6
-#define PT_I7 0x4c
-
-/* Reg_window offsets */
-#define RW_L0 0x00
-#define RW_L1 0x04
-#define RW_L2 0x08
-#define RW_L3 0x0c
-#define RW_L4 0x10
-#define RW_L5 0x14
-#define RW_L6 0x18
-#define RW_L7 0x1c
-#define RW_I0 0x20
-#define RW_I1 0x24
-#define RW_I2 0x28
-#define RW_I3 0x2c
-#define RW_I4 0x30
-#define RW_I5 0x34
-#define RW_I6 0x38
-#define RW_I7 0x3c
-
-/* Stack_frame offsets */
-#define SF_L0 0x00
-#define SF_L1 0x04
-#define SF_L2 0x08
-#define SF_L3 0x0c
-#define SF_L4 0x10
-#define SF_L5 0x14
-#define SF_L6 0x18
-#define SF_L7 0x1c
-#define SF_I0 0x20
-#define SF_I1 0x24
-#define SF_I2 0x28
-#define SF_I3 0x2c
-#define SF_I4 0x30
-#define SF_I5 0x34
-#define SF_FP 0x38
-#define SF_PC 0x3c
-#define SF_RETP 0x40
-#define SF_XARG0 0x44
-#define SF_XARG1 0x48
-#define SF_XARG2 0x4c
-#define SF_XARG3 0x50
-#define SF_XARG4 0x54
-#define SF_XARG5 0x58
-#define SF_XXARG 0x5c
-
-/* Stuff for the ptrace system call */
-#define PTRACE_SPARC_DETACH 11
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_READDATA 16
-#define PTRACE_WRITEDATA 17
-#define PTRACE_READTEXT 18
-#define PTRACE_WRITETEXT 19
-#define PTRACE_GETFPAREGS 20
-#define PTRACE_SETFPAREGS 21
-
-#endif /* !(_SPARC_PTRACE_H) */
diff --git a/arch/sparc/include/asm/ptrace_64.h b/arch/sparc/include/asm/ptrace_64.h
deleted file mode 100644
index cd6fbfc20435..000000000000
--- a/arch/sparc/include/asm/ptrace_64.h
+++ /dev/null
@@ -1,356 +0,0 @@
-#ifndef _SPARC64_PTRACE_H
-#define _SPARC64_PTRACE_H
-
-#include <asm/pstate.h>
-
-/* This struct defines the way the registers are stored on the
- * stack during a system call and basically all traps.
- */
-
-/* This magic value must have the low 9 bits clear,
- * as that is where we encode the %tt value, see below.
- */
-#define PT_REGS_MAGIC 0x57ac6c00
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-struct pt_regs {
- unsigned long u_regs[16]; /* globals and ins */
- unsigned long tstate;
- unsigned long tpc;
- unsigned long tnpc;
- unsigned int y;
-
- /* We encode a magic number, PT_REGS_MAGIC, along
- * with the %tt (trap type) register value at trap
- * entry time. The magic number allows us to identify
- * accurately a trap stack frame in the stack
- * unwinder, and the %tt value allows us to test
- * things like "in a system call" etc. for an arbitray
- * process.
- *
- * The PT_REGS_MAGIC is choosen such that it can be
- * loaded completely using just a sethi instruction.
- */
- unsigned int magic;
-};
-
-struct pt_regs32 {
- unsigned int psr;
- unsigned int pc;
- unsigned int npc;
- unsigned int y;
- unsigned int u_regs[16]; /* globals and ins */
-};
-
-#define UREG_G0 0
-#define UREG_G1 1
-#define UREG_G2 2
-#define UREG_G3 3
-#define UREG_G4 4
-#define UREG_G5 5
-#define UREG_G6 6
-#define UREG_G7 7
-#define UREG_I0 8
-#define UREG_I1 9
-#define UREG_I2 10
-#define UREG_I3 11
-#define UREG_I4 12
-#define UREG_I5 13
-#define UREG_I6 14
-#define UREG_I7 15
-#define UREG_FP UREG_I6
-#define UREG_RETPC UREG_I7
-
-/* A V9 register window */
-struct reg_window {
- unsigned long locals[8];
- unsigned long ins[8];
-};
-
-/* A 32-bit register window. */
-struct reg_window32 {
- unsigned int locals[8];
- unsigned int ins[8];
-};
-
-/* A V9 Sparc stack frame */
-struct sparc_stackf {
- unsigned long locals[8];
- unsigned long ins[6];
- struct sparc_stackf *fp;
- unsigned long callers_pc;
- char *structptr;
- unsigned long xargs[6];
- unsigned long xxargs[1];
-};
-
-/* A 32-bit Sparc stack frame */
-struct sparc_stackf32 {
- unsigned int locals[8];
- unsigned int ins[6];
- unsigned int fp;
- unsigned int callers_pc;
- unsigned int structptr;
- unsigned int xargs[6];
- unsigned int xxargs[1];
-};
-
-struct sparc_trapf {
- unsigned long locals[8];
- unsigned long ins[8];
- unsigned long _unused;
- struct pt_regs *regs;
-};
-
-#define TRACEREG_SZ sizeof(struct pt_regs)
-#define STACKFRAME_SZ sizeof(struct sparc_stackf)
-
-#define TRACEREG32_SZ sizeof(struct pt_regs32)
-#define STACKFRAME32_SZ sizeof(struct sparc_stackf32)
-
-#ifdef __KERNEL__
-
-#include <linux/threads.h>
-#include <asm/system.h>
-
-static inline int pt_regs_trap_type(struct pt_regs *regs)
-{
- return regs->magic & 0x1ff;
-}
-
-static inline bool pt_regs_is_syscall(struct pt_regs *regs)
-{
- return (regs->tstate & TSTATE_SYSCALL);
-}
-
-static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
-{
- return (regs->tstate &= ~TSTATE_SYSCALL);
-}
-
-#define arch_ptrace_stop_needed(exit_code, info) \
-({ flush_user_windows(); \
- get_thread_wsaved() != 0; \
-})
-
-#define arch_ptrace_stop(exit_code, info) \
- synchronize_user_stack()
-
-struct global_reg_snapshot {
- unsigned long tstate;
- unsigned long tpc;
- unsigned long tnpc;
- unsigned long o7;
- unsigned long i7;
- unsigned long rpc;
- struct thread_info *thread;
- unsigned long pad1;
-};
-extern struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
-
-#define force_successful_syscall_return() \
-do { current_thread_info()->syscall_noerror = 1; \
-} while (0)
-#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
-#define instruction_pointer(regs) ((regs)->tpc)
-#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
-#ifdef CONFIG_SMP
-extern unsigned long profile_pc(struct pt_regs *);
-#else
-#define profile_pc(regs) instruction_pointer(regs)
-#endif
-extern void show_regs(struct pt_regs *);
-#endif
-
-#else /* __ASSEMBLY__ */
-/* For assembly code. */
-#define TRACEREG_SZ 0xa0
-#define STACKFRAME_SZ 0xc0
-
-#define TRACEREG32_SZ 0x50
-#define STACKFRAME32_SZ 0x60
-#endif
-
-#ifdef __KERNEL__
-#define STACK_BIAS 2047
-#endif
-
-/* These are for pt_regs. */
-#define PT_V9_G0 0x00
-#define PT_V9_G1 0x08
-#define PT_V9_G2 0x10
-#define PT_V9_G3 0x18
-#define PT_V9_G4 0x20
-#define PT_V9_G5 0x28
-#define PT_V9_G6 0x30
-#define PT_V9_G7 0x38
-#define PT_V9_I0 0x40
-#define PT_V9_I1 0x48
-#define PT_V9_I2 0x50
-#define PT_V9_I3 0x58
-#define PT_V9_I4 0x60
-#define PT_V9_I5 0x68
-#define PT_V9_I6 0x70
-#define PT_V9_FP PT_V9_I6
-#define PT_V9_I7 0x78
-#define PT_V9_TSTATE 0x80
-#define PT_V9_TPC 0x88
-#define PT_V9_TNPC 0x90
-#define PT_V9_Y 0x98
-#define PT_V9_MAGIC 0x9c
-#define PT_TSTATE PT_V9_TSTATE
-#define PT_TPC PT_V9_TPC
-#define PT_TNPC PT_V9_TNPC
-
-/* These for pt_regs32. */
-#define PT_PSR 0x0
-#define PT_PC 0x4
-#define PT_NPC 0x8
-#define PT_Y 0xc
-#define PT_G0 0x10
-#define PT_WIM PT_G0
-#define PT_G1 0x14
-#define PT_G2 0x18
-#define PT_G3 0x1c
-#define PT_G4 0x20
-#define PT_G5 0x24
-#define PT_G6 0x28
-#define PT_G7 0x2c
-#define PT_I0 0x30
-#define PT_I1 0x34
-#define PT_I2 0x38
-#define PT_I3 0x3c
-#define PT_I4 0x40
-#define PT_I5 0x44
-#define PT_I6 0x48
-#define PT_FP PT_I6
-#define PT_I7 0x4c
-
-/* Reg_window offsets */
-#define RW_V9_L0 0x00
-#define RW_V9_L1 0x08
-#define RW_V9_L2 0x10
-#define RW_V9_L3 0x18
-#define RW_V9_L4 0x20
-#define RW_V9_L5 0x28
-#define RW_V9_L6 0x30
-#define RW_V9_L7 0x38
-#define RW_V9_I0 0x40
-#define RW_V9_I1 0x48
-#define RW_V9_I2 0x50
-#define RW_V9_I3 0x58
-#define RW_V9_I4 0x60
-#define RW_V9_I5 0x68
-#define RW_V9_I6 0x70
-#define RW_V9_I7 0x78
-
-#define RW_L0 0x00
-#define RW_L1 0x04
-#define RW_L2 0x08
-#define RW_L3 0x0c
-#define RW_L4 0x10
-#define RW_L5 0x14
-#define RW_L6 0x18
-#define RW_L7 0x1c
-#define RW_I0 0x20
-#define RW_I1 0x24
-#define RW_I2 0x28
-#define RW_I3 0x2c
-#define RW_I4 0x30
-#define RW_I5 0x34
-#define RW_I6 0x38
-#define RW_I7 0x3c
-
-/* Stack_frame offsets */
-#define SF_V9_L0 0x00
-#define SF_V9_L1 0x08
-#define SF_V9_L2 0x10
-#define SF_V9_L3 0x18
-#define SF_V9_L4 0x20
-#define SF_V9_L5 0x28
-#define SF_V9_L6 0x30
-#define SF_V9_L7 0x38
-#define SF_V9_I0 0x40
-#define SF_V9_I1 0x48
-#define SF_V9_I2 0x50
-#define SF_V9_I3 0x58
-#define SF_V9_I4 0x60
-#define SF_V9_I5 0x68
-#define SF_V9_FP 0x70
-#define SF_V9_PC 0x78
-#define SF_V9_RETP 0x80
-#define SF_V9_XARG0 0x88
-#define SF_V9_XARG1 0x90
-#define SF_V9_XARG2 0x98
-#define SF_V9_XARG3 0xa0
-#define SF_V9_XARG4 0xa8
-#define SF_V9_XARG5 0xb0
-#define SF_V9_XXARG 0xb8
-
-#define SF_L0 0x00
-#define SF_L1 0x04
-#define SF_L2 0x08
-#define SF_L3 0x0c
-#define SF_L4 0x10
-#define SF_L5 0x14
-#define SF_L6 0x18
-#define SF_L7 0x1c
-#define SF_I0 0x20
-#define SF_I1 0x24
-#define SF_I2 0x28
-#define SF_I3 0x2c
-#define SF_I4 0x30
-#define SF_I5 0x34
-#define SF_FP 0x38
-#define SF_PC 0x3c
-#define SF_RETP 0x40
-#define SF_XARG0 0x44
-#define SF_XARG1 0x48
-#define SF_XARG2 0x4c
-#define SF_XARG3 0x50
-#define SF_XARG4 0x54
-#define SF_XARG5 0x58
-#define SF_XXARG 0x5c
-
-#ifdef __KERNEL__
-
-/* global_reg_snapshot offsets */
-#define GR_SNAP_TSTATE 0x00
-#define GR_SNAP_TPC 0x08
-#define GR_SNAP_TNPC 0x10
-#define GR_SNAP_O7 0x18
-#define GR_SNAP_I7 0x20
-#define GR_SNAP_RPC 0x28
-#define GR_SNAP_THREAD 0x30
-#define GR_SNAP_PAD1 0x38
-
-#endif /* __KERNEL__ */
-
-/* Stuff for the ptrace system call */
-#define PTRACE_SPARC_DETACH 11
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_READDATA 16
-#define PTRACE_WRITEDATA 17
-#define PTRACE_READTEXT 18
-#define PTRACE_WRITETEXT 19
-#define PTRACE_GETFPAREGS 20
-#define PTRACE_SETFPAREGS 21
-
-/* There are for debugging 64-bit processes, either from a 32 or 64 bit
- * parent. Thus their complements are for debugging 32-bit processes only.
- */
-
-#define PTRACE_GETREGS64 22
-#define PTRACE_SETREGS64 23
-/* PTRACE_SYSCALL is 24 */
-#define PTRACE_GETFPREGS64 25
-#define PTRACE_SETFPREGS64 26
-
-#endif /* !(_SPARC64_PTRACE_H) */
diff --git a/arch/sparc/include/asm/reg.h b/arch/sparc/include/asm/reg.h
deleted file mode 100644
index 0c16e19cae4d..000000000000
--- a/arch/sparc/include/asm/reg.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef ___ASM_SPARC_REG_H
-#define ___ASM_SPARC_REG_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/reg_64.h>
-#else
-#include <asm/reg_32.h>
-#endif
-#endif
diff --git a/arch/sparc/include/asm/reg_32.h b/arch/sparc/include/asm/reg_32.h
deleted file mode 100644
index 1efb056fb3d1..000000000000
--- a/arch/sparc/include/asm/reg_32.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * linux/include/asm/reg.h
- * Layout of the registers as expected by gdb on the Sparc
- * we should replace the user.h definitions with those in
- * this file, we don't even use the other
- * -miguel
- *
- * The names of the structures, constants and aliases in this file
- * have the same names as the sunos ones, some programs rely on these
- * names (gdb for example).
- *
- */
-
-#ifndef __SPARC_REG_H
-#define __SPARC_REG_H
-
-struct regs {
- int r_psr;
-#define r_ps r_psr
- int r_pc;
- int r_npc;
- int r_y;
- int r_g1;
- int r_g2;
- int r_g3;
- int r_g4;
- int r_g5;
- int r_g6;
- int r_g7;
- int r_o0;
- int r_o1;
- int r_o2;
- int r_o3;
- int r_o4;
- int r_o5;
- int r_o6;
- int r_o7;
-};
-
-struct fpq {
- unsigned long *addr;
- unsigned long instr;
-};
-
-struct fq {
- union {
- double whole;
- struct fpq fpq;
- } FQu;
-};
-
-#define FPU_REGS_TYPE unsigned int
-#define FPU_FSR_TYPE unsigned
-
-struct fp_status {
- union {
- FPU_REGS_TYPE Fpu_regs[32];
- double Fpu_dregs[16];
- } fpu_fr;
- FPU_FSR_TYPE Fpu_fsr;
- unsigned Fpu_flags;
- unsigned Fpu_extra;
- unsigned Fpu_qcnt;
- struct fq Fpu_q[16];
-};
-
-#define fpu_regs f_fpstatus.fpu_fr.Fpu_regs
-#define fpu_dregs f_fpstatus.fpu_fr.Fpu_dregs
-#define fpu_fsr f_fpstatus.Fpu_fsr
-#define fpu_flags f_fpstatus.Fpu_flags
-#define fpu_extra f_fpstatus.Fpu_extra
-#define fpu_q f_fpstatus.Fpu_q
-#define fpu_qcnt f_fpstatus.Fpu_qcnt
-
-struct fpu {
- struct fp_status f_fpstatus;
-};
-
-#endif /* __SPARC_REG_H */
diff --git a/arch/sparc/include/asm/reg_64.h b/arch/sparc/include/asm/reg_64.h
deleted file mode 100644
index 6f277d7c7d88..000000000000
--- a/arch/sparc/include/asm/reg_64.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * linux/asm/reg.h
- * Layout of the registers as expected by gdb on the Sparc
- * we should replace the user.h definitions with those in
- * this file, we don't even use the other
- * -miguel
- *
- * The names of the structures, constants and aliases in this file
- * have the same names as the sunos ones, some programs rely on these
- * names (gdb for example).
- *
- */
-
-#ifndef __SPARC64_REG_H
-#define __SPARC64_REG_H
-
-struct regs {
- unsigned long r_g1;
- unsigned long r_g2;
- unsigned long r_g3;
- unsigned long r_g4;
- unsigned long r_g5;
- unsigned long r_g6;
- unsigned long r_g7;
- unsigned long r_o0;
- unsigned long r_o1;
- unsigned long r_o2;
- unsigned long r_o3;
- unsigned long r_o4;
- unsigned long r_o5;
- unsigned long r_o6;
- unsigned long r_o7;
- unsigned long __pad;
- unsigned long r_tstate;
- unsigned long r_tpc;
- unsigned long r_tnpc;
- unsigned int r_y;
- unsigned int r_fprs;
-};
-
-#define FPU_REGS_TYPE unsigned int
-#define FPU_FSR_TYPE unsigned long
-
-struct fp_status {
- unsigned long fpu_fr[32];
- unsigned long Fpu_fsr;
-};
-
-struct fpu {
- struct fp_status f_fpstatus;
-};
-
-#define fpu_regs f_fpstatus.fpu_fr
-#define fpu_fsr f_fpstatus.Fpu_fsr
-
-#endif /* __SPARC64_REG_H */
diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h
index e92de7e286b5..a1607d180354 100644
--- a/arch/sparc/include/asm/sigcontext.h
+++ b/arch/sparc/include/asm/sigcontext.h
@@ -1,8 +1,96 @@
-#ifndef ___ASM_SPARC_SIGCONTEXT_H
-#define ___ASM_SPARC_SIGCONTEXT_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/sigcontext_64.h>
+#ifndef __SPARC_SIGCONTEXT_H
+#define __SPARC_SIGCONTEXT_H
+
+#ifdef __KERNEL__
+#include <asm/ptrace.h>
+
+#ifndef __ASSEMBLY__
+
+#define __SUNOS_MAXWIN 31
+
+/* This is what SunOS does, so shall I unless we use new 32bit signals or rt signals. */
+struct sigcontext32 {
+ int sigc_onstack; /* state to restore */
+ int sigc_mask; /* sigmask to restore */
+ int sigc_sp; /* stack pointer */
+ int sigc_pc; /* program counter */
+ int sigc_npc; /* next program counter */
+ int sigc_psr; /* for condition codes etc */
+ int sigc_g1; /* User uses these two registers */
+ int sigc_o0; /* within the trampoline code. */
+
+ /* Now comes information regarding the users window set
+ * at the time of the signal.
+ */
+ int sigc_oswins; /* outstanding windows */
+
+ /* stack ptrs for each regwin buf */
+ unsigned sigc_spbuf[__SUNOS_MAXWIN];
+
+ /* Windows to restore after signal */
+ struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
+};
+
+
+/* This is what we use for 32bit new non-rt signals. */
+
+typedef struct {
+ struct {
+ unsigned int psr;
+ unsigned int pc;
+ unsigned int npc;
+ unsigned int y;
+ unsigned int u_regs[16]; /* globals and ins */
+ } si_regs;
+ int si_mask;
+} __siginfo32_t;
+
+#ifdef CONFIG_SPARC64
+typedef struct {
+ unsigned int si_float_regs [64];
+ unsigned long si_fsr;
+ unsigned long si_gsr;
+ unsigned long si_fprs;
+} __siginfo_fpu_t;
+
+/* This is what SunOS doesn't, so we have to write this alone
+ and do it properly. */
+struct sigcontext {
+ /* The size of this array has to match SI_MAX_SIZE from siginfo.h */
+ char sigc_info[128];
+ struct {
+ unsigned long u_regs[16]; /* globals and ins */
+ unsigned long tstate;
+ unsigned long tpc;
+ unsigned long tnpc;
+ unsigned int y;
+ unsigned int fprs;
+ } sigc_regs;
+ __siginfo_fpu_t * sigc_fpu_save;
+ struct {
+ void * ss_sp;
+ int ss_flags;
+ unsigned long ss_size;
+ } sigc_stack;
+ unsigned long sigc_mask;
+};
+
#else
-#include <asm/sigcontext_32.h>
-#endif
-#endif
+
+typedef struct {
+ unsigned long si_float_regs [32];
+ unsigned long si_fsr;
+ unsigned long si_fpqdepth;
+ struct {
+ unsigned long *insn_addr;
+ unsigned long insn;
+ } si_fpqueue [16];
+} __siginfo_fpu_t;
+#endif /* (CONFIG_SPARC64) */
+
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* (__KERNEL__) */
+
+#endif /* !(__SPARC_SIGCONTEXT_H) */
diff --git a/arch/sparc/include/asm/sigcontext_32.h b/arch/sparc/include/asm/sigcontext_32.h
deleted file mode 100644
index c5fb60dcbd75..000000000000
--- a/arch/sparc/include/asm/sigcontext_32.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef __SPARC_SIGCONTEXT_H
-#define __SPARC_SIGCONTEXT_H
-
-#ifdef __KERNEL__
-#include <asm/ptrace.h>
-
-#ifndef __ASSEMBLY__
-
-#define __SUNOS_MAXWIN 31
-
-/* This is what SunOS does, so shall I. */
-struct sigcontext {
- int sigc_onstack; /* state to restore */
- int sigc_mask; /* sigmask to restore */
- int sigc_sp; /* stack pointer */
- int sigc_pc; /* program counter */
- int sigc_npc; /* next program counter */
- int sigc_psr; /* for condition codes etc */
- int sigc_g1; /* User uses these two registers */
- int sigc_o0; /* within the trampoline code. */
-
- /* Now comes information regarding the users window set
- * at the time of the signal.
- */
- int sigc_oswins; /* outstanding windows */
-
- /* stack ptrs for each regwin buf */
- char *sigc_spbuf[__SUNOS_MAXWIN];
-
- /* Windows to restore after signal */
- struct {
- unsigned long locals[8];
- unsigned long ins[8];
- } sigc_wbuf[__SUNOS_MAXWIN];
-};
-
-typedef struct {
- struct {
- unsigned long psr;
- unsigned long pc;
- unsigned long npc;
- unsigned long y;
- unsigned long u_regs[16]; /* globals and ins */
- } si_regs;
- int si_mask;
-} __siginfo_t;
-
-typedef struct {
- unsigned long si_float_regs [32];
- unsigned long si_fsr;
- unsigned long si_fpqdepth;
- struct {
- unsigned long *insn_addr;
- unsigned long insn;
- } si_fpqueue [16];
-} __siginfo_fpu_t;
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* (__KERNEL__) */
-
-#endif /* !(__SPARC_SIGCONTEXT_H) */
diff --git a/arch/sparc/include/asm/sigcontext_64.h b/arch/sparc/include/asm/sigcontext_64.h
deleted file mode 100644
index 1c868d680cfc..000000000000
--- a/arch/sparc/include/asm/sigcontext_64.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef __SPARC64_SIGCONTEXT_H
-#define __SPARC64_SIGCONTEXT_H
-
-#ifdef __KERNEL__
-#include <asm/ptrace.h>
-#endif
-
-#ifndef __ASSEMBLY__
-
-#ifdef __KERNEL__
-
-#define __SUNOS_MAXWIN 31
-
-/* This is what SunOS does, so shall I unless we use new 32bit signals or rt signals. */
-struct sigcontext32 {
- int sigc_onstack; /* state to restore */
- int sigc_mask; /* sigmask to restore */
- int sigc_sp; /* stack pointer */
- int sigc_pc; /* program counter */
- int sigc_npc; /* next program counter */
- int sigc_psr; /* for condition codes etc */
- int sigc_g1; /* User uses these two registers */
- int sigc_o0; /* within the trampoline code. */
-
- /* Now comes information regarding the users window set
- * at the time of the signal.
- */
- int sigc_oswins; /* outstanding windows */
-
- /* stack ptrs for each regwin buf */
- unsigned sigc_spbuf[__SUNOS_MAXWIN];
-
- /* Windows to restore after signal */
- struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
-};
-
-#endif
-
-#ifdef __KERNEL__
-
-/* This is what we use for 32bit new non-rt signals. */
-
-typedef struct {
- struct {
- unsigned int psr;
- unsigned int pc;
- unsigned int npc;
- unsigned int y;
- unsigned int u_regs[16]; /* globals and ins */
- } si_regs;
- int si_mask;
-} __siginfo32_t;
-
-#endif
-
-typedef struct {
- unsigned int si_float_regs [64];
- unsigned long si_fsr;
- unsigned long si_gsr;
- unsigned long si_fprs;
-} __siginfo_fpu_t;
-
-/* This is what SunOS doesn't, so we have to write this alone
- and do it properly. */
-struct sigcontext {
- /* The size of this array has to match SI_MAX_SIZE from siginfo.h */
- char sigc_info[128];
- struct {
- unsigned long u_regs[16]; /* globals and ins */
- unsigned long tstate;
- unsigned long tpc;
- unsigned long tnpc;
- unsigned int y;
- unsigned int fprs;
- } sigc_regs;
- __siginfo_fpu_t * sigc_fpu_save;
- struct {
- void * ss_sp;
- int ss_flags;
- unsigned long ss_size;
- } sigc_stack;
- unsigned long sigc_mask;
-};
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(__SPARC64_SIGCONTEXT_H) */
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h
index bd81f8d7f5ce..988e5d8ed11a 100644
--- a/arch/sparc/include/asm/siginfo.h
+++ b/arch/sparc/include/asm/siginfo.h
@@ -1,8 +1,37 @@
-#ifndef ___ASM_SPARC_SIGINFO_H
-#define ___ASM_SPARC_SIGINFO_H
+#ifndef __SPARC_SIGINFO_H
+#define __SPARC_SIGINFO_H
+
#if defined(__sparc__) && defined(__arch64__)
-#include <asm/siginfo_64.h>
-#else
-#include <asm/siginfo_32.h>
-#endif
-#endif
+
+#define SI_PAD_SIZE32 ((SI_MAX_SIZE/sizeof(int)) - 3)
+#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+#define __ARCH_SI_BAND_T int
+
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+#ifdef __KERNEL__
+
+#include <linux/compat.h>
+
+#ifdef CONFIG_COMPAT
+
+struct compat_siginfo;
+
+#endif /* CONFIG_COMPAT */
+
+#endif /* __KERNEL__ */
+
+#define SI_NOINFO 32767 /* no information in siginfo_t */
+
+/*
+ * SIGEMT si_codes
+ */
+#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */
+#define NSIGEMT 1
+
+#endif /* !(__SPARC_SIGINFO_H) */
diff --git a/arch/sparc/include/asm/siginfo_32.h b/arch/sparc/include/asm/siginfo_32.h
deleted file mode 100644
index 3c71af135c52..000000000000
--- a/arch/sparc/include/asm/siginfo_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _SPARC_SIGINFO_H
-#define _SPARC_SIGINFO_H
-
-#define __ARCH_SI_UID_T unsigned int
-#define __ARCH_SI_TRAPNO
-
-#include <asm-generic/siginfo.h>
-
-#define SI_NOINFO 32767 /* no information in siginfo_t */
-
-/*
- * SIGEMT si_codes
- */
-#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */
-#define NSIGEMT 1
-
-#endif /* !(_SPARC_SIGINFO_H) */
diff --git a/arch/sparc/include/asm/siginfo_64.h b/arch/sparc/include/asm/siginfo_64.h
deleted file mode 100644
index c96e6c30f8b0..000000000000
--- a/arch/sparc/include/asm/siginfo_64.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef _SPARC64_SIGINFO_H
-#define _SPARC64_SIGINFO_H
-
-#define SI_PAD_SIZE32 ((SI_MAX_SIZE/sizeof(int)) - 3)
-
-#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
-#define __ARCH_SI_TRAPNO
-#define __ARCH_SI_BAND_T int
-
-#include <asm-generic/siginfo.h>
-
-#ifdef __KERNEL__
-
-#include <linux/compat.h>
-
-#ifdef CONFIG_COMPAT
-
-struct compat_siginfo;
-
-#endif /* CONFIG_COMPAT */
-
-#endif /* __KERNEL__ */
-
-#define SI_NOINFO 32767 /* no information in siginfo_t */
-
-/*
- * SIGEMT si_codes
- */
-#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */
-#define NSIGEMT 1
-
-#endif
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index 27ab05dc203e..41535e77b255 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -1,8 +1,210 @@
-#ifndef ___ASM_SPARC_SIGNAL_H
-#define ___ASM_SPARC_SIGNAL_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/signal_64.h>
+#ifndef __SPARC_SIGNAL_H
+#define __SPARC_SIGNAL_H
+
+#include <asm/sigcontext.h>
+#include <linux/compiler.h>
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#include <linux/personality.h>
+#include <linux/types.h>
+#endif
+#endif
+
+/* On the Sparc the signal handlers get passed a 'sub-signal' code
+ * for certain signal types, which we document here.
+ */
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SUBSIG_STACK 0
+#define SUBSIG_ILLINST 2
+#define SUBSIG_PRIVINST 3
+#define SUBSIG_BADTRAP(t) (0x80 + (t))
+
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+
+#define SIGEMT 7
+#define SUBSIG_TAG 10
+
+#define SIGFPE 8
+#define SUBSIG_FPDISABLED 0x400
+#define SUBSIG_FPERROR 0x404
+#define SUBSIG_FPINTOVFL 0x001
+#define SUBSIG_FPSTSIG 0x002
+#define SUBSIG_IDIVZERO 0x014
+#define SUBSIG_FPINEXACT 0x0c4
+#define SUBSIG_FPDIVZERO 0x0c8
+#define SUBSIG_FPUNFLOW 0x0cc
+#define SUBSIG_FPOPERROR 0x0d0
+#define SUBSIG_FPOVFLOW 0x0d4
+
+#define SIGKILL 9
+#define SIGBUS 10
+#define SUBSIG_BUSTIMEOUT 1
+#define SUBSIG_ALIGNMENT 2
+#define SUBSIG_MISCERROR 5
+
+#define SIGSEGV 11
+#define SUBSIG_NOMAPPING 3
+#define SUBSIG_PROTECTION 4
+#define SUBSIG_SEGERROR 5
+
+#define SIGSYS 12
+
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGURG 16
+
+/* SunOS values which deviate from the Linux/i386 ones */
+#define SIGSTOP 17
+#define SIGTSTP 18
+#define SIGCONT 19
+#define SIGCHLD 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGIO 23
+#define SIGPOLL SIGIO /* SysV name for SIGIO */
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGLOST 29
+#define SIGPWR SIGLOST
+#define SIGUSR1 30
+#define SIGUSR2 31
+
+/* Most things should be clean enough to redefine this at will, if care
+ is taken to make libc match. */
+
+#define __OLD_NSIG 32
+#define __NEW_NSIG 64
+#define _NSIG_BPW 64
+#define _NSIG_WORDS (__NEW_NSIG / _NSIG_BPW)
+
+#define SIGRTMIN 32
+#define SIGRTMAX __NEW_NSIG
+
+#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
+#define _NSIG __NEW_NSIG
+#define __new_sigset_t sigset_t
+#define __new_sigaction sigaction
+#define __new_sigaction32 sigaction32
+#define __old_sigset_t old_sigset_t
+#define __old_sigaction old_sigaction
+#define __old_sigaction32 old_sigaction32
#else
-#include <asm/signal_32.h>
+#define _NSIG __OLD_NSIG
+#define NSIG _NSIG
+#define __old_sigset_t sigset_t
+#define __old_sigaction sigaction
+#define __old_sigaction32 sigaction32
#endif
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long __old_sigset_t; /* at least 32 bits */
+
+typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+} __new_sigset_t;
+
+/* A SunOS sigstack */
+struct sigstack {
+ /* XXX 32-bit pointers pinhead XXX */
+ char *the_stack;
+ int cur_status;
+};
+
+/* Sigvec flags */
+#define _SV_SSTACK 1u /* This signal handler should use sig-stack */
+#define _SV_INTR 2u /* Sig return should not restart system call */
+#define _SV_RESET 4u /* Set handler to SIG_DFL upon taken signal */
+#define _SV_IGNCHILD 8u /* Do not send SIGCHLD */
+
+/*
+ * sa_flags values: SA_STACK is not currently supported, but will allow the
+ * usage of signal stacks by using the (now obsolete) sa_restorer field in
+ * the sigaction structure as a stack pointer. This is now possible due to
+ * the changes in signal handling. LBT 010493.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ */
+#define SA_NOCLDSTOP _SV_IGNCHILD
+#define SA_STACK _SV_SSTACK
+#define SA_ONSTACK _SV_SSTACK
+#define SA_RESTART _SV_INTR
+#define SA_ONESHOT _SV_RESET
+#define SA_NOMASK 0x20u
+#define SA_NOCLDWAIT 0x100u
+#define SA_SIGINFO 0x200u
+
+
+#define SIG_BLOCK 0x01 /* for blocking signals */
+#define SIG_UNBLOCK 0x02 /* for unblocking signals */
+#define SIG_SETMASK 0x04 /* for setting the signal mask */
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK 1
+#define SS_DISABLE 2
+
+#define MINSIGSTKSZ 4096
+#define SIGSTKSZ 16384
+
+#ifdef __KERNEL__
+/*
+ * DJHR
+ * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this
+ * interrupt handler's irq structure should be statically allocated
+ * by the request_irq routine.
+ * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
+ * of interrupt usage and that sucks. Also without a flag like this
+ * it may be possible for the free_irq routine to attempt to free
+ * statically allocated data.. which is NOT GOOD.
+ *
+ */
+#define SA_STATIC_ALLOC 0x8000
#endif
+
+#include <asm-generic/signal.h>
+
+struct __new_sigaction {
+ __sighandler_t sa_handler;
+ unsigned long sa_flags;
+ __sigrestore_t sa_restorer; /* not used by Linux/SPARC yet */
+ __new_sigset_t sa_mask;
+};
+
+struct __old_sigaction {
+ __sighandler_t sa_handler;
+ __old_sigset_t sa_mask;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void); /* not used by Linux/SPARC yet */
+};
+
+typedef struct sigaltstack {
+ void __user *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+struct k_sigaction {
+ struct __new_sigaction sa;
+ void __user *ka_restorer;
+};
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* !(__KERNEL__) */
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/signal_32.h b/arch/sparc/include/asm/signal_32.h
deleted file mode 100644
index 96a60ab03ca1..000000000000
--- a/arch/sparc/include/asm/signal_32.h
+++ /dev/null
@@ -1,207 +0,0 @@
-#ifndef _ASMSPARC_SIGNAL_H
-#define _ASMSPARC_SIGNAL_H
-
-#include <asm/sigcontext.h>
-#include <linux/compiler.h>
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#include <linux/personality.h>
-#include <linux/types.h>
-#endif
-#endif
-
-/* On the Sparc the signal handlers get passed a 'sub-signal' code
- * for certain signal types, which we document here.
- */
-#define SIGHUP 1
-#define SIGINT 2
-#define SIGQUIT 3
-#define SIGILL 4
-#define SUBSIG_STACK 0
-#define SUBSIG_ILLINST 2
-#define SUBSIG_PRIVINST 3
-#define SUBSIG_BADTRAP(t) (0x80 + (t))
-
-#define SIGTRAP 5
-#define SIGABRT 6
-#define SIGIOT 6
-
-#define SIGEMT 7
-#define SUBSIG_TAG 10
-
-#define SIGFPE 8
-#define SUBSIG_FPDISABLED 0x400
-#define SUBSIG_FPERROR 0x404
-#define SUBSIG_FPINTOVFL 0x001
-#define SUBSIG_FPSTSIG 0x002
-#define SUBSIG_IDIVZERO 0x014
-#define SUBSIG_FPINEXACT 0x0c4
-#define SUBSIG_FPDIVZERO 0x0c8
-#define SUBSIG_FPUNFLOW 0x0cc
-#define SUBSIG_FPOPERROR 0x0d0
-#define SUBSIG_FPOVFLOW 0x0d4
-
-#define SIGKILL 9
-#define SIGBUS 10
-#define SUBSIG_BUSTIMEOUT 1
-#define SUBSIG_ALIGNMENT 2
-#define SUBSIG_MISCERROR 5
-
-#define SIGSEGV 11
-#define SUBSIG_NOMAPPING 3
-#define SUBSIG_PROTECTION 4
-#define SUBSIG_SEGERROR 5
-
-#define SIGSYS 12
-
-#define SIGPIPE 13
-#define SIGALRM 14
-#define SIGTERM 15
-#define SIGURG 16
-
-/* SunOS values which deviate from the Linux/i386 ones */
-#define SIGSTOP 17
-#define SIGTSTP 18
-#define SIGCONT 19
-#define SIGCHLD 20
-#define SIGTTIN 21
-#define SIGTTOU 22
-#define SIGIO 23
-#define SIGPOLL SIGIO /* SysV name for SIGIO */
-#define SIGXCPU 24
-#define SIGXFSZ 25
-#define SIGVTALRM 26
-#define SIGPROF 27
-#define SIGWINCH 28
-#define SIGLOST 29
-#define SIGPWR SIGLOST
-#define SIGUSR1 30
-#define SIGUSR2 31
-
-/* Most things should be clean enough to redefine this at will, if care
- * is taken to make libc match.
- */
-
-#define __OLD_NSIG 32
-#define __NEW_NSIG 64
-#define _NSIG_BPW 32
-#define _NSIG_WORDS (__NEW_NSIG / _NSIG_BPW)
-
-#define SIGRTMIN 32
-#define SIGRTMAX __NEW_NSIG
-
-#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
-#define _NSIG __NEW_NSIG
-#define __new_sigset_t sigset_t
-#define __new_sigaction sigaction
-#define __old_sigset_t old_sigset_t
-#define __old_sigaction old_sigaction
-#else
-#define _NSIG __OLD_NSIG
-#define __old_sigset_t sigset_t
-#define __old_sigaction sigaction
-#endif
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long __old_sigset_t;
-
-typedef struct {
- unsigned long sig[_NSIG_WORDS];
-} __new_sigset_t;
-
-
-#ifdef __KERNEL__
-/* A SunOS sigstack */
-struct sigstack {
- char *the_stack;
- int cur_status;
-};
-#endif
-
-/* Sigvec flags */
-#define _SV_SSTACK 1u /* This signal handler should use sig-stack */
-#define _SV_INTR 2u /* Sig return should not restart system call */
-#define _SV_RESET 4u /* Set handler to SIG_DFL upon taken signal */
-#define _SV_IGNCHILD 8u /* Do not send SIGCHLD */
-
-/*
- * sa_flags values: SA_STACK is not currently supported, but will allow the
- * usage of signal stacks by using the (now obsolete) sa_restorer field in
- * the sigaction structure as a stack pointer. This is now possible due to
- * the changes in signal handling. LBT 010493.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- */
-#define SA_NOCLDSTOP _SV_IGNCHILD
-#define SA_STACK _SV_SSTACK
-#define SA_ONSTACK _SV_SSTACK
-#define SA_RESTART _SV_INTR
-#define SA_ONESHOT _SV_RESET
-#define SA_NOMASK 0x20u
-#define SA_NOCLDWAIT 0x100u
-#define SA_SIGINFO 0x200u
-
-#define SIG_BLOCK 0x01 /* for blocking signals */
-#define SIG_UNBLOCK 0x02 /* for unblocking signals */
-#define SIG_SETMASK 0x04 /* for setting the signal mask */
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK 1
-#define SS_DISABLE 2
-
-#define MINSIGSTKSZ 4096
-#define SIGSTKSZ 16384
-
-#ifdef __KERNEL__
-/*
- * DJHR
- * SA_STATIC_ALLOC is used for the SPARC system to indicate that this
- * interrupt handler's irq structure should be statically allocated
- * by the request_irq routine.
- * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
- * of interrupt usage and that sucks. Also without a flag like this
- * it may be possible for the free_irq routine to attempt to free
- * statically allocated data.. which is NOT GOOD.
- *
- */
-#define SA_STATIC_ALLOC 0x8000
-#endif
-
-#include <asm-generic/signal.h>
-
-#ifdef __KERNEL__
-struct __new_sigaction {
- __sighandler_t sa_handler;
- unsigned long sa_flags;
- void (*sa_restorer)(void); /* Not used by Linux/SPARC */
- __new_sigset_t sa_mask;
-};
-
-struct k_sigaction {
- struct __new_sigaction sa;
- void __user *ka_restorer;
-};
-
-struct __old_sigaction {
- __sighandler_t sa_handler;
- __old_sigset_t sa_mask;
- unsigned long sa_flags;
- void (*sa_restorer) (void); /* not used by Linux/SPARC */
-};
-
-typedef struct sigaltstack {
- void __user *ss_sp;
- int ss_flags;
- size_t ss_size;
-} stack_t;
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* !(__KERNEL__) */
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(_ASMSPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/signal_64.h b/arch/sparc/include/asm/signal_64.h
deleted file mode 100644
index ab1509a101c5..000000000000
--- a/arch/sparc/include/asm/signal_64.h
+++ /dev/null
@@ -1,194 +0,0 @@
-#ifndef _ASMSPARC64_SIGNAL_H
-#define _ASMSPARC64_SIGNAL_H
-
-#include <asm/sigcontext.h>
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#include <linux/personality.h>
-#include <linux/types.h>
-#endif
-#endif
-
-/* On the Sparc the signal handlers get passed a 'sub-signal' code
- * for certain signal types, which we document here.
- */
-#define SIGHUP 1
-#define SIGINT 2
-#define SIGQUIT 3
-#define SIGILL 4
-#define SUBSIG_STACK 0
-#define SUBSIG_ILLINST 2
-#define SUBSIG_PRIVINST 3
-#define SUBSIG_BADTRAP(t) (0x80 + (t))
-
-#define SIGTRAP 5
-#define SIGABRT 6
-#define SIGIOT 6
-
-#define SIGEMT 7
-#define SUBSIG_TAG 10
-
-#define SIGFPE 8
-#define SUBSIG_FPDISABLED 0x400
-#define SUBSIG_FPERROR 0x404
-#define SUBSIG_FPINTOVFL 0x001
-#define SUBSIG_FPSTSIG 0x002
-#define SUBSIG_IDIVZERO 0x014
-#define SUBSIG_FPINEXACT 0x0c4
-#define SUBSIG_FPDIVZERO 0x0c8
-#define SUBSIG_FPUNFLOW 0x0cc
-#define SUBSIG_FPOPERROR 0x0d0
-#define SUBSIG_FPOVFLOW 0x0d4
-
-#define SIGKILL 9
-#define SIGBUS 10
-#define SUBSIG_BUSTIMEOUT 1
-#define SUBSIG_ALIGNMENT 2
-#define SUBSIG_MISCERROR 5
-
-#define SIGSEGV 11
-#define SUBSIG_NOMAPPING 3
-#define SUBSIG_PROTECTION 4
-#define SUBSIG_SEGERROR 5
-
-#define SIGSYS 12
-
-#define SIGPIPE 13
-#define SIGALRM 14
-#define SIGTERM 15
-#define SIGURG 16
-
-/* SunOS values which deviate from the Linux/i386 ones */
-#define SIGSTOP 17
-#define SIGTSTP 18
-#define SIGCONT 19
-#define SIGCHLD 20
-#define SIGTTIN 21
-#define SIGTTOU 22
-#define SIGIO 23
-#define SIGPOLL SIGIO /* SysV name for SIGIO */
-#define SIGXCPU 24
-#define SIGXFSZ 25
-#define SIGVTALRM 26
-#define SIGPROF 27
-#define SIGWINCH 28
-#define SIGLOST 29
-#define SIGPWR SIGLOST
-#define SIGUSR1 30
-#define SIGUSR2 31
-
-/* Most things should be clean enough to redefine this at will, if care
- is taken to make libc match. */
-
-#define __OLD_NSIG 32
-#define __NEW_NSIG 64
-#define _NSIG_BPW 64
-#define _NSIG_WORDS (__NEW_NSIG / _NSIG_BPW)
-
-#define SIGRTMIN 32
-#define SIGRTMAX __NEW_NSIG
-
-#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
-#define _NSIG __NEW_NSIG
-#define __new_sigset_t sigset_t
-#define __new_sigaction sigaction
-#define __new_sigaction32 sigaction32
-#define __old_sigset_t old_sigset_t
-#define __old_sigaction old_sigaction
-#define __old_sigaction32 old_sigaction32
-#else
-#define _NSIG __OLD_NSIG
-#define NSIG _NSIG
-#define __old_sigset_t sigset_t
-#define __old_sigaction sigaction
-#define __old_sigaction32 sigaction32
-#endif
-
-#ifndef __ASSEMBLY__
-
-typedef unsigned long __old_sigset_t; /* at least 32 bits */
-
-typedef struct {
- unsigned long sig[_NSIG_WORDS];
-} __new_sigset_t;
-
-/* A SunOS sigstack */
-struct sigstack {
- /* XXX 32-bit pointers pinhead XXX */
- char *the_stack;
- int cur_status;
-};
-
-/* Sigvec flags */
-#define _SV_SSTACK 1u /* This signal handler should use sig-stack */
-#define _SV_INTR 2u /* Sig return should not restart system call */
-#define _SV_RESET 4u /* Set handler to SIG_DFL upon taken signal */
-#define _SV_IGNCHILD 8u /* Do not send SIGCHLD */
-
-/*
- * sa_flags values: SA_STACK is not currently supported, but will allow the
- * usage of signal stacks by using the (now obsolete) sa_restorer field in
- * the sigaction structure as a stack pointer. This is now possible due to
- * the changes in signal handling. LBT 010493.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- */
-#define SA_NOCLDSTOP _SV_IGNCHILD
-#define SA_STACK _SV_SSTACK
-#define SA_ONSTACK _SV_SSTACK
-#define SA_RESTART _SV_INTR
-#define SA_ONESHOT _SV_RESET
-#define SA_NOMASK 0x20u
-#define SA_NOCLDWAIT 0x100u
-#define SA_SIGINFO 0x200u
-
-
-#define SIG_BLOCK 0x01 /* for blocking signals */
-#define SIG_UNBLOCK 0x02 /* for unblocking signals */
-#define SIG_SETMASK 0x04 /* for setting the signal mask */
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK 1
-#define SS_DISABLE 2
-
-#define MINSIGSTKSZ 4096
-#define SIGSTKSZ 16384
-
-#include <asm-generic/signal.h>
-
-struct __new_sigaction {
- __sighandler_t sa_handler;
- unsigned long sa_flags;
- __sigrestore_t sa_restorer; /* not used by Linux/SPARC yet */
- __new_sigset_t sa_mask;
-};
-
-struct __old_sigaction {
- __sighandler_t sa_handler;
- __old_sigset_t sa_mask;
- unsigned long sa_flags;
- void (*sa_restorer)(void); /* not used by Linux/SPARC yet */
-};
-
-typedef struct sigaltstack {
- void __user *ss_sp;
- int ss_flags;
- size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
-
-struct k_sigaction {
- struct __new_sigaction sa;
- void __user *ka_restorer;
-};
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* !(__KERNEL__) */
-
-#endif /* !(__ASSEMBLY__) */
-
-#endif /* !(_ASMSPARC64_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index a8180e546a48..58101dc70493 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -29,8 +29,6 @@
*/
extern unsigned char boot_cpu_id;
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map phys_cpu_present_map
typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long);
@@ -172,7 +170,4 @@ void smp_setup_cpu_possible_map(void);
#define smp_setup_cpu_possible_map() do { } while (0)
#endif /* !(SMP) */
-
-#define NO_PROC_ID 0xFF
-
#endif /* !(_SPARC_SMP_H) */
diff --git a/arch/sparc/include/asm/stat.h b/arch/sparc/include/asm/stat.h
index d8153013df72..55db5eca08e2 100644
--- a/arch/sparc/include/asm/stat.h
+++ b/arch/sparc/include/asm/stat.h
@@ -1,8 +1,107 @@
-#ifndef ___ASM_SPARC_STAT_H
-#define ___ASM_SPARC_STAT_H
+#ifndef __SPARC_STAT_H
+#define __SPARC_STAT_H
+
+#include <linux/types.h>
+
#if defined(__sparc__) && defined(__arch64__)
-#include <asm/stat_64.h>
+/* 64 bit sparc */
+struct stat {
+ unsigned st_dev;
+ ino_t st_ino;
+ mode_t st_mode;
+ short st_nlink;
+ uid_t st_uid;
+ gid_t st_gid;
+ unsigned st_rdev;
+ off_t st_size;
+ time_t st_atime;
+ time_t st_mtime;
+ time_t st_ctime;
+ off_t st_blksize;
+ off_t st_blocks;
+ unsigned long __unused4[2];
+};
+
+struct stat64 {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+
+ unsigned int st_mode;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned int __pad0;
+
+ unsigned long st_rdev;
+ long st_size;
+ long st_blksize;
+ long st_blocks;
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ long __unused[3];
+};
+
#else
-#include <asm/stat_32.h>
-#endif
-#endif
+/* 32 bit sparc */
+struct stat {
+ unsigned short st_dev;
+ ino_t st_ino;
+ mode_t st_mode;
+ short st_nlink;
+ uid_t st_uid;
+ gid_t st_gid;
+ unsigned short st_rdev;
+ off_t st_size;
+ time_t st_atime;
+ unsigned long st_atime_nsec;
+ time_t st_mtime;
+ unsigned long st_mtime_nsec;
+ time_t st_ctime;
+ unsigned long st_ctime_nsec;
+ off_t st_blksize;
+ off_t st_blocks;
+ unsigned long __unused4[2];
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct stat64 {
+ unsigned long long st_dev;
+
+ unsigned long long st_ino;
+
+ unsigned int st_mode;
+ unsigned int st_nlink;
+
+ unsigned int st_uid;
+ unsigned int st_gid;
+
+ unsigned long long st_rdev;
+
+ unsigned char __pad3[8];
+
+ long long st_size;
+ unsigned int st_blksize;
+
+ unsigned char __pad4[8];
+ unsigned int st_blocks;
+
+ unsigned int st_atime;
+ unsigned int st_atime_nsec;
+
+ unsigned int st_mtime;
+ unsigned int st_mtime_nsec;
+
+ unsigned int st_ctime;
+ unsigned int st_ctime_nsec;
+
+ unsigned int __unused4;
+ unsigned int __unused5;
+};
+#endif /* defined(__sparc__) && defined(__arch64__) */
+#endif /* __SPARC_STAT_H */
diff --git a/arch/sparc/include/asm/stat_32.h b/arch/sparc/include/asm/stat_32.h
deleted file mode 100644
index 2299e1d5d94c..000000000000
--- a/arch/sparc/include/asm/stat_32.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _SPARC_STAT_H
-#define _SPARC_STAT_H
-
-#include <linux/types.h>
-
-struct __old_kernel_stat {
- unsigned short st_dev;
- unsigned short st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
- unsigned short st_rdev;
- unsigned long st_size;
- unsigned long st_atime;
- unsigned long st_mtime;
- unsigned long st_ctime;
-};
-
-struct stat {
- unsigned short st_dev;
- unsigned long st_ino;
- unsigned short st_mode;
- short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
- unsigned short st_rdev;
- long st_size;
- long st_atime;
- unsigned long st_atime_nsec;
- long st_mtime;
- unsigned long st_mtime_nsec;
- long st_ctime;
- unsigned long st_ctime_nsec;
- long st_blksize;
- long st_blocks;
- unsigned long __unused4[2];
-};
-
-#define STAT_HAVE_NSEC 1
-
-struct stat64 {
- unsigned long long st_dev;
-
- unsigned long long st_ino;
-
- unsigned int st_mode;
- unsigned int st_nlink;
-
- unsigned int st_uid;
- unsigned int st_gid;
-
- unsigned long long st_rdev;
-
- unsigned char __pad3[8];
-
- long long st_size;
- unsigned int st_blksize;
-
- unsigned char __pad4[8];
- unsigned int st_blocks;
-
- unsigned int st_atime;
- unsigned int st_atime_nsec;
-
- unsigned int st_mtime;
- unsigned int st_mtime_nsec;
-
- unsigned int st_ctime;
- unsigned int st_ctime_nsec;
-
- unsigned int __unused4;
- unsigned int __unused5;
-};
-
-#endif
diff --git a/arch/sparc/include/asm/stat_64.h b/arch/sparc/include/asm/stat_64.h
deleted file mode 100644
index 9650fdea847f..000000000000
--- a/arch/sparc/include/asm/stat_64.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _SPARC64_STAT_H
-#define _SPARC64_STAT_H
-
-#include <linux/types.h>
-
-struct stat {
- unsigned st_dev;
- ino_t st_ino;
- mode_t st_mode;
- short st_nlink;
- uid_t st_uid;
- gid_t st_gid;
- unsigned st_rdev;
- off_t st_size;
- time_t st_atime;
- time_t st_mtime;
- time_t st_ctime;
- off_t st_blksize;
- off_t st_blocks;
- unsigned long __unused4[2];
-};
-
-struct stat64 {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
-
- unsigned int st_mode;
- unsigned int st_uid;
- unsigned int st_gid;
- unsigned int __pad0;
-
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
- long st_blocks;
-
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- long __unused[3];
-};
-
-#endif
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 80fe547c3f45..0f7b0e5fb1c7 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -45,7 +45,7 @@ struct thread_info {
/* A place to store user windows and stack pointers
* when the stack needs inspection.
*/
- struct reg_window reg_window[NSWINS]; /* align for ldd! */
+ struct reg_window32 reg_window[NSWINS]; /* align for ldd! */
unsigned long rwbuf_stkptrs[NSWINS];
unsigned long w_saved;
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 001c04027c82..b8a65b64e1df 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -16,8 +16,12 @@ static inline cpumask_t node_to_cpumask(int node)
{
return numa_cpumask_lookup_table[node];
}
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+/*
+ * Returns a pointer to the cpumask of CPUs on Node 'node'.
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
#define node_to_cpumask_ptr(v, node) \
cpumask_t *v = &(numa_cpumask_lookup_table[node])
@@ -26,9 +30,7 @@ static inline cpumask_t node_to_cpumask(int node)
static inline int node_to_first_cpu(int node)
{
- cpumask_t tmp;
- tmp = node_to_cpumask(node);
- return first_cpu(tmp);
+ return cpumask_first(cpumask_of_node(node));
}
struct pci_bus;
@@ -77,10 +79,13 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
#define topology_core_id(cpu) (cpu_data(cpu).core_id)
#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
#define mc_capable() (sparc64_multi_core)
#define smt_capable() (sparc64_multi_core)
#endif /* CONFIG_SMP */
#define cpu_coregroup_map(cpu) (cpu_core_map[cpu])
+#define cpu_coregroup_mask(cpu) (&cpu_core_map[cpu])
#endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h
index bebdbf8f43a8..3aa62dde343f 100644
--- a/arch/sparc/include/asm/traps.h
+++ b/arch/sparc/include/asm/traps.h
@@ -10,7 +10,7 @@
#define NUM_SPARC_TRAPS 255
#ifndef __ASSEMBLY__
-
+#ifdef __KERNEL__
/* This is for V8 compliant Sparc CPUS */
struct tt_entry {
unsigned long inst_one;
@@ -22,14 +22,7 @@ struct tt_entry {
/* We set this to _start in system setup. */
extern struct tt_entry *sparc_ttable;
-static inline unsigned long get_tbr(void)
-{
- unsigned long tbr;
-
- __asm__ __volatile__("rd %%tbr, %0\n\t" : "=r" (tbr));
- return tbr;
-}
-
+#endif /* (__KERNEL__) */
#endif /* !(__ASSEMBLY__) */
/* For patching the trap table at boot time, we need to know how to
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 62126e4cec54..f28cb8278e98 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -8,7 +8,6 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index f3488c45d57a..1eff942fe22f 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -669,7 +669,9 @@ void __init init_IRQ(void)
btfixup();
}
+#ifdef CONFIG_PROC_FS
void init_irq_proc(void)
{
/* For now, nothing... */
}
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index a3ea2bcb95de..cab8e0286871 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -312,7 +312,8 @@ static void sun4u_irq_enable(unsigned int virt_irq)
}
}
-static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4u_set_affinity(unsigned int virt_irq,
+ const struct cpumask *mask)
{
sun4u_irq_enable(virt_irq);
}
@@ -362,7 +363,8 @@ static void sun4v_irq_enable(unsigned int virt_irq)
ino, err);
}
-static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_set_affinity(unsigned int virt_irq,
+ const struct cpumask *mask)
{
unsigned int ino = virt_irq_table[virt_irq].dev_ino;
unsigned long cpuid = irq_choose_cpu(virt_irq);
@@ -429,7 +431,8 @@ static void sun4v_virq_enable(unsigned int virt_irq)
dev_handle, dev_ino, err);
}
-static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_virt_set_affinity(unsigned int virt_irq,
+ const struct cpumask *mask)
{
unsigned long cpuid, dev_handle, dev_ino;
int err;
@@ -851,7 +854,7 @@ void fixup_irqs(void)
!(irq_desc[irq].status & IRQ_PER_CPU)) {
if (irq_desc[irq].chip->set_affinity)
irq_desc[irq].chip->set_affinity(irq,
- irq_desc[irq].affinity);
+ &irq_desc[irq].affinity);
}
spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
}
diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c
index 757805ce02ee..04df4edc0073 100644
--- a/arch/sparc/kernel/kgdb_32.c
+++ b/arch/sparc/kernel/kgdb_32.c
@@ -14,14 +14,14 @@ extern unsigned long trapbase;
void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
{
- struct reg_window *win;
+ struct reg_window32 *win;
int i;
gdb_regs[GDB_G0] = 0;
for (i = 0; i < 15; i++)
gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i];
- win = (struct reg_window *) regs->u_regs[UREG_FP];
+ win = (struct reg_window32 *) regs->u_regs[UREG_FP];
for (i = 0; i < 8; i++)
gdb_regs[GDB_L0 + i] = win->locals[i];
for (i = 0; i < 8; i++)
@@ -43,7 +43,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
struct thread_info *t = task_thread_info(p);
- struct reg_window *win;
+ struct reg_window32 *win;
int i;
for (i = GDB_G0; i < GDB_G6; i++)
@@ -55,7 +55,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
gdb_regs[GDB_SP] = t->ksp;
gdb_regs[GDB_O7] = 0;
- win = (struct reg_window *) t->ksp;
+ win = (struct reg_window32 *) t->ksp;
for (i = 0; i < 8; i++)
gdb_regs[GDB_L0 + i] = win->locals[i];
for (i = 0; i < 8; i++)
@@ -77,7 +77,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
{
- struct reg_window *win;
+ struct reg_window32 *win;
int i;
for (i = 0; i < 15; i++)
@@ -96,7 +96,7 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
regs->npc = gdb_regs[GDB_NPC];
regs->y = gdb_regs[GDB_Y];
- win = (struct reg_window *) regs->u_regs[UREG_FP];
+ win = (struct reg_window32 *) regs->u_regs[UREG_FP];
for (i = 0; i < 8; i++)
win->locals[i] = gdb_regs[GDB_L0 + i];
for (i = 0; i < 8; i++)
diff --git a/arch/sparc/kernel/muldiv.c b/arch/sparc/kernel/muldiv.c
index ba960c02bb55..6ce1021d487c 100644
--- a/arch/sparc/kernel/muldiv.c
+++ b/arch/sparc/kernel/muldiv.c
@@ -60,7 +60,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
}
#define fetch_reg(reg, regs) ({ \
- struct reg_window __user *win; \
+ struct reg_window32 __user *win; \
register unsigned long ret; \
\
if (!(reg)) ret = 0; \
@@ -68,7 +68,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
ret = regs->u_regs[(reg)]; \
} else { \
/* Ho hum, the slightly complicated case. */ \
- win = (struct reg_window __user *)regs->u_regs[UREG_FP];\
+ win = (struct reg_window32 __user *)regs->u_regs[UREG_FP];\
if (get_user (ret, &win->locals[(reg) - 16])) return -1;\
} \
ret; \
@@ -77,7 +77,7 @@ static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
static inline int
store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs)
{
- struct reg_window __user *win;
+ struct reg_window32 __user *win;
if (!reg)
return 0;
@@ -86,7 +86,7 @@ store_reg(unsigned int result, unsigned int reg, struct pt_regs *regs)
return 0;
} else {
/* need to use put_user() in this case: */
- win = (struct reg_window __user *) regs->u_regs[UREG_FP];
+ win = (struct reg_window32 __user *) regs->u_regs[UREG_FP];
return (put_user(result, &win->locals[reg - 16]));
}
}
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 46e231f7c5ce..4873f28905b0 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -778,9 +778,9 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
out:
nid = of_node_to_nid(dp);
if (nid != -1) {
- cpumask_t numa_mask = node_to_cpumask(nid);
+ cpumask_t numa_mask = *cpumask_of_node(nid);
- irq_set_affinity(irq, numa_mask);
+ irq_set_affinity(irq, &numa_mask);
}
return irq;
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 2e680f34f727..4ef282e81912 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -286,9 +286,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
nid = pbm->numa_node;
if (nid != -1) {
- cpumask_t numa_mask = node_to_cpumask(nid);
+ cpumask_t numa_mask = *cpumask_of_node(nid);
- irq_set_affinity(irq, numa_mask);
+ irq_set_affinity(irq, &numa_mask);
}
err = request_irq(irq, sparc64_msiq_interrupt, 0,
"MSIQ",
diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c
index dfb3ec892987..3b34344082ef 100644
--- a/arch/sparc/kernel/pci_psycho.c
+++ b/arch/sparc/kernel/pci_psycho.c
@@ -307,10 +307,7 @@ static void psycho_register_error_handlers(struct pci_pbm_info *pbm)
/* We really mean to ignore the return result here. Two
* PCI controller share the same interrupt numbers and
- * drive the same front-end hardware. Whichever of the
- * two get in here first will register the IRQ handler
- * the second will just error out since we do not pass in
- * IRQF_SHARED.
+ * drive the same front-end hardware.
*/
err = request_irq(op->irqs[1], psycho_ue_intr, IRQF_SHARED,
"PSYCHO_UE", pbm);
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 69d9315f4a93..5a8d8ced33da 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -180,13 +180,13 @@ static DEFINE_SPINLOCK(sparc_backtrace_lock);
void __show_backtrace(unsigned long fp)
{
- struct reg_window *rw;
+ struct reg_window32 *rw;
unsigned long flags;
int cpu = smp_processor_id();
spin_lock_irqsave(&sparc_backtrace_lock, flags);
- rw = (struct reg_window *)fp;
+ rw = (struct reg_window32 *)fp;
while(rw && (((unsigned long) rw) >= PAGE_OFFSET) &&
!(((unsigned long) rw) & 0x7)) {
printk("CPU[%d]: ARGS[%08lx,%08lx,%08lx,%08lx,%08lx,%08lx] "
@@ -196,7 +196,7 @@ void __show_backtrace(unsigned long fp)
rw->ins[6],
rw->ins[7]);
printk("%pS\n", (void *) rw->ins[7]);
- rw = (struct reg_window *) rw->ins[6];
+ rw = (struct reg_window32 *) rw->ins[6];
}
spin_unlock_irqrestore(&sparc_backtrace_lock, flags);
}
@@ -258,7 +258,7 @@ void show_stackframe(struct sparc_stackf *sf)
void show_regs(struct pt_regs *r)
{
- struct reg_window *rw = (struct reg_window *) r->u_regs[14];
+ struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14];
printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n",
r->psr, r->pc, r->npc, r->y, print_tainted());
@@ -287,7 +287,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
{
unsigned long pc, fp;
unsigned long task_base;
- struct reg_window *rw;
+ struct reg_window32 *rw;
int count = 0;
if (tsk != NULL)
@@ -301,7 +301,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
if (fp < (task_base + sizeof(struct thread_info)) ||
fp >= (task_base + (PAGE_SIZE << 1)))
break;
- rw = (struct reg_window *) fp;
+ rw = (struct reg_window32 *) fp;
pc = rw->ins[7];
printk("[%08lx : ", pc);
printk("%pS ] ", (void *) pc);
@@ -679,7 +679,7 @@ unsigned long get_wchan(struct task_struct *task)
unsigned long pc, fp, bias = 0;
unsigned long task_base = (unsigned long) task;
unsigned long ret = 0;
- struct reg_window *rw;
+ struct reg_window32 *rw;
int count = 0;
if (!task || task == current ||
@@ -692,7 +692,7 @@ unsigned long get_wchan(struct task_struct *task)
if (fp < (task_base + sizeof(struct thread_info)) ||
fp >= (task_base + (2 * PAGE_SIZE)))
break;
- rw = (struct reg_window *) fp;
+ rw = (struct reg_window32 *) fp;
pc = rw->ins[7];
if (!in_sched_functions(pc)) {
ret = pc;
diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index 4e9af593db49..ff7b591c8946 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -155,20 +155,12 @@ static struct property * __init build_one_prop(phandle node, char *prev,
p->value = prom_early_alloc(special_len);
memcpy(p->value, special_val, special_len);
} else {
-#ifdef CONFIG_SPARC32
- if (prev == NULL) {
- name = prom_firstprop(node, NULL);
- } else {
- name = prom_nextprop(node, prev, NULL);
- }
-#else
if (prev == NULL) {
- prom_firstprop(node, p->name);
+ name = prom_firstprop(node, p->name);
} else {
- prom_nextprop(node, prev, p->name);
+ name = prom_nextprop(node, prev, p->name);
}
- name = p->name;
-#endif
+
if (strlen(name) == 0) {
tmp = p;
return NULL;
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index c94f91c8b6e0..181d069a2d44 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -34,7 +34,7 @@ extern void fpload(unsigned long *fpregs, unsigned long *fsr);
struct signal_frame {
struct sparc_stackf ss;
- __siginfo_t info;
+ __siginfo32_t info;
__siginfo_fpu_t __user *fpu_save;
unsigned long insns[2] __attribute__ ((aligned (8)));
unsigned int extramask[_NSIG_WORDS - 1];
@@ -351,7 +351,7 @@ static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
err |= __copy_to_user(sf->extramask, &oldset->sig[1],
(_NSIG_WORDS - 1) * sizeof(unsigned int));
err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
- sizeof(struct reg_window));
+ sizeof(struct reg_window32));
if (err)
goto sigsegv;
@@ -433,7 +433,7 @@ static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
- sizeof(struct reg_window));
+ sizeof(struct reg_window32));
err |= copy_siginfo_to_user(&sf->info, info);
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index e396c1f17a92..1e5ac4e282e1 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -39,8 +39,6 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
unsigned char boot_cpu_id = 0;
unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
cpumask_t smp_commenced_mask = CPU_MASK_NONE;
/* The only guaranteed locking primitive available on all Sparc
@@ -334,7 +332,7 @@ void __init smp_setup_cpu_possible_map(void)
instance = 0;
while (!cpu_find_by_instance(instance, NULL, &mid)) {
if (mid < NR_CPUS) {
- cpu_set(mid, phys_cpu_present_map);
+ cpu_set(mid, cpu_possible_map);
cpu_set(mid, cpu_present_map);
}
instance++;
@@ -354,7 +352,7 @@ void __init smp_prepare_boot_cpu(void)
current_thread_info()->cpu = cpuid;
cpu_set(cpuid, cpu_online_map);
- cpu_set(cpuid, phys_cpu_present_map);
+ cpu_set(cpuid, cpu_possible_map);
}
int __cpuinit __cpu_up(unsigned int cpu)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index bfe99d82d458..46329799f346 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -49,14 +49,10 @@
int sparc64_multi_core __read_mostly;
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
-EXPORT_SYMBOL(cpu_possible_map);
-EXPORT_SYMBOL(cpu_online_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c
index a4d45fc29b21..e1e97639231b 100644
--- a/arch/sparc/kernel/sparc_ksyms_32.c
+++ b/arch/sparc/kernel/sparc_ksyms_32.c
@@ -112,10 +112,6 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data);
#ifdef CONFIG_SMP
/* IRQ implementation. */
EXPORT_SYMBOL(synchronize_irq);
-
-/* CPU online map and active count. */
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(phys_cpu_present_map);
#endif
EXPORT_SYMBOL(__udelay);
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 141da3759091..9df8f095a8b1 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void)
sevt = &__get_cpu_var(sparc64_events);
memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
- sevt->cpumask = cpumask_of_cpu(smp_processor_id());
+ sevt->cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(sevt);
}
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index 716f3946c494..213645be6e92 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -67,7 +67,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
__RESTORE; __RESTORE; __RESTORE; __RESTORE;
{
- struct reg_window *rw = (struct reg_window *)regs->u_regs[UREG_FP];
+ struct reg_window32 *rw = (struct reg_window32 *)regs->u_regs[UREG_FP];
/* Stop the back trace when we hit userland or we
* find some badly aligned kernel stack. Set an upper
@@ -79,7 +79,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
!(((unsigned long) rw) & 0x7)) {
printk("Caller[%08lx]: %pS\n", rw->ins[7],
(void *) rw->ins[7]);
- rw = (struct reg_window *)rw->ins[6];
+ rw = (struct reg_window32 *)rw->ins[6];
}
}
printk("Instruction DUMP:");
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index c2a28c5ad650..6b1e6cde6fff 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -97,26 +97,26 @@ static inline int sign_extend_imm13(int imm)
static inline unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
{
- struct reg_window *win;
+ struct reg_window32 *win;
if(reg < 16)
return (!reg ? 0 : regs->u_regs[reg]);
/* Ho hum, the slightly complicated case. */
- win = (struct reg_window *) regs->u_regs[UREG_FP];
+ win = (struct reg_window32 *) regs->u_regs[UREG_FP];
return win->locals[reg - 16]; /* yes, I know what this does... */
}
static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *regs)
{
- struct reg_window __user *win;
+ struct reg_window32 __user *win;
unsigned long ret;
if (reg < 16)
return (!reg ? 0 : regs->u_regs[reg]);
/* Ho hum, the slightly complicated case. */
- win = (struct reg_window __user *) regs->u_regs[UREG_FP];
+ win = (struct reg_window32 __user *) regs->u_regs[UREG_FP];
if ((unsigned long)win & 3)
return -1;
@@ -129,11 +129,11 @@ static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *reg
static inline unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
{
- struct reg_window *win;
+ struct reg_window32 *win;
if(reg < 16)
return &regs->u_regs[reg];
- win = (struct reg_window *) regs->u_regs[UREG_FP];
+ win = (struct reg_window32 *) regs->u_regs[UREG_FP];
return &win->locals[reg - 16];
}
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index 9cc93eaa4abf..f24d298bda29 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -42,7 +42,7 @@ static inline void shift_window_buffer(int first_win, int last_win, struct threa
for(i = first_win; i < last_win; i++) {
tp->rwbuf_stkptrs[i] = tp->rwbuf_stkptrs[i+1];
- memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window));
+ memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window32));
}
}
@@ -70,7 +70,7 @@ void synchronize_user_stack(void)
/* Ok, let it rip. */
if (copy_to_user((char __user *) sp, &tp->reg_window[window],
- sizeof(struct reg_window)))
+ sizeof(struct reg_window32)))
continue;
shift_window_buffer(window, tp->w_saved - 1, tp);
@@ -119,7 +119,7 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who)
if ((sp & 7) ||
copy_to_user((char __user *) sp, &tp->reg_window[window],
- sizeof(struct reg_window)))
+ sizeof(struct reg_window32)))
do_exit(SIGILL);
}
tp->w_saved = 0;
diff --git a/arch/um/Makefile b/arch/um/Makefile
index d944c343acdb..0728def32234 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -22,10 +22,11 @@ MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas
include $(srctree)/$(ARCH_DIR)/Makefile-skas
-ARCH_INCLUDE := -I$(srctree)/$(ARCH_DIR)/include/shared
+SHARED_HEADERS := $(ARCH_DIR)/include/shared
+ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS)
ARCH_INCLUDE += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared
ifneq ($(KBUILD_SRC),)
-ARCH_INCLUDE += -I$(ARCH_DIR)/include/shared # for two generated files
+ARCH_INCLUDE += -I$(SHARED_HEADERS)
endif
KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)
@@ -85,8 +86,8 @@ endef
KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH)
-archprepare: $(ARCH_DIR)/include/shared/user_constants.h
-prepare: $(ARCH_DIR)/include/shared/kern_constants.h
+archprepare: $(SHARED_HEADERS)/user_constants.h
+archprepare: $(SHARED_HEADERS)/kern_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
@@ -119,17 +120,13 @@ endef
# When cleaning we don't include .config, so we don't include
# TT or skas makefiles and don't clean skas_ptregs.h.
CLEAN_FILES += linux x.i gmon.out \
- $(ARCH_DIR)/include/shared/user_constants.h \
- $(ARCH_DIR)/include/shared/kern_constants.h
+ $(SHARED_HEADERS)/user_constants.h \
+ $(SHARED_HEADERS)/kern_constants.h
archclean:
@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
-o -name '*.gcov' \) -type f -print | xargs rm -f
-$(objtree)/$(ARCH_DIR)/include/shared:
- @echo ' MKDIR $@'
- $(Q)mkdir -p $@
-
# Generated files
$(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE
@@ -148,11 +145,11 @@ define filechk_gen-asm-offsets
echo ""; )
endef
-$(ARCH_DIR)/include/shared/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
+$(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
$(call filechk,gen-asm-offsets)
-$(ARCH_DIR)/include/shared/kern_constants.h: $(objtree)/$(ARCH_DIR)/include/shared
- @echo ' SYMLINK $@'
- $(Q)ln -sf ../../../../include/asm/asm-offsets.h $@
+$(SHARED_HEADERS)/kern_constants.h:
+ $(Q)mkdir -p $(dir $@)
+ $(Q)echo '#include "../../../../include/asm/asm-offsets.h"' >$@
export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH
diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
index ae5f94d6317d..753346e2cdfd 100644
--- a/arch/um/include/asm/system.h
+++ b/arch/um/include/asm/system.h
@@ -11,21 +11,21 @@ extern int get_signals(void);
extern void block_signals(void);
extern void unblock_signals(void);
-#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
+#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
(flags) = get_signals(); } while(0)
-#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
+#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
set_signals(flags); } while(0)
-#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
- raw_local_irq_disable(); } while(0)
+#define local_irq_save(flags) do { local_save_flags(flags); \
+ local_irq_disable(); } while(0)
-#define raw_local_irq_enable() unblock_signals()
-#define raw_local_irq_disable() block_signals()
+#define local_irq_enable() unblock_signals()
+#define local_irq_disable() block_signals()
#define irqs_disabled() \
({ \
unsigned long flags; \
- raw_local_save_flags(flags); \
+ local_save_flags(flags); \
(flags == 0); \
})
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index 910eda8fca18..806d381947bf 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -10,7 +10,6 @@
#include "linux/mqueue.h"
#include "asm/uaccess.h"
-static struct fs_struct init_fs = INIT_FS;
struct mm_struct init_mm = INIT_MM(init_mm);
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 045772142844..98351c78bc81 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -25,13 +25,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
#include "irq_user.h"
#include "os.h"
-/* CPU online map, set by smp_boot_cpus */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-
/* Per CPU bogomips and other parameters
* The only piece used here is the ipi pipe, which is set before SMP is
* started and never changed.
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 47f04f4a3464..b13a87a3ec95 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta,
static struct clock_event_device itimer_clockevent = {
.name = "itimer",
.rating = 250,
- .cpumask = CPU_MASK_ALL,
+ .cpumask = cpu_all_mask,
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode = itimer_set_mode,
.set_next_event = itimer_next_event,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0f44add3e0b7..862adb9bf0d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
+config AMD_IOMMU_STATS
+ bool "Export AMD IOMMU statistics to debugfs"
+ depends on AMD_IOMMU
+ select DEBUG_FS
+ help
+ This option enables code in the AMD IOMMU driver to collect various
+ statistics about whats happening in the driver and exports that
+ information to userspace via debugfs.
+ If unsure, say N.
+
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
def_bool y if X86_64
@@ -599,21 +609,25 @@ config SWIOTLB
config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+config IOMMU_API
+ def_bool (AMD_IOMMU || DMAR)
+
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
- depends on X86_64 && SMP && BROKEN
+ depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+ select CPUMASK_OFFSTACK
default n
help
Configure maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
config NR_CPUS
- int "Maximum number of CPUs (2-512)" if !MAXSMP
- range 2 512
- depends on SMP
+ int "Maximum number of CPUs" if SMP && !MAXSMP
+ range 2 512 if SMP && !MAXSMP
+ default "1" if !SMP
default "4096" if MAXSMP
- default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
- default "8"
+ default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+ default "8" if SMP
help
This allows you to specify the maximum number of CPUs which this
kernel will support. The maximum supported value is 512 and the
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 85a78575956c..8078955845ae 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -408,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR
def_bool y
- depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+ depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EMBEDDED
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85526e3..9dabd00e9805 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
-#include <asm/ia32.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
#include <asm/user32.h>
#include <asm/sigcontext32.h>
#include <asm/proto.h>
#include <asm/vdso.h>
-
#include <asm/sigframe.h>
+#include <asm/sys_ia32.h>
#define DEBUG_SIG 0
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991ce606c..29cdcd02ead3 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
#include <linux/shm.h>
#include <linux/ipc.h>
#include <linux/compat.h>
+#include <asm/sys_ia32.h>
asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd3c0a6..6c0d7f6231af 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
#include <asm/types.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
-#include <asm/ia32.h>
#include <asm/vgtod.h>
+#include <asm/sys_ia32.h>
#define AA(__x) ((unsigned long)(__x))
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2fa339..95c8cd9d22b5 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,23 @@
/* FIXME: move this macro to <linux/pci.h> */
#define PCI_BUS(x) (((x) >> 8) & 0xff)
+/* Protection domain flags */
+#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
+ domain for an IOMMU */
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
*/
struct protection_domain {
- spinlock_t lock; /* mostly used to lock the page table*/
- u16 id; /* the domain id written to the device table */
- int mode; /* paging mode (0-6 levels) */
- u64 *pt_root; /* page table root pointer */
- void *priv; /* private data */
+ spinlock_t lock; /* mostly used to lock the page table*/
+ u16 id; /* the domain id written to the device table */
+ int mode; /* paging mode (0-6 levels) */
+ u64 *pt_root; /* page table root pointer */
+ unsigned long flags; /* flags to find out type of domain */
+ unsigned dev_cnt; /* devices assigned to this domain */
+ void *priv; /* private data */
};
/*
@@ -295,7 +302,7 @@ struct amd_iommu {
bool int_enabled;
/* if one, we need to send a completion wait command */
- int need_sync;
+ bool need_sync;
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
extern unsigned long *amd_iommu_pd_alloc_bitmap;
/* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
/*
* If true, the addresses will be flushed on unmap time, not when
@@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
*/
extern bool amd_iommu_unmap_flush;
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
- int bus = devid >> 8;
- int dev = devid >> 3 & 0x1f;
- int fn = devid & 0x07;
-
- printk("%02x:%02x.%x", bus, dev, fn);
- if (nl)
- printk("\n");
-}
-
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
return (((u16)bus) << 8) | devfn;
}
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+ char *name;
+ struct dentry *dent;
+ u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+ static struct __iommu_counter nm = { \
+ .name = #nm, \
+ }
+
+#define INC_STATS_COUNTER(name) name.value += 1
+#define ADD_STATS_COUNTER(name, x) name.value += (x)
+#define SUB_STATS_COUNTER(name, x) name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
+
#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa0738af5..ab1d51a8855e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@ extern int disable_apic;
extern int is_vsmp_box(void);
extern void xapic_wait_icr_idle(void);
extern u32 safe_xapic_wait_icr_idle(void);
-extern u64 xapic_icr_read(void);
extern void xapic_icr_write(u32, u32);
extern int setup_profiling_timer(unsigned int);
@@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg)
}
#ifndef CONFIG_X86_32
-extern int x2apic, x2apic_preenabled;
+extern int x2apic;
extern void check_x2apic(void);
extern void enable_x2apic(void);
extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index ce547f24a1cd..d8dd9f537911 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
return (1);
}
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
#ifdef CONFIG_SMP
- return cpu_online_map;
+ return &cpu_online_map;
#else
- return cpumask_of_cpu(0);
+ return &cpumask_of_cpu(0);
#endif
}
@@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
static inline int cpu_present_to_apicid(int mps_cpu)
{
- if (mps_cpu < NR_CPUS)
+ if (mps_cpu < nr_cpu_ids)
return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
return BAD_APICID;
@@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
/* Mapping from cpu number to logical apicid */
static inline int cpu_to_logical_apicid(int cpu)
{
- if (cpu >= NR_CPUS)
+ if (cpu >= nr_cpu_ids)
return BAD_APICID;
return cpu_physical_id(cpu);
}
@@ -119,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
}
/* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
{
int cpu;
int apicid;
- cpu = first_cpu(cpumask);
+ cpu = first_cpu(*cpumask);
apicid = cpu_to_logical_apicid(cpu);
return apicid;
}
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask)
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ if (cpu < nr_cpu_ids)
+ return cpu_to_logical_apicid(cpu);
+
+ return BAD_APICID;
+}
+
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c535b7ec..27fcd01b3ae6 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,25 +1,22 @@
#ifndef __ASM_MACH_IPI_H
#define __ASM_MACH_IPI_H
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
static inline void send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
}
#endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b17b072..dc27705f5443 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr)
_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
}
-#define SYS_VECTOR_FREE 0
-#define SYS_VECTOR_ALLOCED 1
-
extern int first_system_vector;
-extern char system_vectors[];
+/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
+extern unsigned long used_vectors[];
static inline void alloc_system_vector(int vector)
{
- if (system_vectors[vector] == SYS_VECTOR_FREE) {
- system_vectors[vector] = SYS_VECTOR_ALLOCED;
+ if (!test_bit(vector, used_vectors)) {
+ set_bit(vector, used_vectors);
if (first_system_vector > vector)
first_system_vector = vector;
} else
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c91c35..ca5ffb2856b6 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
#endif /* CONFIG_X86_32 */
+extern int add_efi_memmap;
extern void efi_reserve_early(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index e24ef876915f..bc53d5ef1386 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,14 +9,14 @@ static inline int apic_id_registered(void)
return (1);
}
-static inline cpumask_t target_cpus_cluster(void)
+static inline const cpumask_t *target_cpus_cluster(void)
{
- return CPU_MASK_ALL;
+ return &CPU_MASK_ALL;
}
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
- return cpumask_of_cpu(smp_processor_id());
+ return &cpumask_of_cpu(smp_processor_id());
}
#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
@@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS];
static inline void setup_apic_routing(void)
{
int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
- printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
(apic_version[apic] == 0x14) ?
- "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
+ "Physical Cluster" : "Logical Cluster",
+ nr_ioapics, cpus_addr(*target_cpus())[0]);
}
static inline int multi_timer_check(int apic, int irq)
@@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
return boot_cpu_physical_apicid;
- else if (mps_cpu < NR_CPUS)
+ else if (mps_cpu < nr_cpu_ids)
return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
else
return BAD_APICID;
@@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
static inline int cpu_to_logical_apicid(int cpu)
{
#ifdef CONFIG_SMP
- if (cpu >= NR_CPUS)
- return BAD_APICID;
- return (int)cpu_2_logical_apicid[cpu];
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
+ return (int)cpu_2_logical_apicid[cpu];
#else
return logical_smp_processor_id();
#endif
@@ -146,25 +147,26 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
return (1);
}
-static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
+static inline unsigned int
+cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
{
int num_bits_set;
int cpus_found = 0;
int cpu;
int apicid;
- num_bits_set = cpus_weight(cpumask);
+ num_bits_set = cpumask_weight(cpumask);
/* Return id to all */
- if (num_bits_set == NR_CPUS)
+ if (num_bits_set == nr_cpu_ids)
return 0xFF;
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
*/
- cpu = first_cpu(cpumask);
+ cpu = cpumask_first(cpumask);
apicid = cpu_to_logical_apicid(cpu);
while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, cpumask)) {
+ if (cpumask_test_cpu(cpu, cpumask)) {
int new_apicid = cpu_to_logical_apicid(cpu);
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
@@ -179,25 +181,25 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
return apicid;
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
{
int num_bits_set;
int cpus_found = 0;
int cpu;
int apicid;
- num_bits_set = cpus_weight(cpumask);
+ num_bits_set = cpus_weight(*cpumask);
/* Return id to all */
- if (num_bits_set == NR_CPUS)
+ if (num_bits_set == nr_cpu_ids)
return cpu_to_logical_apicid(0);
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
*/
- cpu = first_cpu(cpumask);
+ cpu = first_cpu(*cpumask);
apicid = cpu_to_logical_apicid(cpu);
while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, cpumask)) {
+ if (cpu_isset(cpu, *cpumask)) {
int new_apicid = cpu_to_logical_apicid(cpu);
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
@@ -212,6 +214,24 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
return apicid;
}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask)
+{
+ int apicid = cpu_to_logical_apicid(0);
+ cpumask_var_t cpumask;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+ return apicid;
+
+ cpumask_and(cpumask, inmask, andmask);
+ cpumask_and(cpumask, cpumask, cpu_online_mask);
+ apicid = cpu_mask_to_apicid(cpumask);
+
+ free_cpumask_var(cpumask);
+ return apicid;
+}
+
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955fcc0a..7e8ed24d4b8a 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,24 +1,22 @@
#ifndef __ASM_ES7000_IPI_H
#define __ASM_ES7000_IPI_H
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
static inline void send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
- if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
}
#endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 0ac17d33a8c7..746f37a7963a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
int (*probe)(void);
int (*apic_id_registered)(void);
- cpumask_t (*target_cpus)(void);
+ const struct cpumask *(*target_cpus)(void);
int int_delivery_mode;
int int_dest_mode;
int ESR_DISABLE;
@@ -57,12 +57,16 @@ struct genapic {
unsigned (*get_apic_id)(unsigned long x);
unsigned long apic_id_mask;
- unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
- cpumask_t (*vector_allocation_domain)(int cpu);
+ unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+ unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+ const struct cpumask *andmask);
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
#ifdef CONFIG_SMP
/* ipi */
- void (*send_IPI_mask)(cpumask_t mask, int vector);
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+ int vector);
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
#endif
@@ -114,6 +118,7 @@ struct genapic {
APICFUNC(get_apic_id) \
.apic_id_mask = APIC_ID_MASK, \
APICFUNC(cpu_mask_to_apicid) \
+ APICFUNC(cpu_mask_to_apicid_and) \
APICFUNC(vector_allocation_domain) \
APICFUNC(acpi_madt_oem_check) \
IPIFUNC(send_IPI_mask) \
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 2cae011668b7..adf32fb56aa6 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_GENAPIC_64_H
#define _ASM_X86_GENAPIC_64_H
+#include <linux/cpumask.h>
+
/*
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
@@ -18,16 +20,20 @@ struct genapic {
u32 int_delivery_mode;
u32 int_dest_mode;
int (*apic_id_registered)(void);
- cpumask_t (*target_cpus)(void);
- cpumask_t (*vector_allocation_domain)(int cpu);
+ const struct cpumask *(*target_cpus)(void);
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
void (*init_apic_ldr)(void);
/* ipi */
- void (*send_IPI_mask)(cpumask_t mask, int vector);
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+ int vector);
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
void (*send_IPI_self)(int vector);
/* */
- unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+ unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+ unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+ const struct cpumask *andmask);
unsigned int (*phys_pkg_id)(int index_msb);
unsigned int (*get_apic_id)(unsigned long x);
unsigned long (*set_apic_id)(unsigned int id);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb28aa9..c745a306f7d3 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
native_apic_mem_write(APIC_ICR, cfg);
}
-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+static inline void send_IPI_mask_sequence(const struct cpumask *mask,
+ int vector)
{
unsigned long flags;
unsigned long query_cpu;
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
* - mbligh
*/
local_irq_save(flags);
- for_each_cpu_mask_nr(query_cpu, mask) {
+ for_each_cpu(query_cpu, mask) {
__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
+static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ if (query_cpu != this_cpu)
+ __send_IPI_dest_field(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
+}
+
#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 28e409fc73f3..592688ed04d3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq)
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
#endif
extern unsigned int do_IRQ(struct pt_regs *regs);
@@ -42,5 +42,6 @@ extern void native_init_IRQ(void);
/* Interrupt vector management */
extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+extern int vector_used_by_percpu_irq(unsigned int vector);
#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8346be87cfa1..730843d1d2fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
+#include <asm/mtrr.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@@ -86,6 +87,7 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
extern spinlock_t kvm_lock;
@@ -180,6 +182,8 @@ struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ struct list_head oos_link;
+
/*
* The following two entries are used to key the shadow page in the
* hash table.
@@ -190,13 +194,16 @@ struct kvm_mmu_page {
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
- unsigned long slot_bitmap; /* One bit set per slot which has memory
- * in this shadow page.
- */
+ /*
+ * One bit set per slot which has memory
+ * in this shadow page.
+ */
+ DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
bool unsync;
- bool unsync_children;
+ bool global;
+ unsigned int unsync_children;
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -327,8 +334,10 @@ struct kvm_vcpu_arch {
bool nmi_pending;
bool nmi_injected;
+ bool nmi_window_open;
- u64 mtrr[0x100];
+ struct mtrr_state_type mtrr_state;
+ u32 pat;
};
struct kvm_mem_alias {
@@ -350,11 +359,13 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
- struct dmar_domain *intel_iommu_domain;
+ struct list_head oos_global_pages;
+ struct iommu_domain *iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
struct hlist_head irq_ack_notifier_list;
+ int vapics_in_nmi_mode;
int round_robin_prev_vcpu;
unsigned int tss_addr;
@@ -378,6 +389,7 @@ struct kvm_vm_stat {
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 mmu_unsync;
+ u32 mmu_unsync_global;
u32 remote_tlb_flush;
u32 lpages;
};
@@ -397,6 +409,7 @@ struct kvm_vcpu_stat {
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
+ u32 request_nmi_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
@@ -405,6 +418,7 @@ struct kvm_vcpu_stat {
u32 insn_emulation_fail;
u32 hypercalls;
u32 irq_injections;
+ u32 nmi_injections;
};
struct descriptor_table {
@@ -477,6 +491,7 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
+ int (*get_mt_mask_shift)(void);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask);
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector);
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes);
+ const u8 *new, int bytes,
+ bool guest_initiated);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@@ -607,6 +624,8 @@ void kvm_disable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
+
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
-#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
#define MSR_IA32_TIME_STAMP_COUNTER 0x010
#define TSS_IOPB_BASE_OFFSET 0x66
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 25179a29f208..6a159732881a 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,6 +123,7 @@ struct decode_cache {
u8 ad_bytes;
u8 rex_prefix;
struct operand src;
+ struct operand src2;
struct operand dst;
bool has_seg_override;
u8 seg_override;
@@ -146,22 +147,18 @@ struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
- /* Linear faulting address (if emulating a page-faulting instruction) */
unsigned long eflags;
-
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
-
u32 cs_base;
/* decode cache */
-
struct decode_cache decode;
};
/* Repeat String Operation Prefix */
-#define REPE_PREFIX 1
-#define REPNE_PREFIX 2
+#define REPE_PREFIX 1
+#define REPNE_PREFIX 2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -170,7 +167,7 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
/* Host execution mode. */
-#if defined(__i386__)
+#if defined(CONFIG_X86_32)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
#elif defined(CONFIG_X86_64)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index d28a507cef39..1caf57628b9c 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -15,7 +15,7 @@
#define SHARED_SWITCHER_PAGES \
DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
/* Pages for switcher itself, then two pages per cpu */
-#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)
+#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
/* We map at -4M for ease of mapping into the guest (one PTE page). */
#define SWITCHER_ADDR 0xFFC00000
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 6cb3a467e067..cc09cbbee27e 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-static inline cpumask_t target_cpus(void)
+static inline const struct cpumask *target_cpus(void)
{
#ifdef CONFIG_SMP
- return cpu_online_map;
+ return cpu_online_mask;
#else
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
#endif
}
@@ -28,6 +28,7 @@ static inline cpumask_t target_cpus(void)
#define apic_id_registered (genapic->apic_id_registered)
#define init_apic_ldr (genapic->init_apic_ldr)
#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
#define phys_pkg_id (genapic->phys_pkg_id)
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
@@ -61,9 +62,19 @@ static inline int apic_id_registered(void)
return physid_isset(read_apic_id(), phys_cpu_present_map);
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return cpus_addr(cpumask)[0];
+ return cpumask_bits(cpumask)[0];
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ unsigned long mask1 = cpumask_bits(cpumask)[0];
+ unsigned long mask2 = cpumask_bits(andmask)[0];
+ unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
+
+ return (unsigned int)(mask1 & mask2 & mask3);
}
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -88,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid)
#endif
}
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -98,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
}
#endif
@@ -131,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu)
static inline int cpu_present_to_apicid(int mps_cpu)
{
- if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
else
return BAD_APICID;
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01ebacf..191312d155da 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
/* Avoid include hell */
#define NMI_VECTOR 0x02
-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
void __send_IPI_shortcut(unsigned int shortcut, int vector);
extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
#ifdef CONFIG_X86_64
#include <asm/genapic.h>
#define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
#else
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_bitmask(mask, vector);
}
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
#endif
static inline void __local_send_IPI_allbutself(int vector)
{
- if (no_broadcast || vector == NMI_VECTOR) {
- cpumask_t mask = cpu_online_map;
-
- cpu_clear(smp_processor_id(), mask);
- send_IPI_mask(mask, vector);
- } else
+ if (no_broadcast || vector == NMI_VECTOR)
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
+ else
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
static inline void __local_send_IPI_all(int vector)
{
if (no_broadcast || vector == NMI_VECTOR)
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
else
__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index e430f47df667..48553e958ad5 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
#define check_phys_apicid_present (genapic->check_phys_apicid_present)
#define check_apicid_used (genapic->check_apicid_used)
#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define enable_apic_mode (genapic->enable_apic_mode)
#define phys_pkg_id (genapic->phys_pkg_id)
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c28f66b..62d14ce3cd00 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
#include <asm/mpspec_def.h>
extern int apic_version[MAX_APICS];
+extern int pic_mode;
#ifdef CONFIG_X86_32
#include <mach_mpspec.h>
extern unsigned int def_to_bigsmp;
extern u8 apicid_2_node[];
-extern int pic_mode;
#ifdef CONFIG_X86_NUMAQ
extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7c1e4258b31e..cb988aab716d 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,6 +57,31 @@ struct mtrr_gentry {
};
#endif /* !__i386__ */
+struct mtrr_var_range {
+ u32 base_lo;
+ u32 base_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+};
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
+struct mtrr_state_type {
+ struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+ mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+ unsigned char enabled;
+ unsigned char have_fixed;
+ mtrr_type def_type;
+};
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
/* These are the various ioctls */
#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06b7a4e..bf37bc49bd8e 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
- return CPU_MASK_ALL;
+ return &CPU_MASK_ALL;
}
#define NO_BALANCE_IRQ (1)
@@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
extern u8 cpu_2_logical_apicid[];
static inline int cpu_to_logical_apicid(int cpu)
{
- if (cpu >= NR_CPUS)
- return BAD_APICID;
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
return (int)cpu_2_logical_apicid[cpu];
}
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void)
* We use physical apicids here, not logical, so just return the default
* physical broadcast to stop people from breaking us
*/
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+ return (int) 0xF;
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
{
return (int) 0xF;
}
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d286cf..a8374c652778 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,25 +1,22 @@
#ifndef __ASM_NUMAQ_IPI_H
#define __ASM_NUMAQ_IPI_H
-void send_IPI_mask_sequence(cpumask_t, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
static inline void send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
}
#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 66834c41c049..a977de23cb4d 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -102,9 +102,9 @@ extern void pci_iommu_alloc(void);
#ifdef CONFIG_NUMA
/* Returns the node based on pci bus */
-static inline int __pcibus_to_node(struct pci_bus *bus)
+static inline int __pcibus_to_node(const struct pci_bus *bus)
{
- struct pci_sysdata *sd = bus->sysdata;
+ const struct pci_sysdata *sd = bus->sysdata;
return sd->node;
}
@@ -113,6 +113,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
{
return node_to_cpumask(__pcibus_to_node(bus));
}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+ return cpumask_of_node(__pcibus_to_node(bus));
+}
#endif
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h
index 1959018aac02..e60fd3e14bdf 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops;
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
- u8 link; /* IRQ line ID, chipset dependent, 0=not routed */
+ u8 link; /* IRQ line ID, chipset dependent,
+ 0 = not routed */
u16 bitmap; /* Available IRQs */
} __attribute__((packed)) irq[4];
u8 slot; /* Slot number, 0=onboard */
@@ -69,11 +70,13 @@ struct irq_routing_table {
u16 version; /* PIRQ_VERSION */
u16 size; /* Table size in bytes */
u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
- u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */
- u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */
+ u16 exclusive_irqs; /* IRQs devoted exclusively to
+ PCI usage */
+ u16 rtr_vendor, rtr_device; /* Vendor and device ID of
+ interrupt router */
u32 miniport_data; /* Crap */
u8 rfu[11];
- u8 checksum; /* Modulo 256 checksum must give zero */
+ u8 checksum; /* Modulo 256 checksum must give 0 */
struct irq_info slots[0];
} __attribute__((packed));
@@ -148,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos)
static inline void mmio_config_writeb(void __iomem *pos, u8 val)
{
- asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
+ asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
}
static inline void mmio_config_writew(void __iomem *pos, u16 val)
{
- asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
+ asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
}
static inline void mmio_config_writel(void __iomem *pos, u32 val)
{
- asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
+ asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
}
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811ce51d9..830b9fcb6427 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
- void (*send_call_func_ipi)(cpumask_t mask);
+ void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
};
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
static inline void arch_send_call_function_ipi(cpumask_t mask)
{
- smp_ops.send_call_func_ipi(mask);
+ smp_ops.send_call_func_ipi(&mask);
}
void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
void native_play_dead(void);
void play_dead_common(void);
-void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f1c2ac..4bb5fb34f030 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
/* CPU_MASK_ALL (0xff) has undefined behaviour with
* dest_LowestPrio mode logical clustered apic interrupt routing
* Just start on cpu 0. IRQ balancing will spread load
*/
- return cpumask_of_cpu(0);
+ return &cpumask_of_cpu(0);
}
#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -52,7 +52,7 @@ static inline void init_apic_ldr(void)
int i;
/* Create logical APIC IDs by counting CPUs already in cluster. */
- for (count = 0, i = NR_CPUS; --i >= 0; ) {
+ for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
lid = cpu_2_logical_apicid[i];
if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
++count;
@@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid)
static inline int cpu_to_logical_apicid(int cpu)
{
#ifdef CONFIG_SMP
- if (cpu >= NR_CPUS)
- return BAD_APICID;
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
return (int)cpu_2_logical_apicid[cpu];
#else
return logical_smp_processor_id();
@@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu)
static inline int cpu_present_to_apicid(int mps_cpu)
{
- if (mps_cpu < NR_CPUS)
+ if (mps_cpu < nr_cpu_ids)
return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
else
return BAD_APICID;
@@ -137,25 +137,25 @@ static inline void enable_apic_mode(void)
{
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
{
int num_bits_set;
int cpus_found = 0;
int cpu;
int apicid;
- num_bits_set = cpus_weight(cpumask);
+ num_bits_set = cpus_weight(*cpumask);
/* Return id to all */
- if (num_bits_set == NR_CPUS)
+ if (num_bits_set >= nr_cpu_ids)
return (int) 0xFF;
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
*/
- cpu = first_cpu(cpumask);
+ cpu = first_cpu(*cpumask);
apicid = cpu_to_logical_apicid(cpu);
while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, cpumask)) {
+ if (cpu_isset(cpu, *cpumask)) {
int new_apicid = cpu_to_logical_apicid(cpu);
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
@@ -170,6 +170,23 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
return apicid;
}
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask)
+{
+ int apicid = cpu_to_logical_apicid(0);
+ cpumask_var_t cpumask;
+
+ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
+ return apicid;
+
+ cpumask_and(cpumask, inmask, andmask);
+ cpumask_and(cpumask, cpumask, cpu_online_mask);
+ apicid = cpu_mask_to_apicid(cpumask);
+
+ free_cpumask_var(cpumask);
+ return apicid;
+}
+
/* cpuid returns the value latched in the HW at reset, not the APIC ID
* register's value. For any box whose BIOS changes APIC IDs, like
* clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7bd7b4..a8a2c24f50cc 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
#ifndef __ASM_SUMMIT_IPI_H
#define __ASM_SUMMIT_IPI_H
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask(&mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(&cpu_online_map, vector);
}
#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/include/asm/svm.h
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 000000000000..ffb08be2a530
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
+/*
+ * sys_ia32.h - Linux ia32 syscall interfaces
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYS_IA32_H
+#define _ASM_X86_SYS_IA32_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/compat.h>
+#include <asm/ia32.h>
+
+/* ia32/sys_ia32.c */
+asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
+asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+
+asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long sys32_fstatat(unsigned int, char __user *,
+ struct stat64 __user *, int);
+struct mmap_arg_struct;
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
+asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
+
+asmlinkage long sys32_pipe(int __user *);
+struct sigaction32;
+struct old_sigaction32;
+asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
+ struct sigaction32 __user *, unsigned int);
+asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
+ struct old_sigaction32 __user *);
+asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
+ compat_sigset_t __user *, unsigned int);
+asmlinkage long sys32_alarm(unsigned int);
+
+struct sel_arg_struct;
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_sysfs(int, u32, u32);
+
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
+ struct compat_timespec __user *);
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
+asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
+
+#ifdef CONFIG_SYSCTL_SYSCALL
+struct sysctl_ia32;
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
+#endif
+
+asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
+asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
+
+asmlinkage long sys32_personality(unsigned long);
+asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
+
+asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
+ unsigned long, unsigned long, unsigned long);
+
+struct oldold_utsname;
+struct old_utsname;
+asmlinkage long sys32_olduname(struct oldold_utsname __user *);
+long sys32_uname(struct old_utsname __user *);
+
+long sys32_ustat(unsigned, struct ustat32 __user *);
+
+asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
+ compat_uptr_t __user *, struct pt_regs *);
+asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
+
+long sys32_lseek(unsigned int, int, unsigned int);
+long sys32_kill(int, int);
+long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
+long sys32_vm86_warning(void);
+long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
+
+asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
+asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
+ unsigned, unsigned, int);
+asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
+asmlinkage long sys32_fallocate(int, int, unsigned,
+ unsigned, unsigned, unsigned);
+
+/* ia32/ia32_signal.c */
+asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
+ stack_ia32_t __user *, struct pt_regs *);
+asmlinkage long sys32_sigreturn(struct pt_regs *);
+asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
+
+/* ia32/ipc32.c */
+asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
+#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ff386ff50ed7..4e2f2e0aab27 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu)
*
* Side note: this function creates the returned cpumask on the stack
* so with a high NR_CPUS count, excessive stack space is used. The
- * node_to_cpumask_ptr function should be used whenever possible.
+ * cpumask_of_node function should be used whenever possible.
*/
static inline cpumask_t node_to_cpumask(int node)
{
return node_to_cpumask_map[node];
}
+/* Returns a bitmask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return &node_to_cpumask_map[node];
+}
+
#else /* CONFIG_X86_64 */
/* Mappings between node number and cpus on that node. */
@@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
extern int cpu_to_node(int cpu);
extern int early_cpu_to_node(int cpu);
-extern const cpumask_t *_node_to_cpumask_ptr(int node);
+extern const cpumask_t *cpumask_of_node(int node);
extern cpumask_t node_to_cpumask(int node);
#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
@@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu)
}
/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
{
return &node_to_cpumask_map[node];
}
@@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node)
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
#define node_to_cpumask_ptr(v, node) \
- const cpumask_t *v = _node_to_cpumask_ptr(node)
+ const cpumask_t *v = cpumask_of_node(node)
#define node_to_cpumask_ptr_next(v, node) \
- v = _node_to_cpumask_ptr(node)
+ v = cpumask_of_node(node)
#endif /* CONFIG_X86_64 */
@@ -187,7 +196,7 @@ extern int __node_distance(int, int);
#define cpu_to_node(cpu) 0
#define early_cpu_to_node(cpu) 0
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
{
return &cpu_online_map;
}
@@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node)
return first_cpu(cpu_online_map);
}
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
#define node_to_cpumask_ptr(v, node) \
- const cpumask_t *v = _node_to_cpumask_ptr(node)
+ const cpumask_t *v = cpumask_of_node(node)
#define node_to_cpumask_ptr_next(v, node) \
- v = _node_to_cpumask_ptr(node)
+ v = cpumask_of_node(node)
#endif
#include <asm-generic/topology.h>
@@ -214,18 +226,20 @@ static inline int node_to_first_cpu(int node)
/* Returns the number of the first CPU on Node 'node'. */
static inline int node_to_first_cpu(int node)
{
- node_to_cpumask_ptr(mask, node);
- return first_cpu(*mask);
+ return cpumask_first(cpumask_of_node(node));
}
#endif
extern cpumask_t cpu_coregroup_map(int cpu);
+extern const struct cpumask *cpu_coregroup_mask(int cpu);
#ifdef ENABLE_TOPO_DEFINES
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
/* indicates that pointers to the topology cpumask_t maps are valid */
#define arch_provides_topology_pointers yes
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e2363253bbbf..50423c7b56b2 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@ struct bau_msg_payload {
* see table 4.2.3.0.1 in broacast_assist spec.
*/
struct bau_msg_header {
- int dest_subnodeid:6; /* must be zero */
+ unsigned int dest_subnodeid:6; /* must be zero */
/* bits 5:0 */
- int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
- /* bits 20:6 */
- int command:8; /* message type */
+ unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
+ /* bits 20:6 */ /* first bit in node_map */
+ unsigned int command:8; /* message type */
/* bits 28:21 */
/* 0x38: SN3net EndPoint Message */
- int rsvd_1:3; /* must be zero */
+ unsigned int rsvd_1:3; /* must be zero */
/* bits 31:29 */
/* int will align on 32 bits */
- int rsvd_2:9; /* must be zero */
+ unsigned int rsvd_2:9; /* must be zero */
/* bits 40:32 */
/* Suppl_A is 56-41 */
- int payload_2a:8; /* becomes byte 16 of msg */
+ unsigned int payload_2a:8;/* becomes byte 16 of msg */
/* bits 48:41 */ /* not currently using */
- int payload_2b:8; /* becomes byte 17 of msg */
+ unsigned int payload_2b:8;/* becomes byte 17 of msg */
/* bits 56:49 */ /* not currently using */
/* Address field (96:57) is never used as an
address (these are address bits 42:3) */
- int rsvd_3:1; /* must be zero */
+ unsigned int rsvd_3:1; /* must be zero */
/* bit 57 */
/* address bits 27:4 are payload */
/* these 24 bits become bytes 12-14 of msg */
- int replied_to:1; /* sent as 0 by the source to byte 12 */
+ unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
/* bit 58 */
- int payload_1a:5; /* not currently used */
+ unsigned int payload_1a:5;/* not currently used */
/* bits 63:59 */
- int payload_1b:8; /* not currently used */
+ unsigned int payload_1b:8;/* not currently used */
/* bits 71:64 */
- int payload_1c:8; /* not currently used */
+ unsigned int payload_1c:8;/* not currently used */
/* bits 79:72 */
- int payload_1d:2; /* not currently used */
+ unsigned int payload_1d:2;/* not currently used */
/* bits 81:80 */
- int rsvd_4:7; /* must be zero */
+ unsigned int rsvd_4:7; /* must be zero */
/* bits 88:82 */
- int sw_ack_flag:1; /* software acknowledge flag */
+ unsigned int sw_ack_flag:1;/* software acknowledge flag */
/* bit 89 */
/* INTD trasactions at destination are to
wait for software acknowledge */
- int rsvd_5:6; /* must be zero */
+ unsigned int rsvd_5:6; /* must be zero */
/* bits 95:90 */
- int rsvd_6:5; /* must be zero */
+ unsigned int rsvd_6:5; /* must be zero */
/* bits 100:96 */
- int int_both:1; /* if 1, interrupt both sockets on the blade */
+ unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
/* bit 101*/
- int fairness:3; /* usually zero */
+ unsigned int fairness:3;/* usually zero */
/* bits 104:102 */
- int multilevel:1; /* multi-level multicast format */
+ unsigned int multilevel:1; /* multi-level multicast format */
/* bit 105 */
/* 0 for TLB: endpoint multi-unicast messages */
- int chaining:1; /* next descriptor is part of this activation*/
+ unsigned int chaining:1;/* next descriptor is part of this activation*/
/* bit 106 */
- int rsvd_7:21; /* must be zero */
+ unsigned int rsvd_7:21; /* must be zero */
/* bits 127:107 */
};
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
new file mode 100644
index 000000000000..593636275238
--- /dev/null
+++ b/arch/x86/include/asm/virtext.h
@@ -0,0 +1,132 @@
+/* CPU virtualization extensions handling
+ *
+ * This should carry the code for handling CPU virtualization extensions
+ * that needs to live in the kernel core.
+ *
+ * Author: Eduardo Habkost <ehabkost@redhat.com>
+ *
+ * Copyright (C) 2008, Red Hat Inc.
+ *
+ * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef _ASM_X86_VIRTEX_H
+#define _ASM_X86_VIRTEX_H
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#include <asm/vmx.h>
+#include <asm/svm.h>
+
+/*
+ * VMX functions:
+ */
+
+static inline int cpu_has_vmx(void)
+{
+ unsigned long ecx = cpuid_ecx(1);
+ return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+}
+
+
+/** Disable VMX on the current CPU
+ *
+ * vmxoff causes a undefined-opcode exception if vmxon was not run
+ * on the CPU previously. Only call this function if you know VMX
+ * is enabled.
+ */
+static inline void cpu_vmxoff(void)
+{
+ asm volatile (ASM_VMX_VMXOFF : : : "cc");
+ write_cr4(read_cr4() & ~X86_CR4_VMXE);
+}
+
+static inline int cpu_vmx_enabled(void)
+{
+ return read_cr4() & X86_CR4_VMXE;
+}
+
+/** Disable VMX if it is enabled on the current CPU
+ *
+ * You shouldn't call this if cpu_has_vmx() returns 0.
+ */
+static inline void __cpu_emergency_vmxoff(void)
+{
+ if (cpu_vmx_enabled())
+ cpu_vmxoff();
+}
+
+/** Disable VMX if it is supported and enabled on the current CPU
+ */
+static inline void cpu_emergency_vmxoff(void)
+{
+ if (cpu_has_vmx())
+ __cpu_emergency_vmxoff();
+}
+
+
+
+
+/*
+ * SVM functions:
+ */
+
+/** Check if the CPU has SVM support
+ *
+ * You can use the 'msg' arg to get a message describing the problem,
+ * if the function returns zero. Simply pass NULL if you are not interested
+ * on the messages; gcc should take care of not generating code for
+ * the messages on this case.
+ */
+static inline int cpu_has_svm(const char **msg)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ if (msg)
+ *msg = "not amd";
+ return 0;
+ }
+
+ cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+ if (eax < SVM_CPUID_FUNC) {
+ if (msg)
+ *msg = "can't execute cpuid_8000000a";
+ return 0;
+ }
+
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+ if (msg)
+ *msg = "svm not available";
+ return 0;
+ }
+ return 1;
+}
+
+
+/** Disable SVM on the current CPU
+ *
+ * You should call this only if cpu_has_svm() returned true.
+ */
+static inline void cpu_svm_disable(void)
+{
+ uint64_t efer;
+
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+ rdmsrl(MSR_EFER, efer);
+ wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+}
+
+/** Makes sure SVM is disabled, if it is supported on the CPU
+ */
+static inline void cpu_emergency_svm_disable(void)
+{
+ if (cpu_has_svm(NULL))
+ cpu_svm_disable();
+}
+
+#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h
index ec5edc339da6..d0238e6151d8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -63,10 +63,13 @@
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
/* VMCS Encodings */
enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
@@ -331,8 +338,9 @@ enum vmcs_field {
#define AR_RESERVD_MASK 0xfffe0f00
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
+#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
@@ -356,4 +364,19 @@ enum vmcs_field {
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
+
+#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
+#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
+#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
+#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
+#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
+#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
+#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
+#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
+
+
+
#endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 65d0b72777ea..29dc0c89d4af 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *obj;
struct acpi_madt_local_apic *lapic;
- cpumask_t tmp_map, new_map;
+ cpumask_var_t tmp_map, new_map;
u8 physid;
int cpu;
+ int retval = -ENOMEM;
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
return -EINVAL;
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
- tmp_map = cpu_present_map;
+ if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
+ goto out;
+
+ if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
+ goto free_tmp_map;
+
+ cpumask_copy(tmp_map, cpu_present_mask);
acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
/*
* If mp_register_lapic successfully generates a new logical cpu
* number, then the following will get us exactly what was mapped
*/
- cpus_andnot(new_map, cpu_present_map, tmp_map);
- if (cpus_empty(new_map)) {
+ cpumask_andnot(new_map, cpu_present_mask, tmp_map);
+ if (cpumask_empty(new_map)) {
printk ("Unable to map lapic to logical cpu number\n");
- return -EINVAL;
+ retval = -EINVAL;
+ goto free_new_map;
}
- cpu = first_cpu(new_map);
+ cpu = cpumask_first(new_map);
*pcpu = cpu;
- return 0;
+ retval = 0;
+
+free_new_map:
+ free_cpumask_var(new_map);
+free_tmp_map:
+ free_cpumask_var(tmp_map);
+out:
+ return retval;
}
/* wrapper to silence section mismatch warning */
@@ -598,7 +613,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
int acpi_unmap_lsapic(int cpu)
{
per_cpu(x86_cpu_to_apicid, cpu) = -1;
- cpu_clear(cpu, cpu_present_map);
+ set_cpu_present(cpu, false);
num_processors--;
return (0);
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b350..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
+#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
+#ifdef CONFIG_IOMMU_API
+#include <linux/iommu.h>
+#endif
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);
+#ifdef CONFIG_IOMMU_API
+static struct iommu_ops amd_iommu_ops;
+#endif
+
/*
* general struct to manage commands send to an IOMMU
*/
@@ -47,6 +55,68 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
+static struct dma_ops_domain *find_protection_domain(u16 devid);
+
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+/*
+ * Initialization code for statistics collection
+ */
+
+DECLARE_STATS_COUNTER(compl_wait);
+DECLARE_STATS_COUNTER(cnt_map_single);
+DECLARE_STATS_COUNTER(cnt_unmap_single);
+DECLARE_STATS_COUNTER(cnt_map_sg);
+DECLARE_STATS_COUNTER(cnt_unmap_sg);
+DECLARE_STATS_COUNTER(cnt_alloc_coherent);
+DECLARE_STATS_COUNTER(cnt_free_coherent);
+DECLARE_STATS_COUNTER(cross_page);
+DECLARE_STATS_COUNTER(domain_flush_single);
+DECLARE_STATS_COUNTER(domain_flush_all);
+DECLARE_STATS_COUNTER(alloced_io_mem);
+DECLARE_STATS_COUNTER(total_map_requests);
+
+static struct dentry *stats_dir;
+static struct dentry *de_isolate;
+static struct dentry *de_fflush;
+
+static void amd_iommu_stats_add(struct __iommu_counter *cnt)
+{
+ if (stats_dir == NULL)
+ return;
+
+ cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
+ &cnt->value);
+}
+
+static void amd_iommu_stats_init(void)
+{
+ stats_dir = debugfs_create_dir("amd-iommu", NULL);
+ if (stats_dir == NULL)
+ return;
+
+ de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
+ (u32 *)&amd_iommu_isolate);
+
+ de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
+ (u32 *)&amd_iommu_unmap_flush);
+
+ amd_iommu_stats_add(&compl_wait);
+ amd_iommu_stats_add(&cnt_map_single);
+ amd_iommu_stats_add(&cnt_unmap_single);
+ amd_iommu_stats_add(&cnt_map_sg);
+ amd_iommu_stats_add(&cnt_unmap_sg);
+ amd_iommu_stats_add(&cnt_alloc_coherent);
+ amd_iommu_stats_add(&cnt_free_coherent);
+ amd_iommu_stats_add(&cross_page);
+ amd_iommu_stats_add(&domain_flush_single);
+ amd_iommu_stats_add(&domain_flush_all);
+ amd_iommu_stats_add(&alloced_io_mem);
+ amd_iommu_stats_add(&total_map_requests);
+}
+
+#endif
/* returns !0 if the IOMMU is caching non-present entries in its TLB */
static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
if (!ret)
- iommu->need_sync = 1;
+ iommu->need_sync = true;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
/*
+ * This function waits until an IOMMU has completed a completion
+ * wait command
+ */
+static void __iommu_wait_for_completion(struct amd_iommu *iommu)
+{
+ int ready = 0;
+ unsigned status = 0;
+ unsigned long i = 0;
+
+ INC_STATS_COUNTER(compl_wait);
+
+ while (!ready && (i < EXIT_LOOP_COUNT)) {
+ ++i;
+ /* wait for the bit to become one */
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+ }
+
+ /* set bit back to zero */
+ status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+ writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ if (unlikely(i == EXIT_LOOP_COUNT))
+ panic("AMD IOMMU: Completion wait loop failed\n");
+}
+
+/*
+ * This function queues a completion wait command into the command
+ * buffer of an IOMMU
+ */
+static int __iommu_completion_wait(struct amd_iommu *iommu)
+{
+ struct iommu_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
+ CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+ return __iommu_queue_command(iommu, &cmd);
+}
+
+/*
* This function is called whenever we need to ensure that the IOMMU has
* completed execution of all commands we sent. It sends a
* COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
- int ret = 0, ready = 0;
- unsigned status = 0;
- struct iommu_cmd cmd;
- unsigned long flags, i = 0;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
- CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+ int ret = 0;
+ unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
if (!iommu->need_sync)
goto out;
- iommu->need_sync = 0;
+ ret = __iommu_completion_wait(iommu);
- ret = __iommu_queue_command(iommu, &cmd);
+ iommu->need_sync = false;
if (ret)
goto out;
- while (!ready && (i < EXIT_LOOP_COUNT)) {
- ++i;
- /* wait for the bit to become one */
- status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
- ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
- }
-
- /* set bit back to zero */
- status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
- writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
- if (unlikely(i == EXIT_LOOP_COUNT))
- panic("AMD IOMMU: Completion wait loop failed\n");
+ __iommu_wait_for_completion(iommu);
out:
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
return ret;
}
+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+ u16 domid, int pde, int s)
+{
+ memset(cmd, 0, sizeof(*cmd));
+ address &= PAGE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+ cmd->data[1] |= domid;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[3] = upper_32_bits(address);
+ if (s) /* size bit - we flush more than one 4kb page */
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
/*
* Generic command send function for invalidaing TLB entries
*/
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
struct iommu_cmd cmd;
int ret;
- memset(&cmd, 0, sizeof(cmd));
- address &= PAGE_MASK;
- CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
- cmd.data[1] |= domid;
- cmd.data[2] = lower_32_bits(address);
- cmd.data[3] = upper_32_bits(address);
- if (s) /* size bit - we flush more than one 4kb page */
- cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
- if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
- cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+ __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
ret = iommu_queue_command(iommu, &cmd);
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
{
u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+ INC_STATS_COUNTER(domain_flush_single);
+
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
}
+/*
+ * This function is used to flush the IO/TLB for a given protection domain
+ * on every IOMMU in the system
+ */
+static void iommu_flush_domain(u16 domid)
+{
+ unsigned long flags;
+ struct amd_iommu *iommu;
+ struct iommu_cmd cmd;
+
+ INC_STATS_COUNTER(domain_flush_all);
+
+ __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+ domid, 1, 1);
+
+ list_for_each_entry(iommu, &amd_iommu_list, list) {
+ spin_lock_irqsave(&iommu->lock, flags);
+ __iommu_queue_command(iommu, &cmd);
+ __iommu_completion_wait(iommu);
+ __iommu_wait_for_completion(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+}
+
/****************************************************************************
*
* The functions below are used the create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
* supporting all features of AMD IOMMU page tables like level skipping
* and full 64 bit address spaces.
*/
-static int iommu_map(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- int prot)
+static int iommu_map_page(struct protection_domain *dom,
+ unsigned long bus_addr,
+ unsigned long phys_addr,
+ int prot)
{
u64 __pte, *pte, *page;
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
return 0;
}
+static void iommu_unmap_page(struct protection_domain *dom,
+ unsigned long bus_addr)
+{
+ u64 *pte;
+
+ pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+ *pte = 0;
+}
+
/*
* This function checks if a specific unity mapping entry is needed for
* this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
- ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
+ ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
if (ret)
return ret;
/*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
return id;
}
+static void domain_id_free(int id)
+{
+ unsigned long flags;
+
+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ if (id > 0 && id < MAX_DOMAIN_ID)
+ __clear_bit(id, amd_iommu_pd_alloc_bitmap);
+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
/*
* Used to reserve address ranges in the aperture (e.g. for exclusion
* ranges.
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
iommu_area_reserve(dom->bitmap, start_page, pages);
}
-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+static void free_pagetable(struct protection_domain *domain)
{
int i, j;
u64 *p1, *p2, *p3;
- p1 = dma_dom->domain.pt_root;
+ p1 = domain->pt_root;
if (!p1)
return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
}
free_page((unsigned long)p1);
+
+ domain->pt_root = NULL;
}
/*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
if (!dom)
return;
- dma_ops_free_pagetable(dom);
+ free_pagetable(&dom->domain);
kfree(dom->pte_pages);
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ dma_dom->domain.flags = PD_DMA_OPS_MASK;
dma_dom->domain.priv = dma_dom;
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
}
/*
+ * little helper function to check whether a given protection domain is a
+ * dma_ops domain
+ */
+static bool dma_ops_domain(struct protection_domain *domain)
+{
+ return domain->flags & PD_DMA_OPS_MASK;
+}
+
+/*
* Find out the protection domain structure for a given PCI device. This
* will give us the pointer to the page table root for example.
*/
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
*/
-static void set_device_domain(struct amd_iommu *iommu,
- struct protection_domain *domain,
- u16 devid)
+static void attach_device(struct amd_iommu *iommu,
+ struct protection_domain *domain,
+ u16 devid)
{
unsigned long flags;
-
u64 pte_root = virt_to_phys(domain->pt_root);
+ domain->dev_cnt += 1;
+
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
iommu_queue_inv_dev_entry(iommu, devid);
}
+/*
+ * Removes a device from a protection domain (unlocked)
+ */
+static void __detach_device(struct protection_domain *domain, u16 devid)
+{
+
+ /* lock domain */
+ spin_lock(&domain->lock);
+
+ /* remove domain from the lookup table */
+ amd_iommu_pd_table[devid] = NULL;
+
+ /* remove entry from the device table seen by the hardware */
+ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+ amd_iommu_dev_table[devid].data[1] = 0;
+ amd_iommu_dev_table[devid].data[2] = 0;
+
+ /* decrease reference counter */
+ domain->dev_cnt -= 1;
+
+ /* ready */
+ spin_unlock(&domain->lock);
+}
+
+/*
+ * Removes a device from a protection domain (with devtable_lock held)
+ */
+static void detach_device(struct protection_domain *domain, u16 devid)
+{
+ unsigned long flags;
+
+ /* lock device table */
+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ __detach_device(domain, devid);
+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int device_change_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+ struct protection_domain *domain;
+ struct dma_ops_domain *dma_domain;
+ struct amd_iommu *iommu;
+ int order = amd_iommu_aperture_order;
+ unsigned long flags;
+
+ if (devid > amd_iommu_last_bdf)
+ goto out;
+
+ devid = amd_iommu_alias_table[devid];
+
+ iommu = amd_iommu_rlookup_table[devid];
+ if (iommu == NULL)
+ goto out;
+
+ domain = domain_for_device(devid);
+
+ if (domain && !dma_ops_domain(domain))
+ WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
+ "to a non-dma-ops domain\n", dev_name(dev));
+
+ switch (action) {
+ case BUS_NOTIFY_BOUND_DRIVER:
+ if (domain)
+ goto out;
+ dma_domain = find_protection_domain(devid);
+ if (!dma_domain)
+ dma_domain = iommu->default_dom;
+ attach_device(iommu, &dma_domain->domain, devid);
+ printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
+ "device %s\n", dma_domain->domain.id, dev_name(dev));
+ break;
+ case BUS_NOTIFY_UNBIND_DRIVER:
+ if (!domain)
+ goto out;
+ detach_device(domain, devid);
+ break;
+ case BUS_NOTIFY_ADD_DEVICE:
+ /* allocate a protection domain if a device is added */
+ dma_domain = find_protection_domain(devid);
+ if (dma_domain)
+ goto out;
+ dma_domain = dma_ops_domain_alloc(iommu, order);
+ if (!dma_domain)
+ goto out;
+ dma_domain->target_dev = devid;
+
+ spin_lock_irqsave(&iommu_pd_list_lock, flags);
+ list_add_tail(&dma_domain->list, &iommu_pd_list);
+ spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+ break;
+ default:
+ goto out;
+ }
+
+ iommu_queue_inv_dev_entry(iommu, devid);
+ iommu_completion_wait(iommu);
+
+out:
+ return 0;
+}
+
+struct notifier_block device_nb = {
+ .notifier_call = device_change_notifier,
+};
+
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
list_for_each_entry(entry, &iommu_pd_list, list) {
if (entry->target_dev == devid) {
ret = entry;
- list_del(&ret->list);
break;
}
}
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
if (!dma_dom)
dma_dom = (*iommu)->default_dom;
*domain = &dma_dom->domain;
- set_device_domain(*iommu, *domain, *bdf);
+ attach_device(*iommu, *domain, *bdf);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
- "device ", (*domain)->id);
- print_devid(_bdf, 1);
+ "device %s\n", (*domain)->id, dev_name(dev));
}
if (domain_for_device(_bdf) == NULL)
- set_device_domain(*iommu, *domain, _bdf);
+ attach_device(*iommu, *domain, _bdf);
return 1;
}
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
pages = iommu_num_pages(paddr, size, PAGE_SIZE);
paddr &= PAGE_MASK;
+ INC_STATS_COUNTER(total_map_requests);
+
+ if (pages > 1)
+ INC_STATS_COUNTER(cross_page);
+
if (align)
align_mask = (1UL << get_order(size)) - 1;
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
}
address += offset;
+ ADD_STATS_COUNTER(alloced_io_mem, size);
+
if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
iommu_flush_tlb(iommu, dma_dom->domain.id);
dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
start += PAGE_SIZE;
}
+ SUB_STATS_COUNTER(alloced_io_mem, size);
+
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
dma_addr_t addr;
u64 dma_mask;
+ INC_STATS_COUNTER(cnt_map_single);
+
if (!check_device(dev))
return bad_dma_address;
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
/* device not handled by any AMD IOMMU */
return (dma_addr_t)paddr;
+ if (!dma_ops_domain(domain))
+ return bad_dma_address;
+
spin_lock_irqsave(&domain->lock, flags);
addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
struct protection_domain *domain;
u16 devid;
+ INC_STATS_COUNTER(cnt_unmap_single);
+
if (!check_device(dev) ||
!get_device_resources(dev, &iommu, &domain, &devid))
/* device not handled by any AMD IOMMU */
return;
+ if (!dma_ops_domain(domain))
+ return;
+
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
int mapped_elems = 0;
u64 dma_mask;
+ INC_STATS_COUNTER(cnt_map_sg);
+
if (!check_device(dev))
return 0;
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
if (!iommu || !domain)
return map_sg_no_iommu(dev, sglist, nelems, dir);
+ if (!dma_ops_domain(domain))
+ return 0;
+
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
u16 devid;
int i;
+ INC_STATS_COUNTER(cnt_unmap_sg);
+
if (!check_device(dev) ||
!get_device_resources(dev, &iommu, &domain, &devid))
return;
+ if (!dma_ops_domain(domain))
+ return;
+
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
phys_addr_t paddr;
u64 dma_mask = dev->coherent_dma_mask;
+ INC_STATS_COUNTER(cnt_alloc_coherent);
+
if (!check_device(dev))
return NULL;
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
return virt_addr;
}
+ if (!dma_ops_domain(domain))
+ goto out_free;
+
if (!dma_mask)
dma_mask = *dev->dma_mask;
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
size, DMA_BIDIRECTIONAL, true, dma_mask);
- if (*dma_addr == bad_dma_address) {
- free_pages((unsigned long)virt_addr, get_order(size));
- virt_addr = NULL;
- goto out;
- }
+ if (*dma_addr == bad_dma_address)
+ goto out_free;
iommu_completion_wait(iommu);
-out:
spin_unlock_irqrestore(&domain->lock, flags);
return virt_addr;
+
+out_free:
+
+ free_pages((unsigned long)virt_addr, get_order(size));
+
+ return NULL;
}
/*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
struct protection_domain *domain;
u16 devid;
+ INC_STATS_COUNTER(cnt_free_coherent);
+
if (!check_device(dev))
return;
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
if (!iommu || !domain)
goto free_mem;
+ if (!dma_ops_domain(domain))
+ goto free_mem;
+
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1296,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
* we don't need to preallocate the protection domains anymore.
* For now we have to.
*/
-void prealloc_protection_domains(void)
+static void prealloc_protection_domains(void)
{
struct pci_dev *dev = NULL;
struct dma_ops_domain *dma_dom;
@@ -1305,7 +1625,7 @@ void prealloc_protection_domains(void)
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- devid = (dev->bus->number << 8) | dev->devfn;
+ devid = calc_devid(dev->bus->number, dev->devfn);
if (devid > amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
if (iommu->default_dom == NULL)
return -ENOMEM;
+ iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
ret = iommu_init_unity_mappings(iommu);
if (ret)
goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
/* Make the driver finally visible to the drivers */
dma_ops = &amd_iommu_dma_ops;
+ register_iommu(&amd_iommu_ops);
+
+ bus_register_notifier(&pci_bus_type, &device_nb);
+
+ amd_iommu_stats_init();
+
return 0;
free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
return ret;
}
+
+/*****************************************************************************
+ *
+ * The following functions belong to the exported interface of AMD IOMMU
+ *
+ * This interface allows access to lower level functions of the IOMMU
+ * like protection domain handling and assignement of devices to domains
+ * which is not possible with the dma_ops interface.
+ *
+ *****************************************************************************/
+
+static void cleanup_domain(struct protection_domain *domain)
+{
+ unsigned long flags;
+ u16 devid;
+
+ write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+
+ for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
+ if (amd_iommu_pd_table[devid] == domain)
+ __detach_device(domain, devid);
+
+ write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+ struct protection_domain *domain;
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return -ENOMEM;
+
+ spin_lock_init(&domain->lock);
+ domain->mode = PAGE_MODE_3_LEVEL;
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ goto out_free;
+ domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!domain->pt_root)
+ goto out_free;
+
+ dom->priv = domain;
+
+ return 0;
+
+out_free:
+ kfree(domain);
+
+ return -ENOMEM;
+}
+
+static void amd_iommu_domain_destroy(struct iommu_domain *dom)
+{
+ struct protection_domain *domain = dom->priv;
+
+ if (!domain)
+ return;
+
+ if (domain->dev_cnt > 0)
+ cleanup_domain(domain);
+
+ BUG_ON(domain->dev_cnt != 0);
+
+ free_pagetable(domain);
+
+ domain_id_free(domain->id);
+
+ kfree(domain);
+
+ dom->priv = NULL;
+}
+
+static void amd_iommu_detach_device(struct iommu_domain *dom,
+ struct device *dev)
+{
+ struct protection_domain *domain = dom->priv;
+ struct amd_iommu *iommu;
+ struct pci_dev *pdev;
+ u16 devid;
+
+ if (dev->bus != &pci_bus_type)
+ return;
+
+ pdev = to_pci_dev(dev);
+
+ devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+ if (devid > 0)
+ detach_device(domain, devid);
+
+ iommu = amd_iommu_rlookup_table[devid];
+ if (!iommu)
+ return;
+
+ iommu_queue_inv_dev_entry(iommu, devid);
+ iommu_completion_wait(iommu);
+}
+
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+ struct device *dev)
+{
+ struct protection_domain *domain = dom->priv;
+ struct protection_domain *old_domain;
+ struct amd_iommu *iommu;
+ struct pci_dev *pdev;
+ u16 devid;
+
+ if (dev->bus != &pci_bus_type)
+ return -EINVAL;
+
+ pdev = to_pci_dev(dev);
+
+ devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+ if (devid >= amd_iommu_last_bdf ||
+ devid != amd_iommu_alias_table[devid])
+ return -EINVAL;
+
+ iommu = amd_iommu_rlookup_table[devid];
+ if (!iommu)
+ return -EINVAL;
+
+ old_domain = domain_for_device(devid);
+ if (old_domain)
+ return -EBUSY;
+
+ attach_device(iommu, domain, devid);
+
+ iommu_completion_wait(iommu);
+
+ return 0;
+}
+
+static int amd_iommu_map_range(struct iommu_domain *dom,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int iommu_prot)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
+ int prot = 0;
+ int ret;
+
+ if (iommu_prot & IOMMU_READ)
+ prot |= IOMMU_PROT_IR;
+ if (iommu_prot & IOMMU_WRITE)
+ prot |= IOMMU_PROT_IW;
+
+ iova &= PAGE_MASK;
+ paddr &= PAGE_MASK;
+
+ for (i = 0; i < npages; ++i) {
+ ret = iommu_map_page(domain, iova, paddr, prot);
+ if (ret)
+ return ret;
+
+ iova += PAGE_SIZE;
+ paddr += PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+static void amd_iommu_unmap_range(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
+{
+
+ struct protection_domain *domain = dom->priv;
+ unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
+
+ iova &= PAGE_MASK;
+
+ for (i = 0; i < npages; ++i) {
+ iommu_unmap_page(domain, iova);
+ iova += PAGE_SIZE;
+ }
+
+ iommu_flush_domain(domain->id);
+}
+
+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
+ unsigned long iova)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long offset = iova & ~PAGE_MASK;
+ phys_addr_t paddr;
+ u64 *pte;
+
+ pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
+
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ paddr = *pte & IOMMU_PAGE_MASK;
+ paddr |= offset;
+
+ return paddr;
+}
+
+static struct iommu_ops amd_iommu_ops = {
+ .domain_init = amd_iommu_domain_init,
+ .domain_destroy = amd_iommu_domain_destroy,
+ .attach_dev = amd_iommu_attach_device,
+ .detach_dev = amd_iommu_detach_device,
+ .map = amd_iommu_map_range,
+ .unmap = amd_iommu_unmap_range,
+ .iova_to_phys = amd_iommu_iova_to_phys,
+};
+
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55ca..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true; /* if true, device isolation is
+ enabled */
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -243,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
}
/* Function to enable the hardware */
-void __init iommu_enable(struct amd_iommu *iommu)
+static void __init iommu_enable(struct amd_iommu *iommu)
{
- printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
- "at %02x:%02x.%x cap 0x%hx\n",
- iommu->dev->bus->number,
- PCI_SLOT(iommu->dev->devfn),
- PCI_FUNC(iommu->dev->devfn),
- iommu->cap_ptr);
+ printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+ dev_name(&iommu->dev->dev), iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
/* Function to enable IOMMU event logging and event interrupts */
-void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
{
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
if (strncmp(str, "isolate", 7) == 0)
- amd_iommu_isolate = 1;
+ amd_iommu_isolate = true;
if (strncmp(str, "share", 5) == 0)
- amd_iommu_isolate = 0;
+ amd_iommu_isolate = false;
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index b5229affb953..b13d3c4dbd42 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
#ifdef HAVE_X2APIC
int x2apic;
/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
+static int x2apic_preenabled;
+static int disable_x2apic;
static __init int setup_nox2apic(char *str)
{
disable_x2apic = 1;
@@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
int first_system_vector = 0xfe;
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
/*
* Debug level, exported for io_apic.c
*/
@@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
struct clock_event_device *evt);
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
static void apic_pm_activate(void);
/*
@@ -228,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id)
apic_write(APIC_ICR, low);
}
-u64 xapic_icr_read(void)
+static u64 xapic_icr_read(void)
{
u32 icr1, icr2;
@@ -268,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id)
wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
}
-u64 x2apic_icr_read(void)
+static u64 x2apic_icr_read(void)
{
unsigned long val;
@@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
/*
* Local APIC timer broadcast function
*/
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
memcpy(levt, &lapic_clockevent, sizeof(*levt));
- levt->cpumask = cpumask_of_cpu(smp_processor_id());
+ levt->cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(levt);
}
@@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
- cpumask_t tmp_map;
/*
* Validate version
*/
if (version == 0x0) {
pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ version);
version = 0x10;
}
apic_version[apicid] = version;
- if (num_processors >= NR_CPUS) {
- pr_warning("WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
+ if (num_processors >= nr_cpu_ids) {
+ int max = nr_cpu_ids;
+ int thiscpu = max + disabled_cpus;
+
+ pr_warning(
+ "ACPI: NR_CPUS/possible_cpus limit of %i reached."
+ " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+ disabled_cpus++;
return;
}
num_processors++;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
physid_set(apicid, phys_cpu_present_map);
if (apicid == boot_cpu_physical_apicid) {
@@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
#endif
- cpu_set(cpu, cpu_possible_map);
- cpu_set(cpu, cpu_present_map);
+ set_cpu_possible(cpu, true);
+ set_cpu_present(cpu, true);
}
#ifdef CONFIG_X86_64
@@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void)
bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
/* are we being called early in kernel startup? */
if (bios_cpu_apicid) {
id = bios_cpu_apicid[i];
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac26..f63882728d91 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
#include <asm/uv/bios.h>
#include <asm/uv/uv_hub.h>
-struct uv_systab uv_systab;
+static struct uv_systab uv_systab;
s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 42e0853030cb..3f95a40f718a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1) {
- if (smp_num_siblings > NR_CPUS) {
+ if (smp_num_siblings > nr_cpu_ids) {
printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
smp_num_siblings);
smp_num_siblings = 1;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 88ea02dcb622..28102ad1a363 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
}
}
+static void free_acpi_perf_data(void)
+{
+ unsigned int i;
+
+ /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
+ for_each_possible_cpu(i)
+ free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
+ ->shared_cpu_map);
+ free_percpu(acpi_perf_data);
+}
+
/*
* acpi_cpufreq_early_init - initialize ACPI P-States library
*
@@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
*/
static int __init acpi_cpufreq_early_init(void)
{
+ unsigned int i;
dprintk("acpi_cpufreq_early_init\n");
acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
@@ -534,6 +546,16 @@ static int __init acpi_cpufreq_early_init(void)
dprintk("Memory allocation error for acpi_perf_data.\n");
return -ENOMEM;
}
+ for_each_possible_cpu(i) {
+ if (!alloc_cpumask_var_node(
+ &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
+ GFP_KERNEL, cpu_to_node(i))) {
+
+ /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
+ free_acpi_perf_data();
+ return -ENOMEM;
+ }
+ }
/* Do initialization in ACPI core */
acpi_processor_preregister_performance(acpi_perf_data);
@@ -604,9 +626,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
*/
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
- policy->cpus = perf->shared_cpu_map;
+ cpumask_copy(&policy->cpus, perf->shared_cpu_map);
}
- policy->related_cpus = perf->shared_cpu_map;
+ cpumask_copy(&policy->related_cpus, perf->shared_cpu_map);
#ifdef CONFIG_SMP
dmi_check_system(sw_any_bug_dmi_table);
@@ -795,7 +817,7 @@ static int __init acpi_cpufreq_init(void)
ret = cpufreq_register_driver(&acpi_cpufreq_driver);
if (ret)
- free_percpu(acpi_perf_data);
+ free_acpi_perf_data();
return ret;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b8e05ee4f736..beea4466b063 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
switch (c->x86_model) {
case 0x0E: /* Core */
case 0x0F: /* Core Duo */
+ case 0x16: /* Celeron Core */
p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
case 0x0D: /* Pentium M (Dothan) */
@@ -171,7 +172,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
}
if (c->x86 != 0xF) {
- printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+ if (!cpu_has(c, X86_FEATURE_EST))
+ printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
+ "Please send an e-mail to <cpufreq@vger.kernel.org>\n");
return 0;
}
@@ -274,6 +277,7 @@ static struct cpufreq_driver p4clockmod_driver = {
.name = "p4-clockmod",
.owner = THIS_MODULE,
.attr = p4clockmod_attr,
+ .hide_interface = 1,
};
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7c7d56b43136..1b446d79a8fd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void)
goto err0;
}
+ if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+ GFP_KERNEL)) {
+ retval = -ENOMEM;
+ goto err05;
+ }
+
if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
retval = -EIO;
goto err1;
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void)
err2:
acpi_processor_unregister_performance(acpi_processor_perf, 0);
err1:
+ free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+err05:
kfree(acpi_processor_perf);
err0:
printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
#ifdef CONFIG_X86_POWERNOW_K7_ACPI
if (acpi_processor_perf) {
acpi_processor_unregister_performance(acpi_processor_perf, 0);
+ free_cpumask_var(acpi_processor_perf->shared_cpu_map);
kfree(acpi_processor_perf);
}
#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7f05f44b97e9..c3c9adbaa26f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
- int ret_val;
+ int ret_val = -ENODEV;
if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+ printk(KERN_ERR PFX
+ "unable to alloc powernow_k8_data cpumask\n");
+ ret_val = -ENOMEM;
+ goto err_out_mem;
+ }
+
return 0;
err_out_mem:
@@ -826,7 +833,7 @@ err_out:
/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
data->acpi_data.state_count = 0;
- return -ENODEV;
+ return ret_val;
}
static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
if (data->acpi_data.state_count)
acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ free_cpumask_var(data->acpi_data.shared_cpu_map);
}
#else
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
data->cpu = pol->cpu;
data->currpstate = HW_PSTATE_INVALID;
- if (powernow_k8_cpu_init_acpi(data)) {
+ rc = powernow_k8_cpu_init_acpi(data);
+ if (rc) {
/*
* Use the PSB BIOS structure. This is only availabe on
* an UP version, and is deprecated by AMD.
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
"ACPI maintainers and complain to your BIOS "
"vendor.\n");
#endif
- kfree(data);
- return -ENODEV;
+ goto err_out;
}
if (pol->cpu != 0) {
printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
"CPU other than CPU0. Complain to your BIOS "
"vendor.\n");
- kfree(data);
- return -ENODEV;
+ goto err_out;
}
rc = find_psb_table(data);
if (rc) {
- kfree(data);
- return -ENODEV;
+ goto err_out;
}
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 3b5f06423e77..f0ea6fa2f53c 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -459,9 +459,7 @@ static int centrino_verify (struct cpufreq_policy *policy)
* Sets a new CPUFreq policy.
*/
struct allmasks {
- cpumask_t online_policy_cpus;
cpumask_t saved_mask;
- cpumask_t set_mask;
cpumask_t covered_cpus;
};
@@ -475,9 +473,7 @@ static int centrino_target (struct cpufreq_policy *policy,
int retval = 0;
unsigned int j, k, first_cpu, tmp;
CPUMASK_ALLOC(allmasks);
- CPUMASK_PTR(online_policy_cpus, allmasks);
CPUMASK_PTR(saved_mask, allmasks);
- CPUMASK_PTR(set_mask, allmasks);
CPUMASK_PTR(covered_cpus, allmasks);
if (unlikely(allmasks == NULL))
@@ -497,30 +493,28 @@ static int centrino_target (struct cpufreq_policy *policy,
goto out;
}
-#ifdef CONFIG_HOTPLUG_CPU
- /* cpufreq holds the hotplug lock, so we are safe from here on */
- cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
-#else
- *online_policy_cpus = policy->cpus;
-#endif
-
*saved_mask = current->cpus_allowed;
first_cpu = 1;
cpus_clear(*covered_cpus);
- for_each_cpu_mask_nr(j, *online_policy_cpus) {
+ for_each_cpu_mask_nr(j, policy->cpus) {
+ const cpumask_t *mask;
+
+ /* cpufreq holds the hotplug lock, so we are safe here */
+ if (!cpu_online(j))
+ continue;
+
/*
* Support for SMP systems.
* Make sure we are running on CPU that wants to change freq
*/
- cpus_clear(*set_mask);
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
- cpus_or(*set_mask, *set_mask, *online_policy_cpus);
+ mask = &policy->cpus;
else
- cpu_set(j, *set_mask);
+ mask = &cpumask_of_cpu(j);
- set_cpus_allowed_ptr(current, set_mask);
+ set_cpus_allowed_ptr(current, mask);
preempt_disable();
- if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
+ if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
dprintk("couldn't limit to CPUs in this domain\n");
retval = -EAGAIN;
if (first_cpu) {
@@ -548,7 +542,9 @@ static int centrino_target (struct cpufreq_policy *policy,
dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
target_freq, freqs.old, freqs.new, msr);
- for_each_cpu_mask_nr(k, *online_policy_cpus) {
+ for_each_cpu_mask_nr(k, policy->cpus) {
+ if (!cpu_online(k))
+ continue;
freqs.cpu = k;
cpufreq_notify_transition(&freqs,
CPUFREQ_PRECHANGE);
@@ -571,7 +567,9 @@ static int centrino_target (struct cpufreq_policy *policy,
preempt_enable();
}
- for_each_cpu_mask_nr(k, *online_policy_cpus) {
+ for_each_cpu_mask_nr(k, policy->cpus) {
+ if (!cpu_online(k))
+ continue;
freqs.cpu = k;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -584,18 +582,17 @@ static int centrino_target (struct cpufreq_policy *policy,
* Best effort undo..
*/
- if (!cpus_empty(*covered_cpus))
- for_each_cpu_mask_nr(j, *covered_cpus) {
- set_cpus_allowed_ptr(current,
- &cpumask_of_cpu(j));
- wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
- }
+ for_each_cpu_mask_nr(j, *covered_cpus) {
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
+ wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+ }
tmp = freqs.new;
freqs.new = freqs.old;
freqs.old = tmp;
- for_each_cpu_mask_nr(j, *online_policy_cpus) {
- freqs.cpu = j;
+ for_each_cpu_mask_nr(j, policy->cpus) {
+ if (!cpu_online(j))
+ continue;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 98d4fdb7dc04..cdac7d62369b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void)
case 3:
fsb = 166667;
break;
+ case 2:
+ fsb = 200000;
+ break;
+ case 0:
+ fsb = 266667;
+ break;
+ case 4:
+ fsb = 333333;
+ break;
default:
printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 68b5d8681cbb..48533d77be78 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
per_cpu(cpuid4_info, cpu) = NULL;
}
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static void __cpuinit get_cpu_leaves(void *_retval)
{
- struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
- cpumask_t oldmask;
-
- if (num_cache_leaves == 0)
- return -ENOENT;
-
- per_cpu(cpuid4_info, cpu) = kzalloc(
- sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (per_cpu(cpuid4_info, cpu) == NULL)
- return -ENOMEM;
-
- oldmask = current->cpus_allowed;
- retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- if (retval)
- goto out;
+ int j, *retval = _retval, cpu = smp_processor_id();
/* Do cpuid and store the results */
for (j = 0; j < num_cache_leaves; j++) {
+ struct _cpuid4_info *this_leaf;
this_leaf = CPUID4_INFO_IDX(cpu, j);
- retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0)) {
+ *retval = cpuid4_cache_lookup(j, this_leaf);
+ if (unlikely(*retval < 0)) {
int i;
for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
}
cache_shared_cpu_map_setup(cpu, j);
}
- set_cpus_allowed_ptr(current, &oldmask);
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+ int retval;
+
+ if (num_cache_leaves == 0)
+ return -ENOENT;
+
+ per_cpu(cpuid4_info, cpu) = kzalloc(
+ sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+ if (per_cpu(cpuid4_info, cpu) == NULL)
+ return -ENOMEM;
-out:
+ smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
if (retval) {
kfree(per_cpu(cpuid4_info, cpu));
per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
cpumask_t *mask = &this_leaf->shared_cpu_map;
n = type?
- cpulist_scnprintf(buf, len-2, *mask):
- cpumask_scnprintf(buf, len-2, *mask);
+ cpulist_scnprintf(buf, len-2, mask) :
+ cpumask_scnprintf(buf, len-2, mask);
buf[n++] = '\n';
buf[n] = '\0';
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9e7a05..a5a5e0530370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
* CPU Initialization
*/
+struct thresh_restart {
+ struct threshold_block *b;
+ int reset;
+ u16 old_limit;
+};
+
/* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
- int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
{
+ struct thresh_restart *tr = _tr;
u32 mci_misc_hi, mci_misc_lo;
- rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+ rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
- if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
- reset = 1; /* limit cannot be lower than err count */
+ if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+ tr->reset = 1; /* limit cannot be lower than err count */
- if (reset) { /* reset err count and overflow bit */
+ if (tr->reset) { /* reset err count and overflow bit */
mci_misc_hi =
(mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - b->threshold_limit);
- } else if (old_limit) { /* change limit w/o reset */
+ (THRESHOLD_MAX - tr->b->threshold_limit);
+ } else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (mci_misc_hi & THRESHOLD_MAX) +
- (old_limit - b->threshold_limit);
+ (tr->old_limit - tr->b->threshold_limit);
mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
(new_count & THRESHOLD_MAX);
}
- b->interrupt_enable ?
+ tr->b->interrupt_enable ?
(mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
(mci_misc_hi &= ~MASK_INT_TYPE_HI);
mci_misc_hi |= MASK_COUNT_EN_HI;
- wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+ wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+ return 0;
}
/* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int cpu = smp_processor_id();
u8 lvt_off;
u32 low = 0, high = 0, address = 0;
+ struct thresh_restart tr;
for (bank = 0; bank < NR_BANKS; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
wrmsr(address, low, high);
threshold_defaults.address = address;
- threshold_restart_bank(&threshold_defaults, 0, 0);
+ tr.b = &threshold_defaults;
+ tr.reset = 0;
+ tr.old_limit = 0;
+ threshold_restart_bank(&tr);
}
}
}
@@ -251,20 +262,6 @@ struct threshold_attr {
ssize_t(*store) (struct threshold_block *, const char *, size_t count);
};
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
- cpumask_t *newmask)
-{
- *oldmask = current->cpus_allowed;
- cpus_clear(*newmask);
- cpu_set(cpu, *newmask);
- set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
- set_cpus_allowed_ptr(current, oldmask);
-}
-
#define SHOW_FIELDS(name) \
static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
{ \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
const char *buf, size_t count)
{
char *end;
- cpumask_t oldmask, newmask;
+ struct thresh_restart tr;
unsigned long new = simple_strtoul(buf, &end, 0);
if (end == buf)
return -EINVAL;
b->interrupt_enable = !!new;
- affinity_set(b->cpu, &oldmask, &newmask);
- threshold_restart_bank(b, 0, 0);
- affinity_restore(&oldmask);
+ tr.b = b;
+ tr.reset = 0;
+ tr.old_limit = 0;
+ work_on_cpu(b->cpu, threshold_restart_bank, &tr);
return end - buf;
}
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
const char *buf, size_t count)
{
char *end;
- cpumask_t oldmask, newmask;
- u16 old;
+ struct thresh_restart tr;
unsigned long new = simple_strtoul(buf, &end, 0);
if (end == buf)
return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
new = THRESHOLD_MAX;
if (new < 1)
new = 1;
- old = b->threshold_limit;
+ tr.old_limit = b->threshold_limit;
b->threshold_limit = new;
+ tr.b = b;
+ tr.reset = 0;
- affinity_set(b->cpu, &oldmask, &newmask);
- threshold_restart_bank(b, 0, old);
- affinity_restore(&oldmask);
+ work_on_cpu(b->cpu, threshold_restart_bank, &tr);
return end - buf;
}
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
+static long local_error_count(void *_b)
{
- u32 high, low;
- cpumask_t oldmask, newmask;
- affinity_set(b->cpu, &oldmask, &newmask);
+ struct threshold_block *b = _b;
+ u32 low, high;
+
rdmsr(b->address, low, high);
- affinity_restore(&oldmask);
- return sprintf(buf, "%x\n",
- (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+ return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
+{
+ return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
}
static ssize_t store_error_count(struct threshold_block *b,
const char *buf, size_t count)
{
- cpumask_t oldmask, newmask;
- affinity_set(b->cpu, &oldmask, &newmask);
- threshold_restart_bank(b, 1, 0);
- affinity_restore(&oldmask);
+ struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+ work_on_cpu(b->cpu, threshold_restart_bank, &tr);
return 1;
}
@@ -463,12 +462,19 @@ out_free:
return err;
}
+static long local_allocate_threshold_blocks(void *_bank)
+{
+ unsigned int *bank = _bank;
+
+ return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+ MSR_IA32_MC0_MISC + *bank * 4);
+}
+
/* symlinks sibling shared banks to first core. first core owns dir/files. */
static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
int i, err = 0;
struct threshold_bank *b = NULL;
- cpumask_t oldmask, newmask;
char name[32];
sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
per_cpu(threshold_banks, cpu)[bank] = b;
- affinity_set(cpu, &oldmask, &newmask);
- err = allocate_threshold_blocks(cpu, bank, 0,
- MSR_IA32_MC0_MISC + bank * 4);
- affinity_restore(&oldmask);
-
+ err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
if (err)
goto out_free;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
#include <asm/pat.h>
#include "mtrr.h"
-struct mtrr_state {
- struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
- mtrr_type fixed_ranges[NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
struct fixed_range_block {
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
};
static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
u64 mtrr_tom2;
+struct mtrr_state_type mtrr_state = {};
+EXPORT_SYMBOL_GPL(mtrr_state);
+
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1159e269e596..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
u32 num_var_ranges = 0;
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
unsigned long lsize;
};
-static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
@@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata =
static int __init disable_mtrr_cleanup_setup(char *str)
{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 0;
+ enable_mtrr_cleanup = 0;
return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
static int __init enable_mtrr_cleanup_setup(char *str)
{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 1;
+ enable_mtrr_cleanup = 1;
return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
#define MTRRcap_MSR 0x0fe
#define MTRRdefType_MSR 0x2ff
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define NUM_FIXED_RANGES 88
-#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef u8 mtrr_type;
-
-extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
u32 ccr3;
};
-struct mtrr_var_range {
- u32 base_lo;
- u32 base_hi;
- u32 mask_lo;
- u32 mask_hi;
-};
-
void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649b..2ac1f0c2beb3 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
}
static ssize_t cpuid_read(struct file *file, char __user *buf,
- size_t count, loff_t * ppos)
+ size_t count, loff_t *ppos)
{
char __user *tmp = buf;
struct cpuid_regs cmd;
@@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file)
unsigned int cpu;
struct cpuinfo_x86 *c;
int ret = 0;
-
+
lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
- if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
ret = -ENXIO; /* No such CPU */
goto out;
}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
#include <linux/kdebug.h>
#include <asm/smp.h>
#include <asm/reboot.h>
+#include <asm/virtext.h>
#include <mach_ipi.h>
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
#endif
crash_save_cpu(regs, cpu);
+ /* Disable VMX or SVM if needed.
+ *
+ * We need to disable virtualization on all CPUs.
+ * Having VMX or SVM enabled on any CPU may break rebooting
+ * after the kdump kernel has finished its task.
+ */
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+
disable_local_APIC();
}
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
local_irq_disable();
kdump_nmi_shootdown_cpus();
+
+ /* Booting kdump kernel with VMX or SVM enabled won't work,
+ * because (among other limitations) we can't disable paging
+ * with the virt flags.
+ */
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9c..504ad198e4ad 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
- n = vscnprintf(buf, 512, fmt, ap);
+ n = vscnprintf(buf, sizeof(buf), fmt, ap);
early_console->write(early_console, buf, n);
va_end(ap);
}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..34185488e4fb 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 1;
}
-static cpumask_t flat_target_cpus(void)
+static const struct cpumask *flat_target_cpus(void)
{
- return cpu_online_map;
+ return cpu_online_mask;
}
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
}
/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
{
- unsigned long mask = cpus_addr(cpumask)[0];
unsigned long flags;
local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
local_irq_restore(flags);
}
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+
+ _flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+ int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ int cpu = smp_processor_id();
+
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
+ _flat_send_IPI_mask(mask, vector);
+}
+
static void flat_send_IPI_allbutself(int vector)
{
+ int cpu = smp_processor_id();
#ifdef CONFIG_HOTPLUG_CPU
int hotplug = 1;
#else
int hotplug = 0;
#endif
if (hotplug || vector == NMI_VECTOR) {
- cpumask_t allbutme = cpu_online_map;
+ if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+ unsigned long mask = cpumask_bits(cpu_online_mask)[0];
- cpu_clear(smp_processor_id(), allbutme);
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
- if (!cpus_empty(allbutme))
- flat_send_IPI_mask(allbutme, vector);
+ _flat_send_IPI_mask(mask, vector);
+ }
} else if (num_online_cpus() > 1) {
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
}
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
static void flat_send_IPI_all(int vector)
{
if (vector == NMI_VECTOR)
- flat_send_IPI_mask(cpu_online_map, vector);
+ flat_send_IPI_mask(cpu_online_mask, vector);
else
__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
return physid_isset(read_xapic_id(), phys_cpu_present_map);
}
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
{
- return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+ unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+ unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
+
+ return mask1 & mask2;
}
static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_mask = flat_send_IPI_mask,
+ .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static cpumask_t physflat_target_cpus(void)
+static const struct cpumask *physflat_target_cpus(void)
{
- return cpu_online_map;
+ return cpu_online_mask;
}
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- return cpumask_of_cpu(cpu);
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
{
send_IPI_mask_sequence(cpumask, vector);
}
-static void physflat_send_IPI_allbutself(int vector)
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+ int vector)
{
- cpumask_t allbutme = cpu_online_map;
+ send_IPI_mask_allbutself(cpumask, vector);
+}
- cpu_clear(smp_processor_id(), allbutme);
- physflat_send_IPI_mask(allbutme, vector);
+static void physflat_send_IPI_allbutself(int vector)
+{
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static void physflat_send_IPI_all(int vector)
{
- physflat_send_IPI_mask(cpu_online_map, vector);
+ physflat_send_IPI_mask(cpu_online_mask, vector);
}
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
+ cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask)
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
struct genapic apic_physflat = {
.name = "physical flat",
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@ struct genapic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_mask = physflat_send_IPI_mask,
+ .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..6ce497cc372d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
}
/*
* for now each logical cpu is in its own vector allocation domain.
*/
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
* at once. We have 16 cpu's in a cluster. This will minimize IPI register
* writes.
*/
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
local_irq_save(flags);
- for_each_cpu_mask(query_cpu, mask) {
- __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, APIC_DEST_LOGICAL);
- }
+ for_each_cpu(query_cpu, mask)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
{
- cpumask_t mask = cpu_online_map;
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
- cpu_clear(smp_processor_id(), mask);
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
- if (!cpus_empty(mask))
- x2apic_send_IPI_mask(mask, vector);
+ local_irq_save(flags);
+ for_each_online_cpu(query_cpu)
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
}
static void x2apic_send_IPI_all(int vector)
{
- x2apic_send_IPI_mask(cpu_online_map, vector);
+ x2apic_send_IPI_mask(cpu_online_mask, vector);
}
static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void)
return 1;
}
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
/*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * We're using fixed IRQ delivery, can only return one logical APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_logical_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one logical APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask)
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ return BAD_APICID;
+}
+
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
}
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
x2apic_icr_write(cfg, apicid);
}
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
local_irq_save(flags);
- for_each_cpu_mask(query_cpu, mask) {
+ for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
{
- cpumask_t mask = cpu_online_map;
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
- cpu_clear(smp_processor_id(), mask);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
- if (!cpus_empty(mask))
- x2apic_send_IPI_mask(mask, vector);
+ local_irq_save(flags);
+ for_each_online_cpu(query_cpu)
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
}
static void x2apic_send_IPI_all(int vector)
{
- x2apic_send_IPI_mask(cpu_online_map, vector);
+ x2apic_send_IPI_mask(cpu_online_mask, vector);
}
static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
return 1;
}
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask)
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
@@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb)
return current_cpu_data.initial_apicid >> index_msb;
}
-void x2apic_send_IPI_self(int vector)
+static void x2apic_send_IPI_self(int vector)
{
apic_write(APIC_SELF_IPI, vector);
}
-void init_x2apic_ldr(void)
+static void init_x2apic_ldr(void)
{
return;
}
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece17289731..b193e082f6ce 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-static cpumask_t uv_target_cpus(void)
+static const struct cpumask *uv_target_cpus(void)
{
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
}
-static cpumask_t uv_vector_allocation_domain(int cpu)
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector)
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
-static void uv_send_IPI_mask(cpumask_t mask, int vector)
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned int cpu;
- for_each_possible_cpu(cpu)
- if (cpu_isset(cpu, mask))
+ for_each_cpu(cpu, mask)
+ uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned int cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ for_each_cpu(cpu, mask)
+ if (cpu != this_cpu)
uv_send_IPI_one(cpu, vector);
}
static void uv_send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
-
- cpu_clear(smp_processor_id(), mask);
+ unsigned int cpu;
+ unsigned int this_cpu = smp_processor_id();
- if (!cpus_empty(mask))
- uv_send_IPI_mask(mask, vector);
+ for_each_online_cpu(cpu)
+ if (cpu != this_cpu)
+ uv_send_IPI_one(cpu, vector);
}
static void uv_send_IPI_all(int vector)
{
- uv_send_IPI_mask(cpu_online_map, vector);
+ uv_send_IPI_mask(cpu_online_mask, vector);
}
static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void)
{
}
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
+ cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ for_each_cpu_and(cpu, cpumask, andmask)
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = {
.send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask,
+ .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_self = uv_send_IPI_self,
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc17..b9a4d8c4b935 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
#include <asm/trampoline.h>
/* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
+static struct x8664_pda _boot_cpu_pda;
#ifdef CONFIG_SMP
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 845ea097383e..cd759ad90690 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void)
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
- hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+ hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(&hpet_clockevent);
global_clock_event = &hpet_clockevent;
printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
hpet_setup_msi_irq(hdev->irq);
disable_irq(hdev->irq);
- irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+ irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
break;
@@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
return -1;
disable_irq(dev->irq);
- irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+ irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
enable_irq(dev->irq);
printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
/* 5 usec minimum reprogramming delta. */
evt->min_delta_ns = 5000;
- evt->cpumask = cpumask_of_cpu(hdev->cpu);
+ evt->cpumask = cpumask_of(hdev->cpu);
clockevents_register_device(evt);
}
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
* Start pit with the boot cpu mask and make it global after the
* IO_APIC has been initialized.
*/
- pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+ pit_clockevent.cpumask = cpumask_of(smp_processor_id());
pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
pit_clockevent.shift);
pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
#include <asm/pgtable.h>
#include <asm/desc.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f6ea94b74da1..3639442aa7a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
struct irq_cfg {
struct irq_pin_list *irq_2_pin;
- cpumask_t domain;
- cpumask_t old_domain;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
@@ -152,25 +152,25 @@ static struct irq_cfg irq_cfgx[] = {
#else
static struct irq_cfg irq_cfgx[NR_IRQS] = {
#endif
- [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
- [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
- [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
- [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
- [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
- [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
- [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
- [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
- [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
- [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
- [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ [0] = { .vector = IRQ0_VECTOR, },
+ [1] = { .vector = IRQ1_VECTOR, },
+ [2] = { .vector = IRQ2_VECTOR, },
+ [3] = { .vector = IRQ3_VECTOR, },
+ [4] = { .vector = IRQ4_VECTOR, },
+ [5] = { .vector = IRQ5_VECTOR, },
+ [6] = { .vector = IRQ6_VECTOR, },
+ [7] = { .vector = IRQ7_VECTOR, },
+ [8] = { .vector = IRQ8_VECTOR, },
+ [9] = { .vector = IRQ9_VECTOR, },
+ [10] = { .vector = IRQ10_VECTOR, },
+ [11] = { .vector = IRQ11_VECTOR, },
+ [12] = { .vector = IRQ12_VECTOR, },
+ [13] = { .vector = IRQ13_VECTOR, },
+ [14] = { .vector = IRQ14_VECTOR, },
+ [15] = { .vector = IRQ15_VECTOR, },
};
-void __init arch_early_irq_init(void)
+int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
struct irq_desc *desc;
@@ -183,7 +183,13 @@ void __init arch_early_irq_init(void)
for (i = 0; i < count; i++) {
desc = irq_to_desc(i);
desc->chip_data = &cfg[i];
+ alloc_bootmem_cpumask_var(&cfg[i].domain);
+ alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+ if (i < NR_IRQS_LEGACY)
+ cpumask_setall(cfg[i].domain);
}
+
+ return 0;
}
#ifdef CONFIG_SPARSE_IRQ
@@ -207,12 +213,26 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
node = cpu_to_node(cpu);
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+ if (cfg) {
+ if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+ kfree(cfg);
+ cfg = NULL;
+ } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+ GFP_ATOMIC, node)) {
+ free_cpumask_var(cfg->domain);
+ kfree(cfg);
+ cfg = NULL;
+ } else {
+ cpumask_clear(cfg->domain);
+ cpumask_clear(cfg->old_domain);
+ }
+ }
printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
return cfg;
}
-void arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
{
struct irq_cfg *cfg;
@@ -224,6 +244,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
BUG_ON(1);
}
}
+
+ return 0;
}
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
@@ -329,13 +351,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
}
}
-static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg = desc->chip_data;
if (!cfg->move_in_progress) {
/* it means that domain is not changed */
- if (!cpus_intersects(desc->affinity, mask))
+ if (!cpumask_intersects(&desc->affinity, mask))
cfg->move_desc_pending = 1;
}
}
@@ -350,7 +373,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
#endif
#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+static inline void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
{
}
#endif
@@ -481,6 +505,26 @@ static void ioapic_mask_entry(int apic, int pin)
}
#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ cfg->move_cleanup_count = 0;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ cfg->move_cleanup_count++;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+}
+
static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
{
int apic, pin;
@@ -516,41 +560,55 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
}
}
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
+/*
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int dest;
- cpumask_t tmp;
unsigned int irq;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return BAD_APICID;
irq = desc->irq;
cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return;
+ return BAD_APICID;
+ cpumask_and(&desc->affinity, cfg->domain, mask);
set_extra_move_desc(desc, mask);
+ return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+}
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
- /*
- * Only the high 8 bits are valid.
- */
- dest = SET_APIC_LOGICAL_ID(dest);
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int dest;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg);
- desc->affinity = mask;
+ dest = set_desc_affinity(desc, mask);
+ if (dest != BAD_APICID) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ }
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc;
@@ -648,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
}
#ifdef CONFIG_X86_64
-void io_apic_sync(struct irq_pin_list *entry)
+static void io_apic_sync(struct irq_pin_list *entry)
{
/*
* Synchronize the IO-APIC and the CPU by doing
@@ -1218,7 +1276,8 @@ void unlock_vector_lock(void)
spin_unlock(&vector_lock);
}
-static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
/*
* NOTE! The local APIC isn't very good at handling
@@ -1233,49 +1292,49 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
*/
static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
unsigned int old_vector;
- int cpu;
+ int cpu, err;
+ cpumask_var_t tmp_mask;
if ((cfg->move_in_progress) || cfg->move_cleanup_count)
return -EBUSY;
- /* Only try and allocate irqs on cpus that are present */
- cpus_and(mask, mask, cpu_online_map);
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
old_vector = cfg->vector;
if (old_vector) {
- cpumask_t tmp;
- cpus_and(tmp, cfg->domain, mask);
- if (!cpus_empty(tmp))
+ cpumask_and(tmp_mask, mask, cpu_online_mask);
+ cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+ if (!cpumask_empty(tmp_mask)) {
+ free_cpumask_var(tmp_mask);
return 0;
+ }
}
- for_each_cpu_mask_nr(cpu, mask) {
- cpumask_t domain, new_mask;
+ /* Only try and allocate irqs on cpus that are present */
+ err = -ENOSPC;
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
int new_cpu;
int vector, offset;
- domain = vector_allocation_domain(cpu);
- cpus_and(new_mask, domain, cpu_online_map);
+ vector_allocation_domain(cpu, tmp_mask);
vector = current_vector;
offset = current_offset;
next:
vector += 8;
if (vector >= first_system_vector) {
- /* If we run out of vectors on large boxen, must share them. */
+ /* If out of vectors on large boxen, must share them. */
offset = (offset + 1) % 8;
vector = FIRST_DEVICE_VECTOR + offset;
}
if (unlikely(current_vector == vector))
continue;
-#ifdef CONFIG_X86_64
- if (vector == IA32_SYSCALL_VECTOR)
- goto next;
-#else
- if (vector == SYSCALL_VECTOR)
+
+ if (test_bit(vector, used_vectors))
goto next;
-#endif
- for_each_cpu_mask_nr(new_cpu, new_mask)
+
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
/* Found one! */
@@ -1283,18 +1342,21 @@ next:
current_offset = offset;
if (old_vector) {
cfg->move_in_progress = 1;
- cfg->old_domain = cfg->domain;
+ cpumask_copy(cfg->old_domain, cfg->domain);
}
- for_each_cpu_mask_nr(new_cpu, new_mask)
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
- cfg->domain = domain;
- return 0;
+ cpumask_copy(cfg->domain, tmp_mask);
+ err = 0;
+ break;
}
- return -ENOSPC;
+ free_cpumask_var(tmp_mask);
+ return err;
}
-static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -1307,23 +1369,20 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
{
- cpumask_t mask;
int cpu, vector;
BUG_ON(!cfg->vector);
vector = cfg->vector;
- cpus_and(mask, cfg->domain, cpu_online_map);
- for_each_cpu_mask_nr(cpu, mask)
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
- cpus_clear(cfg->domain);
+ cpumask_clear(cfg->domain);
if (likely(!cfg->move_in_progress))
return;
- cpus_and(mask, cfg->old_domain, cpu_online_map);
- for_each_cpu_mask_nr(cpu, mask) {
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1345,10 +1404,8 @@ void __setup_vector_irq(int cpu)
/* Mark the inuse vectors */
for_each_irq_desc(irq, desc) {
- if (!desc)
- continue;
cfg = desc->chip_data;
- if (!cpu_isset(cpu, cfg->domain))
+ if (!cpumask_test_cpu(cpu, cfg->domain))
continue;
vector = cfg->vector;
per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1360,7 +1417,7 @@ void __setup_vector_irq(int cpu)
continue;
cfg = irq_cfg(irq);
- if (!cpu_isset(cpu, cfg->domain))
+ if (!cpumask_test_cpu(cpu, cfg->domain))
per_cpu(vector_irq, cpu)[vector] = -1;
}
}
@@ -1496,18 +1553,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
{
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
- cpumask_t mask;
+ unsigned int dest;
if (!IO_APIC_IRQ(irq))
return;
cfg = desc->chip_data;
- mask = TARGET_CPUS;
- if (assign_irq_vector(irq, cfg, mask))
+ if (assign_irq_vector(irq, cfg, TARGET_CPUS))
return;
- cpus_and(mask, cfg->domain, mask);
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1517,8 +1573,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
- cpu_mask_to_apicid(mask), trigger, polarity,
- cfg->vector)) {
+ dest, trigger, polarity, cfg->vector)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic].mp_apicid, pin);
__clear_irq_vector(irq, cfg);
@@ -1730,8 +1785,6 @@ __apicdebuginit(void) print_IO_APIC(void)
for_each_irq_desc(irq, desc) {
struct irq_pin_list *entry;
- if (!desc)
- continue;
cfg = desc->chip_data;
entry = cfg->irq_2_pin;
if (!entry)
@@ -2240,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+ send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -2289,18 +2342,17 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
* as simple as edge triggered migration and we can do the irq migration
* with a simple atomic update to IO-APIC RTE.
*/
-static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
+static void
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
- cpumask_t tmp, cleanup_mask;
struct irte irte;
int modify_ioapic_rte;
unsigned int dest;
unsigned long flags;
unsigned int irq;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
irq = desc->irq;
@@ -2313,8 +2365,7 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
set_extra_move_desc(desc, mask);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
+ dest = cpu_mask_to_apicid_and(cfg->domain, mask);
modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
@@ -2331,14 +2382,10 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
*/
modify_irte(irq, &irte);
- if (cfg->move_in_progress) {
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2360,11 +2407,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
}
/* everthing is clear. we have right of way */
- migrate_ioapic_irq_desc(desc, desc->pending_mask);
+ migrate_ioapic_irq_desc(desc, &desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
- cpus_clear(desc->pending_mask);
+ cpumask_clear(&desc->pending_mask);
unmask:
unmask_IO_APIC_irq_desc(desc);
@@ -2378,9 +2425,6 @@ static void ir_irq_migration(struct work_struct *work)
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
- if (!desc)
- continue;
-
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
@@ -2392,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work)
continue;
}
- desc->chip->set_affinity(irq, desc->pending_mask);
+ desc->chip->set_affinity(irq, &desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
@@ -2401,18 +2445,20 @@ static void ir_irq_migration(struct work_struct *work)
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+ const struct cpumask *mask)
{
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
- desc->pending_mask = mask;
+ cpumask_copy(&desc->pending_mask, mask);
migrate_irq_remapped_level_desc(desc);
return;
}
migrate_ioapic_irq_desc(desc, mask);
}
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+ const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2447,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
if (!cfg->move_cleanup_count)
goto unlock;
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
__get_cpu_var(vector_irq)[vector] = -1;
@@ -2484,20 +2530,14 @@ static void irq_complete_move(struct irq_desc **descp)
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
- cpumask_t cleanup_mask;
-
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
*descp = desc = move_irq_desc(desc, me);
/* get the new one */
cfg = desc->chip_data;
#endif
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ send_cleanup_vector(cfg);
}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -2670,9 +2710,6 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for_each_irq_desc(irq, desc) {
- if (!desc)
- continue;
-
cfg = desc->chip_data;
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
@@ -3222,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
struct irq_cfg *cfg;
int err;
unsigned dest;
- cpumask_t tmp;
cfg = irq_cfg(irq);
- tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, cfg, tmp);
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
if (err)
return err;
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
@@ -3285,26 +3319,18 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
read_msi_msg_desc(desc, &msg);
@@ -3314,37 +3340,27 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg_desc(desc, &msg);
- desc->affinity = mask;
}
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = desc->chip_data;
unsigned int dest;
- cpumask_t tmp, cleanup_mask;
struct irte irte;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
-
if (get_irte(irq, &irte))
return;
- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
- set_extra_move_desc(desc, mask);
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
-
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -3358,14 +3374,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- if (cfg->move_in_progress) {
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
-
- desc->affinity = mask;
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
}
#endif
@@ -3556,26 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq)
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
dmar_msi_read(irq, &msg);
@@ -3585,7 +3587,6 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
- desc->affinity = mask;
}
#endif /* CONFIG_SMP */
@@ -3619,26 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq)
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
hpet_msi_read(irq, &msg);
@@ -3648,7 +3641,6 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
- desc->affinity = mask;
}
#endif /* CONFIG_SMP */
@@ -3703,28 +3695,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
- cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
-
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
target_ht_irq(irq, dest, cfg->vector);
- desc->affinity = mask;
}
#endif
@@ -3744,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
int err;
- cpumask_t tmp;
cfg = irq_cfg(irq);
- tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, cfg, tmp);
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
@@ -3790,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset)
{
- const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
@@ -3800,7 +3780,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, *eligible_cpu);
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
@@ -3819,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
- entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+ entry->dest = cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -4030,7 +4010,7 @@ void __init setup_ioapic_dest(void)
int pin, ioapic, irq, irq_entry;
struct irq_desc *desc;
struct irq_cfg *cfg;
- cpumask_t mask;
+ const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
@@ -4061,7 +4041,7 @@ void __init setup_ioapic_dest(void)
*/
if (desc->status &
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->affinity;
+ mask = &desc->affinity;
else
mask = TARGET_CPUS;
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
/*
* This is only used on smaller machines.
*/
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
{
- unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;
local_irq_save(flags);
- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
__send_IPI_dest_field(mask, vector);
local_irq_restore(flags);
}
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
*/
local_irq_save(flags);
- for_each_possible_cpu(query_cpu) {
- if (cpu_isset(query_cpu, mask)) {
+ for_each_cpu(query_cpu, mask)
+ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
+ local_irq_restore(flags);
+}
+
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ if (query_cpu != this_cpu)
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
vector);
- }
- }
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d18df67..bce53e1352a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
+#include <asm/irq.h>
atomic_t irq_err_count;
@@ -190,3 +191,5 @@ u64 arch_irq_stat(void)
#endif
return sum;
}
+
+EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 119fc9c8ff7f..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
#ifdef CONFIG_HOTPLUG_CPU
#include <mach_apic.h>
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
{
unsigned int irq;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
- cpumask_t mask;
+ const struct cpumask *affinity;
if (!desc)
continue;
if (irq == 2)
continue;
- cpus_and(mask, desc->affinity, map);
- if (any_online_cpu(mask) == NR_CPUS) {
+ affinity = &desc->affinity;
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
printk("Breaking affinity for irq %i\n", irq);
- mask = map;
+ affinity = cpu_all_mask;
}
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, affinity);
else if (desc->action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index a174a217eb1a..6383d50f82ea 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -80,16 +80,17 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
}
#ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
{
unsigned int irq;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
- cpumask_t mask;
int break_affinity = 0;
int set_affinity = 1;
+ const struct cpumask *affinity;
if (!desc)
continue;
@@ -99,23 +100,23 @@ void fixup_irqs(cpumask_t map)
/* interrupt's are disabled at this point */
spin_lock(&desc->lock);
+ affinity = &desc->affinity;
if (!irq_has_action(irq) ||
- cpus_equal(desc->affinity, map)) {
+ cpumask_equal(affinity, cpu_online_mask)) {
spin_unlock(&desc->lock);
continue;
}
- cpus_and(mask, desc->affinity, map);
- if (cpus_empty(mask)) {
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
break_affinity = 1;
- mask = map;
+ affinity = cpu_all_mask;
}
if (desc->chip->mask)
desc->chip->mask(irq);
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, affinity);
else if (!(warned++))
set_affinity = 0;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 203384ed2b5d..84723295f88a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
};
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (per_cpu(vector_irq, cpu)[vector] != -1)
+ return 1;
+ }
+
+ return 0;
+}
+
/* Overridden in paravirt.c */
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
@@ -146,10 +158,12 @@ void __init native_init_IRQ(void)
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
/* IPI for single call function */
- set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+ set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 6190e6ef546c..31ebfe38e96c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
};
+int vector_used_by_percpu_irq(unsigned int vector)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (per_cpu(vector_irq, cpu)[vector] != -1)
+ return 1;
+ }
+
+ return 0;
+}
+
void __init init_ISA_irqs(void)
{
int i;
@@ -121,6 +133,7 @@ static void __init smp_intr_init(void)
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+ set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
#endif
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
*/
static unsigned long kvm_get_tsc_khz(void)
{
- return preset_lpj;
+ struct pvclock_vcpu_time_info *src;
+ src = &per_cpu(hv_clock, 0);
+ return pvclock_tsc_khz(src);
}
static void kvm_get_preset_lpj(void)
{
- struct pvclock_vcpu_time_info *src;
unsigned long khz;
u64 lpj;
- src = &per_cpu(hv_clock, 0);
- khz = pvclock_tsc_khz(src);
+ khz = kvm_get_tsc_khz();
lpj = ((u64)khz * 1000);
do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
#endif
kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
+ pv_info.paravirt_enabled = 1;
+ pv_info.name = "KVM";
}
}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee3..71f1d99a635d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
if (err < 0)
return err;
- for(i = 0; i < old->size; i++)
+ for (i = 0; i < old->size; i++)
write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
.set_mode = mfgpt_set_mode,
.set_next_event = mfgpt_next_event,
.rating = 250,
- .cpumask = CPU_MASK_ALL,
+ .cpumask = cpu_all_mask,
.shift = 32
};
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe85..666e43df51f9 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
#include <asm/msr.h>
#include <asm/acpi.h>
#include <asm/mmconfig.h>
-
-#include "../pci/pci.h"
+#include <asm/pci_x86.h>
struct pci_hostbridge_probe {
u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808ba..c5c5b8df1dbc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
#include <linux/bitops.h>
#include <linux/acpi.h>
#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/acpi.h>
-#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
#include <asm/io_apic.h>
#include <asm/proto.h>
-#include <asm/acpi.h>
#include <asm/bios_ebda.h>
#include <asm/e820.h>
#include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
#endif
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
- set_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+ set_bit(m->mpc_busid, mp_bus_not_pci);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
x86_quirks->mpc_oem_pci_bus(m);
clear_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 82a7c7ed6d45..726266695b2c 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file)
lock_kernel();
cpu = iminor(file->f_path.dentry->d_inode);
- if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
ret = -ENXIO; /* No such CPU */
goto out;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a7..45a09ccdc214 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
#include <linux/kernel_stat.h>
#include <linux/kdebug.h>
#include <linux/smp.h>
+#include <linux/nmi.h>
#include <asm/i8259.h>
#include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/timer.h>
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff6..00c2bcd41463 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */
* to trigger bugs with some popular PCI cards, in particular 3ware (but
* has been also also seen with Qlogic at least).
*/
-int iommu_fullflush = 1;
+static int iommu_fullflush = 1;
/* Allocation bitmap for the remapping area: */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 242c3440687f..8cba3749a511 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -13,7 +13,7 @@
int swiotlb __read_mostly;
-void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
{
return alloc_bootmem_low_pages(size);
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6eec..2b46eb41643b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,8 @@
#include <asm/proto.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
+#include <asm/pci_x86.h>
+#include <asm/virtext.h>
#ifdef CONFIG_X86_32
# include <linux/dmi.h>
@@ -23,7 +25,6 @@
#include <mach_ipi.h>
-
/*
* Power off function, if any
*/
@@ -39,6 +40,12 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
+/* This is set if we need to go through the 'emergency' path.
+ * When machine_emergency_restart() is called, we may be on
+ * an inconsistent state and won't be able to do a clean cleanup
+ */
+static int reboot_emergency;
+
/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
bool port_cf9_safe = false;
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
}
}
+static void vmxoff_nmi(int cpu, struct die_args *args)
+{
+ cpu_emergency_vmxoff();
+}
+
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization
+ */
+static void emergency_vmx_disable_all(void)
+{
+ /* Just make sure we won't change CPUs while doing this */
+ local_irq_disable();
+
+ /* We need to disable VMX on all CPUs before rebooting, otherwise
+ * we risk hanging up the machine, because the CPU ignore INIT
+ * signals when VMX is enabled.
+ *
+ * We can't take any locks and we may be on an inconsistent
+ * state, so we use NMIs as IPIs to tell the other CPUs to disable
+ * VMX and halt.
+ *
+ * For safety, we will avoid running the nmi_shootdown_cpus()
+ * stuff unnecessarily, but we don't have a way to check
+ * if other CPUs have VMX enabled. So we will call it only if the
+ * CPU we are running on has VMX enabled.
+ *
+ * We will miss cases where VMX is not enabled on all CPUs. This
+ * shouldn't do much harm because KVM always enable VMX on all
+ * CPUs anyway. But we can miss it on the small window where KVM
+ * is still enabling VMX.
+ */
+ if (cpu_has_vmx() && cpu_vmx_enabled()) {
+ /* Disable VMX on this CPU.
+ */
+ cpu_vmxoff();
+
+ /* Halt and disable VMX on the other CPUs */
+ nmi_shootdown_cpus(vmxoff_nmi);
+
+ }
+}
+
+
void __attribute__((weak)) mach_reboot_fixups(void)
{
}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
{
int i;
+ if (reboot_emergency)
+ emergency_vmx_disable_all();
+
/* Tell the BIOS if we want cold or warm reboot */
*((unsigned short *)__va(0x472)) = reboot_mode;
@@ -449,7 +501,7 @@ void native_machine_shutdown(void)
#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
- if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+ if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
cpu_online(reboot_cpu))
reboot_cpu_id = reboot_cpu;
#endif
@@ -459,7 +511,7 @@ void native_machine_shutdown(void)
reboot_cpu_id = smp_processor_id();
/* Make certain I only run on the appropriate processor */
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+ set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
#endif
}
+static void __machine_emergency_restart(int emergency)
+{
+ reboot_emergency = emergency;
+ machine_ops.emergency_restart();
+}
+
static void native_machine_restart(char *__unused)
{
printk("machine restart\n");
if (!reboot_force)
machine_shutdown();
- machine_emergency_restart();
+ __machine_emergency_restart(0);
}
static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
void machine_emergency_restart(void)
{
- machine_ops.emergency_restart();
+ __machine_emergency_restart(1);
}
void machine_restart(char *cmd)
@@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self,
static void smp_send_nmi_allbutself(void)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(safe_smp_processor_id(), mask);
- if (!cpus_empty(mask))
- send_IPI_mask(mask, NMI_VECTOR);
+ send_IPI_allbutself(NMI_VECTOR);
}
static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..a4b619c33106 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,8 +152,11 @@ void __init setup_per_cpu_areas(void)
old_size = PERCPU_ENOUGH_ROOM;
align = max_t(unsigned long, PAGE_SIZE, align);
size = roundup(old_size, align);
- printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
- size);
+
+ pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+ NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+
+ pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -164,28 +167,21 @@ void __init setup_per_cpu_areas(void)
if (!node_online(node) || !NODE_DATA(node)) {
ptr = __alloc_bootmem(size, align,
__pa(MAX_DMA_ADDRESS));
- printk(KERN_INFO
- "cpu %d has no node %d or node-local memory\n",
+ pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
- if (ptr)
- printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
- cpu, __pa(ptr));
- }
- else {
+ pr_debug("per cpu data for cpu%d at %016lx\n",
+ cpu, __pa(ptr));
+ } else {
ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
__pa(MAX_DMA_ADDRESS));
- if (ptr)
- printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
- cpu, node, __pa(ptr));
+ pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
+ cpu, node, __pa(ptr));
}
#endif
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}
- printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
- NR_CPUS, nr_cpu_ids, nr_node_ids);
-
/* Setup percpu data maps */
setup_per_cpu_maps();
@@ -282,7 +278,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
else
cpu_clear(cpu, *mask);
- cpulist_scnprintf(buf, sizeof(buf), *mask);
+ cpulist_scnprintf(buf, sizeof(buf), mask);
printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
}
@@ -334,25 +330,25 @@ static const cpumask_t cpu_mask_none;
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
-const cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *cpumask_of_node(int node)
{
if (node_to_cpumask_map == NULL) {
printk(KERN_WARNING
- "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+ "cpumask_of_node(%d): no node_to_cpumask_map!\n",
node);
dump_stack();
return (const cpumask_t *)&cpu_online_map;
}
if (node >= nr_node_ids) {
printk(KERN_WARNING
- "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
+ "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
node, nr_node_ids);
dump_stack();
return &cpu_mask_none;
}
return &node_to_cpumask_map[node];
}
-EXPORT_SYMBOL(_node_to_cpumask_ptr);
+EXPORT_SYMBOL(cpumask_of_node);
/*
* Returns a bitmask of CPUs on Node 'node'.
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db362c1..beea2649a240 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
WARN_ON(1);
return;
}
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
}
void native_send_call_func_single_ipi(int cpu)
{
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
-void native_send_call_func_ipi(cpumask_t mask)
+void native_send_call_func_ipi(const struct cpumask *mask)
{
cpumask_t allbutself;
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
- if (cpus_equal(mask, allbutself) &&
+ if (cpus_equal(*mask, allbutself) &&
cpus_equal(cpu_online_map, cpu_callout_map))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f8500c969442..6bd4d9b73870 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -502,7 +496,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
}
/* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
+const struct cpumask *cpu_coregroup_mask(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
/*
@@ -510,9 +504,14 @@ cpumask_t cpu_coregroup_map(int cpu)
* And for power savings, we return cpu_core_map
*/
if (sched_mc_power_savings || sched_smt_power_savings)
- return per_cpu(cpu_core_map, cpu);
+ return &per_cpu(cpu_core_map, cpu);
else
- return c->llc_shared_map;
+ return &c->llc_shared_map;
+}
+
+cpumask_t cpu_coregroup_map(int cpu)
+{
+ return *cpu_coregroup_mask(cpu);
}
static void impress_friends(void)
@@ -1155,7 +1154,7 @@ static void __init smp_cpu_index_default(void)
for_each_possible_cpu(i) {
c = &cpu_data(i);
/* mark all to hotplug */
- c->cpu_index = NR_CPUS;
+ c->cpu_index = nr_cpu_ids;
}
}
@@ -1260,6 +1259,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
check_nmi_watchdog();
}
+static int __initdata setup_possible_cpus = -1;
+static int __init _setup_possible_cpus(char *str)
+{
+ get_option(&str, &setup_possible_cpus);
+ return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+
/*
* cpu_possible_map should be static, it cannot change as cpu's
* are onlined, or offlined. The reason is per-cpu data-structures
@@ -1272,7 +1280,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
*
* Three ways to find out the number of additional hotplug CPUs:
* - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
+ * - The user can overwrite it with possible_cpus=NUM
* - Otherwise don't reserve additional CPUs.
* We do this because additional CPUs waste a lot of memory.
* -AK
@@ -1285,9 +1293,19 @@ __init void prefill_possible_map(void)
if (!num_processors)
num_processors = 1;
- possible = num_processors + disabled_cpus;
- if (possible > NR_CPUS)
- possible = NR_CPUS;
+ if (setup_possible_cpus == -1)
+ possible = num_processors + disabled_cpus;
+ else
+ possible = setup_possible_cpus;
+
+ total_cpus = max_t(int, possible, num_processors + disabled_cpus);
+
+ if (possible > CONFIG_NR_CPUS) {
+ printk(KERN_WARNING
+ "%d Processors exceeds NR_CPUS limit of %d\n",
+ possible, CONFIG_NR_CPUS);
+ possible = CONFIG_NR_CPUS;
+ }
printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
possible, max_t(int, possible - num_processors, 0));
@@ -1352,7 +1370,7 @@ void cpu_disable_common(void)
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
- fixup_irqs(cpu_online_map);
+ fixup_irqs();
}
int native_cpu_disable(void)
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f949be..ce5054642247 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+ send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7081a9..f8be6f1d2e48 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+ send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
while (!cpus_empty(f->flush_cpumask))
cpu_relax();
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa74..f885023167e0 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
static struct bau_control * __init uv_table_bases_init(int blade, int node)
{
int i;
- int *ip;
struct bau_msg_status *msp;
struct bau_control *bau_tabp;
@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
bau_cpubits_clear(&msp->seen_by, (int)
uv_blade_nr_possible_cpus(blade));
- bau_tabp->watching =
- kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
- BUG_ON(!bau_tabp->watching);
-
- for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
- *ip = 0;
-
uv_bau_table_bases[blade] = bau_tabp;
return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
bcp->bau_msg_head = bau_tablesp->va_queue_first;
bcp->va_queue_first = bau_tablesp->va_queue_first;
bcp->va_queue_last = bau_tablesp->va_queue_last;
- bcp->watching = bau_tablesp->watching;
bcp->msg_statuses = bau_tablesp->msg_statuses;
bcp->descriptor_base = adp;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 141907ab6e22..ce6650eb64e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
#include "cpu/mcheck/mce.h"
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
-
asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
#endif
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
static int ignore_nmis;
static inline void conditional_sti(struct pt_regs *regs)
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 8;
- /* This is always a kernel trap and never fixable (and thus must
- never return). */
+ /*
+ * This is always a kernel trap and never fixable (and thus must
+ * never return).
+ */
for (;;)
die(str, regs, error_code);
}
@@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
}
#ifdef CONFIG_X86_64
-/* Help handler running on IST stack to switch back to user stack
- for scheduling or signal handling. The actual stack switch is done in
- entry.S */
+/*
+ * Help handler running on IST stack to switch back to user stack
+ * for scheduling or signal handling. The actual stack switch is done in
+ * entry.S
+ */
asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = eregs;
@@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
/* Exception from user space */
else if (user_mode(eregs))
regs = task_pt_regs(current);
- /* Exception from kernel and interrupts are enabled. Move to
- kernel process stack. */
+ /*
+ * Exception from kernel and interrupts are enabled. Move to
+ * kernel process stack.
+ */
else if (eregs->flags & X86_EFLAGS_IF)
regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
if (eregs != regs)
@@ -685,12 +691,7 @@ void math_error(void __user *ip)
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
- err = swd & ~cwd & 0x3f;
-
-#ifdef CONFIG_X86_32
- if (!err)
- return;
-#endif
+ err = swd & ~cwd;
if (err & 0x001) { /* Invalid op */
/*
@@ -708,7 +709,11 @@ void math_error(void __user *ip)
} else if (err & 0x020) { /* Precision */
info.si_code = FPE_FLTRES;
} else {
- info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
+ /*
+ * If we're using IRQ 13, or supposedly even some trap 16
+ * implementations, it's possible we get a spurious trap...
+ */
+ return; /* Spurious trap, no error */
}
force_sig_info(SIGFPE, &info, task);
}
@@ -941,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
void __init trap_init(void)
{
-#ifdef CONFIG_X86_32
int i;
-#endif
#ifdef CONFIG_EISA
void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1000,11 +1003,15 @@ void __init trap_init(void)
}
set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+#endif
/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);
+#ifdef CONFIG_X86_64
+ set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+#else
set_bit(SYSCALL_VECTOR, used_vectors);
#endif
/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
/* Upper bound is clockevent's use of ulong for cycle deltas. */
evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
evt->min_delta_ns = clockevent_delta2ns(1, evt);
- evt->cpumask = cpumask_of_cpu(cpu);
+ evt->cpumask = cpumask_of(cpu);
printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e6999182..2b54fe002e94 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
* Restore the extended state if present. Otherwise, restore the FP/SSE
* state.
*/
-int restore_user_xstate(void __user *buf)
+static int restore_user_xstate(void __user *buf)
{
struct _fpx_sw_bytes fx_sw_user;
u64 mask;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c02343594b4d..d3ec292f00f2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
ifeq ($(CONFIG_KVM_TRACE),y)
common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
endif
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 59ebd37ad79e..e665d1c623ca 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
+ struct kvm_vcpu *vcpu;
+ int i;
+
mutex_lock(&kvm->lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->lock);
+
+ /*
+ * Provides NMI watchdog support via Virtual Wire mode.
+ * The route is: PIT -> PIC -> LVT0 in NMI mode.
+ *
+ * Note: Our Virtual Wire implementation is simplified, only
+ * propagating PIT interrupts to all VCPUs when they have set
+ * LVT0 to NMI delivery. Other PIC interrupts are just sent to
+ * VCPU0, and only if its LVT0 is in EXTINT mode.
+ */
+ if (kvm->arch.vapics_in_nmi_mode > 0)
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (vcpu)
+ kvm_apic_nmi_wd_deliver(vcpu);
+ }
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 17e41e165f1a..179dcb0103fd 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -26,10 +26,40 @@
* Port from Qemu.
*/
#include <linux/mm.h>
+#include <linux/bitops.h>
#include "irq.h"
#include <linux/kvm_host.h>
+static void pic_lock(struct kvm_pic *s)
+{
+ spin_lock(&s->lock);
+}
+
+static void pic_unlock(struct kvm_pic *s)
+{
+ struct kvm *kvm = s->kvm;
+ unsigned acks = s->pending_acks;
+ bool wakeup = s->wakeup_needed;
+ struct kvm_vcpu *vcpu;
+
+ s->pending_acks = 0;
+ s->wakeup_needed = false;
+
+ spin_unlock(&s->lock);
+
+ while (acks) {
+ kvm_notify_acked_irq(kvm, __ffs(acks));
+ acks &= acks - 1;
+ }
+
+ if (wakeup) {
+ vcpu = s->kvm->vcpus[0];
+ if (vcpu)
+ kvm_vcpu_kick(vcpu);
+ }
+}
+
static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
{
s->isr &= ~(1 << irq);
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
void kvm_pic_update_irq(struct kvm_pic *s)
{
+ pic_lock(s);
pic_update_irq(s);
+ pic_unlock(s);
}
void kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
+ pic_lock(s);
if (irq >= 0 && irq < PIC_NUM_PINS) {
pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
}
+ pic_unlock(s);
}
/*
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
+ pic_lock(s);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
+ pic_unlock(s);
kvm_notify_acked_irq(kvm, irq);
return intno;
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s)
{
- int irq, irqbase;
+ int irq, irqbase, n;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
- if (s->irr & (1 << irq) || s->isr & (1 << irq))
- kvm_notify_acked_irq(kvm, irq+irqbase);
+ if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
+ n = irq + irqbase;
+ s->pics_state->pending_acks |= 1 << n;
+ }
}
s->last_irr = 0;
s->irr = 0;
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
printk(KERN_ERR "PIC: non byte write\n");
return;
}
+ pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
+ pic_unlock(s);
}
static void picdev_read(struct kvm_io_device *this,
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
printk(KERN_ERR "PIC: non byte read\n");
return;
}
+ pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
break;
}
*(unsigned char *)val = data;
+ pic_unlock(s);
}
/*
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
s->output = level;
if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
s->pics[0].isr_ack &= ~(1 << irq);
- kvm_vcpu_kick(vcpu);
+ s->wakeup_needed = true;
}
}
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
+ spin_lock_init(&s->lock);
+ s->kvm = kvm;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
s->irq_request = pic_irq_request;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index f17c8f5bbf31..2bf32a03ceec 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -25,6 +25,7 @@
#include <linux/mm_types.h>
#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
+#include <linux/spinlock.h>
#include "iodev.h"
#include "ioapic.h"
@@ -59,6 +60,10 @@ struct kvm_kpic_state {
};
struct kvm_pic {
+ spinlock_t lock;
+ bool wakeup_needed;
+ unsigned pending_acks;
+ struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
irq_request_func *irq_request;
void *irq_request_opaque;
@@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 65ef0fc2c036..8e5ee99551f6 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
#include <linux/kvm_host.h>
#include <asm/msr.h>
-#include "svm.h"
+#include <asm/svm.h>
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0fc3cab48943..afac68c0815c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
}
+static inline int apic_lvt_nmi_mode(u32 lvt_val)
+{
+ return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
+}
+
static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_NMI:
kvm_inject_nmi(vcpu);
+ kvm_vcpu_kick(vcpu);
break;
case APIC_DM_INIT:
@@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
}
break;
+ case APIC_DM_EXTINT:
+ /*
+ * Should only be called by kvm_apic_local_deliver() with LVT0,
+ * before NMI watchdog was enabled. Already handled by
+ * kvm_apic_accept_pic_intr().
+ */
+ break;
+
default:
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
delivery_mode);
@@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic->timer.period)));
}
+static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
+{
+ int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+
+ if (apic_lvt_nmi_mode(lvt0_val)) {
+ if (!nmi_wd_enabled) {
+ apic_debug("Receive NMI setting on APIC_LVT0 "
+ "for cpu %d\n", apic->vcpu->vcpu_id);
+ apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
+ }
+ } else if (nmi_wd_enabled)
+ apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
+}
+
static void apic_mmio_write(struct kvm_io_device *this,
gpa_t address, int len, const void *data)
{
@@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this,
apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
break;
+ case APIC_LVT0:
+ apic_manage_nmi_watchdog(apic, val);
case APIC_LVTT:
case APIC_LVTTHMR:
case APIC_LVTPC:
- case APIC_LVT0:
case APIC_LVT1:
case APIC_LVTERR:
/* TODO: Check vector */
@@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
}
-static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
+{
+ u32 reg = apic_get_reg(apic, lvt_type);
+ int vector, mode, trig_mode;
+
+ if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
+ vector = reg & APIC_VECTOR_MASK;
+ mode = reg & APIC_MODE_MASK;
+ trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
+ return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+ }
+ return 0;
+}
+
+void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
- int vector;
+ struct kvm_lapic *apic = vcpu->arch.apic;
- vector = apic_lvt_vector(apic, APIC_LVTT);
- return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+ if (apic)
+ kvm_apic_local_deliver(apic, APIC_LVT0);
}
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
- atomic_read(&apic->timer.pending) > 0) {
- if (__inject_apic_timer_irq(apic))
+ if (apic && atomic_read(&apic->timer.pending) > 0) {
+ if (kvm_apic_local_deliver(apic, APIC_LVTT))
atomic_dec(&apic->timer.pending);
}
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 410ddbc1aa2e..83f11c7474a1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,7 +17,6 @@
*
*/
-#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -33,6 +32,7 @@
#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
+#include <asm/vmx.h>
/*
* When setting this variable to true it enables Two-Dimensional-Paging
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mt_mask;
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask)
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
+ shadow_mt_mask = mt_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
{
int *write_count;
- write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+ gfn = unalias_gfn(kvm, gfn);
+ write_count = slot_largepage_idx(gfn,
+ gfn_to_memslot_unaliased(kvm, gfn));
*write_count += 1;
}
@@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
{
int *write_count;
- write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+ gfn = unalias_gfn(kvm, gfn);
+ write_count = slot_largepage_idx(gfn,
+ gfn_to_memslot_unaliased(kvm, gfn));
*write_count -= 1;
WARN_ON(*write_count < 0);
}
static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
{
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ struct kvm_memory_slot *slot;
int *largepage_idx;
+ gfn = unalias_gfn(kvm, gfn);
+ slot = gfn_to_memslot_unaliased(kvm, gfn);
if (slot) {
largepage_idx = slot_largepage_idx(gfn, slot);
return *largepage_idx;
@@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
return NULL;
}
-static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
u64 *spte;
@@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
spte = rmap_next(kvm, rmapp, spte);
}
- if (write_protected)
- kvm_flush_remote_tlbs(kvm);
+ return write_protected;
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&sp->oos_link);
ASSERT(is_empty_shadow_page(sp->spt));
- sp->slot_bitmap = 0;
+ bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
+ sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
struct kvm_mmu_page *sp = page_header(__pa(spte));
index = spte - sp->spt;
- __set_bit(index, sp->unsync_child_bitmap);
- sp->unsync_children = 1;
+ if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
+ sp->unsync_children++;
+ WARN_ON(!sp->unsync_children);
}
static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
- sp->unsync_children = 1;
kvm_mmu_update_parents_unsync(sp);
return 1;
}
@@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
}
+#define KVM_PAGE_ARRAY_NR 16
+
+struct kvm_mmu_pages {
+ struct mmu_page_and_offset {
+ struct kvm_mmu_page *sp;
+ unsigned int idx;
+ } page[KVM_PAGE_ARRAY_NR];
+ unsigned int nr;
+};
+
#define for_each_unsync_children(bitmap, idx) \
for (idx = find_first_bit(bitmap, 512); \
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-static int mmu_unsync_walk(struct kvm_mmu_page *sp,
- struct kvm_unsync_walk *walker)
+int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+ int idx)
{
- int i, ret;
+ int i;
- if (!sp->unsync_children)
- return 0;
+ if (sp->unsync)
+ for (i=0; i < pvec->nr; i++)
+ if (pvec->page[i].sp == sp)
+ return 0;
+
+ pvec->page[pvec->nr].sp = sp;
+ pvec->page[pvec->nr].idx = idx;
+ pvec->nr++;
+ return (pvec->nr == KVM_PAGE_ARRAY_NR);
+}
+
+static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
+ struct kvm_mmu_pages *pvec)
+{
+ int i, ret, nr_unsync_leaf = 0;
for_each_unsync_children(sp->unsync_child_bitmap, i) {
u64 ent = sp->spt[i];
- if (is_shadow_present_pte(ent)) {
+ if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
struct kvm_mmu_page *child;
child = page_header(ent & PT64_BASE_ADDR_MASK);
if (child->unsync_children) {
- ret = mmu_unsync_walk(child, walker);
- if (ret)
+ if (mmu_pages_add(pvec, child, i))
+ return -ENOSPC;
+
+ ret = __mmu_unsync_walk(child, pvec);
+ if (!ret)
+ __clear_bit(i, sp->unsync_child_bitmap);
+ else if (ret > 0)
+ nr_unsync_leaf += ret;
+ else
return ret;
- __clear_bit(i, sp->unsync_child_bitmap);
}
if (child->unsync) {
- ret = walker->entry(child, walker);
- __clear_bit(i, sp->unsync_child_bitmap);
- if (ret)
- return ret;
+ nr_unsync_leaf++;
+ if (mmu_pages_add(pvec, child, i))
+ return -ENOSPC;
}
}
}
@@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
sp->unsync_children = 0;
- return 0;
+ return nr_unsync_leaf;
+}
+
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+ struct kvm_mmu_pages *pvec)
+{
+ if (!sp->unsync_children)
+ return 0;
+
+ mmu_pages_add(pvec, sp, 0);
+ return __mmu_unsync_walk(sp, pvec);
}
static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
return NULL;
}
+static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ list_del(&sp->oos_link);
+ --kvm->stat.mmu_unsync_global;
+}
+
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
sp->unsync = 0;
+ if (sp->global)
+ kvm_unlink_unsync_global(kvm, sp);
--kvm->stat.mmu_unsync;
}
@@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 1;
}
- rmap_write_protect(vcpu->kvm, sp->gfn);
+ if (rmap_write_protect(vcpu->kvm, sp->gfn))
+ kvm_flush_remote_tlbs(vcpu->kvm);
kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 0;
}
-struct sync_walker {
- struct kvm_vcpu *vcpu;
- struct kvm_unsync_walk walker;
+struct mmu_page_path {
+ struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
+ unsigned int idx[PT64_ROOT_LEVEL-1];
};
-static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+#define for_each_sp(pvec, sp, parents, i) \
+ for (i = mmu_pages_next(&pvec, &parents, -1), \
+ sp = pvec.page[i].sp; \
+ i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
+ i = mmu_pages_next(&pvec, &parents, i))
+
+int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
+ int i)
{
- struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
- walker);
- struct kvm_vcpu *vcpu = sync_walk->vcpu;
+ int n;
- kvm_sync_page(vcpu, sp);
- return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+ for (n = i+1; n < pvec->nr; n++) {
+ struct kvm_mmu_page *sp = pvec->page[n].sp;
+
+ if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+ parents->idx[0] = pvec->page[n].idx;
+ return n;
+ }
+
+ parents->parent[sp->role.level-2] = sp;
+ parents->idx[sp->role.level-1] = pvec->page[n].idx;
+ }
+
+ return n;
}
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
- struct sync_walker walker = {
- .walker = { .entry = mmu_sync_fn, },
- .vcpu = vcpu,
- };
+ struct kvm_mmu_page *sp;
+ unsigned int level = 0;
+
+ do {
+ unsigned int idx = parents->idx[level];
+
+ sp = parents->parent[level];
+ if (!sp)
+ return;
+
+ --sp->unsync_children;
+ WARN_ON((int)sp->unsync_children < 0);
+ __clear_bit(idx, sp->unsync_child_bitmap);
+ level++;
+ } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+}
+
+static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
+ struct mmu_page_path *parents,
+ struct kvm_mmu_pages *pvec)
+{
+ parents->parent[parent->role.level-1] = NULL;
+ pvec->nr = 0;
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *parent)
+{
+ int i;
+ struct kvm_mmu_page *sp;
+ struct mmu_page_path parents;
+ struct kvm_mmu_pages pages;
+
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ while (mmu_unsync_walk(parent, &pages)) {
+ int protected = 0;
- while (mmu_unsync_walk(sp, &walker.walker))
+ for_each_sp(pages, sp, parents, i)
+ protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
+
+ if (protected)
+ kvm_flush_remote_tlbs(vcpu->kvm);
+
+ for_each_sp(pages, sp, parents, i) {
+ kvm_sync_page(vcpu, sp);
+ mmu_pages_clear_parents(&parents);
+ }
cond_resched_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ }
}
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
sp->role = role;
hlist_add_head(&sp->hash_link, bucket);
if (!metaphysical) {
- rmap_write_protect(vcpu->kvm, gfn);
+ if (rmap_write_protect(vcpu->kvm, gfn))
+ kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
}
if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
+ if (!shadow_addr)
+ return 1;
--level;
}
@@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
}
}
-struct zap_walker {
- struct kvm_unsync_walk walker;
- struct kvm *kvm;
- int zapped;
-};
-
-static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+static int mmu_zap_unsync_children(struct kvm *kvm,
+ struct kvm_mmu_page *parent)
{
- struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
- walker);
- kvm_mmu_zap_page(zap_walk->kvm, sp);
- zap_walk->zapped = 1;
- return 0;
-}
+ int i, zapped = 0;
+ struct mmu_page_path parents;
+ struct kvm_mmu_pages pages;
-static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- struct zap_walker walker = {
- .walker = { .entry = mmu_zap_fn, },
- .kvm = kvm,
- .zapped = 0,
- };
-
- if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+ if (parent->role.level == PT_PAGE_TABLE_LEVEL)
return 0;
- mmu_unsync_walk(sp, &walker.walker);
- return walker.zapped;
+
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ while (mmu_unsync_walk(parent, &pages)) {
+ struct kvm_mmu_page *sp;
+
+ for_each_sp(pages, sp, parents, i) {
+ kvm_mmu_zap_page(kvm, sp);
+ mmu_pages_clear_parents(&parents);
+ }
+ zapped += pages.nr;
+ kvm_mmu_pages_init(parent, &parents, &pages);
+ }
+
+ return zapped;
}
static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
struct kvm_mmu_page *sp = page_header(__pa(pte));
- __set_bit(slot, &sp->slot_bitmap);
+ __set_bit(slot, sp->slot_bitmap);
}
static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
return page;
}
+/*
+ * The function is based on mtrr_type_lookup() in
+ * arch/x86/kernel/cpu/mtrr/generic.c
+ */
+static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
+ u64 start, u64 end)
+{
+ int i;
+ u64 base, mask;
+ u8 prev_match, curr_match;
+ int num_var_ranges = KVM_NR_VAR_MTRR;
+
+ if (!mtrr_state->enabled)
+ return 0xFF;
+
+ /* Make end inclusive end, instead of exclusive */
+ end--;
+
+ /* Look in fixed ranges. Just return the type as per start */
+ if (mtrr_state->have_fixed && (start < 0x100000)) {
+ int idx;
+
+ if (start < 0x80000) {
+ idx = 0;
+ idx += (start >> 16);
+ return mtrr_state->fixed_ranges[idx];
+ } else if (start < 0xC0000) {
+ idx = 1 * 8;
+ idx += ((start - 0x80000) >> 14);
+ return mtrr_state->fixed_ranges[idx];
+ } else if (start < 0x1000000) {
+ idx = 3 * 8;
+ idx += ((start - 0xC0000) >> 12);
+ return mtrr_state->fixed_ranges[idx];
+ }
+ }
+
+ /*
+ * Look in variable ranges
+ * Look of multiple ranges matching this address and pick type
+ * as per MTRR precedence
+ */
+ if (!(mtrr_state->enabled & 2))
+ return mtrr_state->def_type;
+
+ prev_match = 0xFF;
+ for (i = 0; i < num_var_ranges; ++i) {
+ unsigned short start_state, end_state;
+
+ if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
+ continue;
+
+ base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
+ (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
+ mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
+ (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
+
+ start_state = ((start & mask) == (base & mask));
+ end_state = ((end & mask) == (base & mask));
+ if (start_state != end_state)
+ return 0xFE;
+
+ if ((start & mask) != (base & mask))
+ continue;
+
+ curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
+ if (prev_match == 0xFF) {
+ prev_match = curr_match;
+ continue;
+ }
+
+ if (prev_match == MTRR_TYPE_UNCACHABLE ||
+ curr_match == MTRR_TYPE_UNCACHABLE)
+ return MTRR_TYPE_UNCACHABLE;
+
+ if ((prev_match == MTRR_TYPE_WRBACK &&
+ curr_match == MTRR_TYPE_WRTHROUGH) ||
+ (prev_match == MTRR_TYPE_WRTHROUGH &&
+ curr_match == MTRR_TYPE_WRBACK)) {
+ prev_match = MTRR_TYPE_WRTHROUGH;
+ curr_match = MTRR_TYPE_WRTHROUGH;
+ }
+
+ if (prev_match != curr_match)
+ return MTRR_TYPE_UNCACHABLE;
+ }
+
+ if (prev_match != 0xFF)
+ return prev_match;
+
+ return mtrr_state->def_type;
+}
+
+static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ u8 mtrr;
+
+ mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
+ (gfn << PAGE_SHIFT) + PAGE_SIZE);
+ if (mtrr == 0xfe || mtrr == 0xff)
+ mtrr = MTRR_TYPE_WRBACK;
+ return mtrr;
+}
+
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
unsigned index;
@@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (s->role.word != sp->role.word)
return 1;
}
- kvm_mmu_mark_parents_unsync(vcpu, sp);
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
+
+ if (sp->global) {
+ list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
+ ++vcpu->kvm->stat.mmu_unsync_global;
+ } else
+ kvm_mmu_mark_parents_unsync(vcpu, sp);
+
mmu_convert_notrap(sp);
return 0;
}
@@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage,
- gfn_t gfn, pfn_t pfn, bool speculative,
+ int global, gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync)
{
u64 spte;
int ret = 0;
+ u64 mt_mask = shadow_mt_mask;
+ struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
+
+ if (!(vcpu->arch.cr4 & X86_CR4_PGE))
+ global = 0;
+ if (!global && sp->global) {
+ sp->global = 0;
+ if (sp->unsync) {
+ kvm_unlink_unsync_global(vcpu->kvm, sp);
+ kvm_mmu_mark_parents_unsync(vcpu, sp);
+ }
+ }
+
/*
* We don't set the accessed bit, since we sometimes want to see
* whether the guest actually used the pte (in order to detect
@@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
+ if (mt_mask) {
+ mt_mask = get_memory_type(vcpu, gfn) <<
+ kvm_x86_ops->get_mt_mask_shift();
+ spte |= mt_mask;
+ }
spte |= (u64)pfn << PAGE_SHIFT;
@@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= PT_WRITABLE_MASK;
+ /*
+ * Optimization: for pte sync, if spte was writable the hash
+ * lookup is unnecessary (and expensive). Write protection
+ * is responsibility of mmu_get_page / kvm_sync_page.
+ * Same reasoning can be applied to dirty page accounting.
+ */
+ if (!can_unsync && is_writeble_pte(*shadow_pte))
+ goto set_pte;
+
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__func__, gfn);
@@ -1495,8 +1746,8 @@ set_pte:
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
- int *ptwrite, int largepage, gfn_t gfn,
- pfn_t pfn, bool speculative)
+ int *ptwrite, int largepage, int global,
+ gfn_t gfn, pfn_t pfn, bool speculative)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
}
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
- dirty, largepage, gfn, pfn, speculative, true)) {
+ dirty, largepage, global, gfn, pfn, speculative, true)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
|| (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
0, walk->write, 1, &walk->pt_write,
- walk->largepage, gfn, walk->pfn, false);
+ walk->largepage, 0, gfn, walk->pfn, false);
++vcpu->stat.pf_fixed;
return 1;
}
@@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
}
}
+static void mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_page *sp, *n;
+
+ list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
+ kvm_sync_page(vcpu, sp);
+}
+
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
@@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
}
+void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_sync_global(vcpu);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+}
+
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
return vaddr;
@@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes)
+ const u8 *new, int bytes,
+ bool guest_initiated)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, "pre pte write");
- if (gfn == vcpu->arch.last_pt_write_gfn
- && !last_updated_pte_accessed(vcpu)) {
- ++vcpu->arch.last_pt_write_count;
- if (vcpu->arch.last_pt_write_count >= 3)
- flooded = 1;
- } else {
- vcpu->arch.last_pt_write_gfn = gfn;
- vcpu->arch.last_pt_write_count = 1;
- vcpu->arch.last_pte_updated = NULL;
+ if (guest_initiated) {
+ if (gfn == vcpu->arch.last_pt_write_gfn
+ && !last_updated_pte_accessed(vcpu)) {
+ ++vcpu->arch.last_pt_write_count;
+ if (vcpu->arch.last_pt_write_count >= 3)
+ flooded = 1;
+ } else {
+ vcpu->arch.last_pt_write_gfn = gfn;
+ vcpu->arch.last_pt_write_count = 1;
+ vcpu->arch.last_pte_updated = NULL;
+ }
}
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
- spin_lock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.invlpg(vcpu, gva);
- spin_unlock(&vcpu->kvm->mmu_lock);
kvm_mmu_flush_tlb(vcpu);
++vcpu->stat.invlpg;
}
@@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
int i;
u64 *pt;
- if (!test_bit(slot, &sp->slot_bitmap))
+ if (!test_bit(slot, sp->slot_bitmap))
continue;
pt = sp->spt;
@@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
if (sp->role.metaphysical)
continue;
- slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
gfn = unalias_gfn(vcpu->kvm, sp->gfn);
+ slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
rmapp = &slot->rmap[gfn - slot->base_gfn];
if (*rmapp)
printk(KERN_ERR "%s: (%s) shadow page has writable"
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 84eee43bbe74..9fd78b6e17ad 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
int *ptwrite;
pfn_t pfn;
u64 *sptep;
+ gpa_t pte_gpa;
};
static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
if (ret)
goto walk;
pte |= PT_DIRTY_MASK;
- kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+ kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
walker->ptes[walker->level - 1] = pte;
}
@@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return;
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+ gpte & PT_DIRTY_MASK, NULL, largepage,
+ gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
pfn, true);
}
@@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
sw->user_fault, sw->write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
- sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
- false);
+ sw->ptwrite, sw->largepage,
+ gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+ gw->gfn, sw->pfn, false);
sw->sptep = sptep;
return 1;
}
@@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
struct kvm_vcpu *vcpu, u64 addr,
u64 *sptep, int level)
{
+ struct shadow_walker *sw =
+ container_of(_sw, struct shadow_walker, walker);
- if (level == PT_PAGE_TABLE_LEVEL) {
- if (is_shadow_present_pte(*sptep))
+ /* FIXME: properly handle invlpg on large guest pages */
+ if (level == PT_PAGE_TABLE_LEVEL ||
+ ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
+ sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+ if (is_shadow_present_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
+ if (is_large_pte(*sptep))
+ --vcpu->kvm->stat.lpages;
+ }
set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
return 1;
}
@@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
+ pt_element_t gpte;
struct shadow_walker walker = {
.walker = { .entry = FNAME(shadow_invlpg_entry), },
+ .pte_gpa = -1,
};
+ spin_lock(&vcpu->kvm->mmu_lock);
walk_shadow(&walker.walker, vcpu, gva);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ if (walker.pte_gpa == -1)
+ return;
+ if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+ sizeof(pt_element_t)))
+ return;
+ if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+ if (mmu_topup_memory_caches(vcpu))
+ return;
+ kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+ sizeof(pt_element_t), 0);
+ }
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
- is_dirty_pte(gpte), 0, gfn,
+ is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
spte_to_pfn(sp->spt[i]), true, false);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9c4ce657d963..1452851ae258 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,8 @@
#include <asm/desc.h>
+#include <asm/virtext.h>
+
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
@@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
static int has_svm(void)
{
- uint32_t eax, ebx, ecx, edx;
-
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- printk(KERN_INFO "has_svm: not amd\n");
- return 0;
- }
+ const char *msg;
- cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
- if (eax < SVM_CPUID_FUNC) {
- printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
+ if (!cpu_has_svm(&msg)) {
+ printk(KERN_INFO "has_svn: %s\n", msg);
return 0;
}
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
- printk(KERN_DEBUG "has_svm: svm not available\n");
- return 0;
- }
return 1;
}
static void svm_hardware_disable(void *garbage)
{
- uint64_t efer;
-
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+ cpu_svm_disable();
}
static void svm_hardware_enable(void *garbage)
@@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+ /*
+ * SVM always stores 0 for the 'G' bit in the CS selector in
+ * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+ * Intel's VMENTRY has a check on the 'G' bit.
+ */
+ if (seg == VCPU_SREG_CS)
+ var->g = s->limit > 0xfffff;
+
+ /*
+ * Work around a bug where the busy flag in the tr selector
+ * isn't exposed
+ */
+ if (seg == VCPU_SREG_TR)
+ var->type |= 0x2;
+
var->unusable = !var->present;
}
@@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
rep = (io_info & SVM_IOIO_REP_MASK) != 0;
down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
+ skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
}
@@ -1912,6 +1916,11 @@ static int get_npt_level(void)
#endif
}
+static int svm_get_mt_mask_shift(void)
+{
+ return 0;
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
+ .get_mt_mask_shift = svm_get_mt_mask_shift,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4018b01e1f9..6259d7467648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,7 +16,6 @@
*/
#include "irq.h"
-#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -31,6 +30,8 @@
#include <asm/io.h>
#include <asm/desc.h>
+#include <asm/vmx.h>
+#include <asm/virtext.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
@@ -90,6 +91,11 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -122,7 +128,7 @@ static struct vmcs_config {
u32 vmentry_ctrl;
} vmcs_config;
-struct vmx_capability {
+static struct vmx_capability {
u32 ept;
u32 vpid;
} vmx_capability;
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
break;
+ case MSR_IA32_CR_PAT:
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ vmcs_write64(GUEST_IA32_PAT, data);
+ vcpu->arch.pat = data;
+ break;
+ }
+ /* Otherwise falls through to kvm_set_msr_common */
default:
vmx_load_host_state(vmx);
msr = find_msr_entry(vmx, msr_index);
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
static __init int cpu_has_kvm_support(void)
{
- unsigned long ecx = cpuid_ecx(1);
- return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
+ return cpu_has_vmx();
}
static __init int vmx_disabled_by_bios(void)
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void)
__vcpu_clear(vmx);
}
-static void hardware_disable(void *garbage)
+
+/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
+ * tricks.
+ */
+static void kvm_cpu_vmxoff(void)
{
- vmclear_local_vcpus();
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
write_cr4(read_cr4() & ~X86_CR4_VMXE);
}
+static void hardware_disable(void *garbage)
+{
+ vmclear_local_vcpus();
+ kvm_cpu_vmxoff();
+}
+
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
- opt = 0;
+ opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
- min = opt = 0;
+ min = 0;
+ opt = VM_ENTRY_LOAD_IA32_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
*/
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
- u32 host_sysenter_cs;
+ u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
+ u64 host_pat;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_SYSENTER_EIP, a);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
+ if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
+ rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+ host_pat = msr_low | ((u64) msr_high << 32);
+ vmcs_write64(HOST_IA32_PAT, host_pat);
+ }
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+ host_pat = msr_low | ((u64) msr_high << 32);
+ /* Write the default value follow host pat */
+ vmcs_write64(GUEST_IA32_PAT, host_pat);
+ /* Keep arch.pat sync with GUEST_IA32_PAT */
+ vmx->vcpu.arch.pat = host_pat;
+ }
+
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
u32 data_low, data_high;
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.rmode.active = 0;
+ vmx->soft_vnmi_blocked = 0;
+
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2335,6 +2374,29 @@ out:
return ret;
}
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ if (!cpu_has_virtual_nmis()) {
+ enable_irq_window(vcpu);
+ return;
+ }
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!cpu_has_virtual_nmis()) {
+ /*
+ * Tracking the NMI-blocked state in software is built upon
+ * finding the next open IRQ window. This, in turn, depends on
+ * well-behaving guests: They have to keep IRQs disabled at
+ * least as long as the NMI handler runs. Otherwise we may
+ * cause NMI nesting, maybe breaking the guest. But as this is
+ * highly unlikely, we can live with the residual risk.
+ */
+ vmx->soft_vnmi_blocked = 1;
+ vmx->vnmi_blocked_time = 0;
+ }
+
+ ++vcpu->stat.nmi_injections;
+ if (vcpu->arch.rmode.active) {
+ vmx->rmode.irq.pending = true;
+ vmx->rmode.irq.vector = NMI_VECTOR;
+ vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+ INTR_INFO_VALID_MASK);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+ return;
+ }
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
+static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+
+ vcpu->arch.nmi_window_open =
+ !(guest_intr & (GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_NMI));
+ if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+ vcpu->arch.nmi_window_open = 0;
+
+ vcpu->arch.interrupt_window_open =
+ ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ !(guest_intr & (GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS)));
+}
+
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
kvm_queue_interrupt(vcpu, irq);
}
-
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- u32 cpu_based_vm_exec_control;
-
- vcpu->arch.interrupt_window_open =
- ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+ vmx_update_window_states(vcpu);
- if (vcpu->arch.interrupt_window_open &&
- vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
- kvm_do_inject_irq(vcpu);
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vcpu->arch.nmi_window_open) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_nmi_window(vcpu);
+ return;
+ }
+ }
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (vcpu->arch.irq_summary
+ || kvm_run->request_interrupt_window)
+ enable_irq_window(vcpu);
+ return;
+ }
- if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
- vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+ if (vcpu->arch.interrupt_window_open) {
+ if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+ kvm_do_inject_irq(vcpu);
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ if (vcpu->arch.interrupt.pending)
+ vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+ }
if (!vcpu->arch.interrupt_window_open &&
(vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
- /*
- * Interrupts blocked. Wait for unblock.
- */
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- else
- cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ enable_irq_window(vcpu);
}
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
struct kvm_userspace_memory_region tss_mem = {
- .slot = 8,
+ .slot = TSS_PRIVATE_MEMSLOT,
.guest_phys_addr = addr,
.memory_size = PAGE_SIZE * 3,
.flags = 0,
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
}
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rep = (exit_qualification & 32) != 0;
port = exit_qualification >> 16;
+ skip_emulated_instruction(vcpu);
return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
}
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
KVMTRACE_0D(PEND_INTR, vcpu, handler);
+ ++vcpu->stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
if (kvm_run->request_interrupt_window &&
!vcpu->arch.irq_summary) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
- ++vcpu->stat.irq_window_exits;
return 0;
}
return 1;
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification;
u16 tss_selector;
int reason;
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
reason = (u32)exit_qualification >> 30;
+ if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
+ (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
+ == INTR_TYPE_NMI_INTR) {
+ vcpu->arch.nmi_injected = false;
+ if (cpu_has_virtual_nmis())
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
tss_selector = exit_qualification;
return kvm_task_switch(vcpu, tss_selector, reason);
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
while (!guest_state_valid(vcpu)) {
err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
- switch (err) {
- case EMULATE_DONE:
- break;
- case EMULATE_DO_MMIO:
- kvm_report_emulation_failure(vcpu, "mmio");
- /* TODO: Handle MMIO */
- return;
- default:
- kvm_report_emulation_failure(vcpu, "emulation failure");
- return;
+ if (err == EMULATE_DO_MMIO)
+ break;
+
+ if (err != EMULATE_DONE) {
+ kvm_report_emulation_failure(vcpu, "emulation failure");
+ return;
}
if (signal_pending(current))
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
- /* Guest state should be valid now, no more emulation should be needed */
- vmx->emulation_required = 0;
+ /* Guest state should be valid now except if we need to
+ * emulate an MMIO */
+ if (guest_state_valid(vcpu))
+ vmx->emulation_required = 0;
}
/*
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
(u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
+ /* If we need to emulate an MMIO from handle_invalid_guest_state
+ * we just return 0 */
+ if (vmx->emulation_required && emulate_invalid_guest_state)
+ return 0;
+
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
if (vm_need_ept() && is_paging(vcpu)) {
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
- exit_reason != EXIT_REASON_EPT_VIOLATION))
- printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
- "exit reason is 0x%x\n", __func__, exit_reason);
+ exit_reason != EXIT_REASON_EPT_VIOLATION &&
+ exit_reason != EXIT_REASON_TASK_SWITCH))
+ printk(KERN_WARNING "%s: unexpected, valid vectoring info "
+ "(0x%x) and exit reason is 0x%x\n",
+ __func__, vectoring_info, exit_reason);
+
+ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+ if (vcpu->arch.interrupt_window_open) {
+ vmx->soft_vnmi_blocked = 0;
+ vcpu->arch.nmi_window_open = 1;
+ } else if (vmx->vnmi_blocked_time > 1000000000LL &&
+ vcpu->arch.nmi_pending) {
+ /*
+ * This CPU don't support us in finding the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state on VCPU %d after 1 s timeout\n",
+ __func__, vcpu->vcpu_id);
+ vmx->soft_vnmi_blocked = 0;
+ vmx->vcpu.arch.nmi_window_open = 1;
+ }
+ }
+
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
}
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- if (!cpu_has_virtual_nmis())
- return;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
-{
- u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
- return !(guest_intr & (GUEST_INTR_STATE_NMI |
- GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_STI));
-}
-
-static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
-{
- u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
- return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_STI)) &&
- (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
-}
-
-static void enable_intr_window(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
-}
-
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
if (unblock_nmi && vector != DF_VECTOR)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
- }
+ } else if (unlikely(vmx->soft_vnmi_blocked))
+ vmx->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
idt_vectoring_info = vmx->idt_vectoring_info;
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
{
update_tpr_threshold(vcpu);
- if (cpu_has_virtual_nmis()) {
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.interrupt.pending) {
- enable_nmi_window(vcpu);
- } else if (vmx_nmi_enabled(vcpu)) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_intr_window(vcpu);
- return;
- }
- }
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- enable_intr_window(vcpu);
+ vmx_update_window_states(vcpu);
+
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vcpu->arch.nmi_window_open) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_nmi_window(vcpu);
return;
}
}
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
+ return;
+ }
if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
- if (vmx_irq_enabled(vcpu))
+ if (vcpu->arch.interrupt_window_open)
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
else
enable_irq_window(vcpu);
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+ if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
}
}
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+ vmx->entry_time = ktime_get();
+
/* Handle invalid guest state instead of entering VMX */
if (vmx->emulation_required && emulate_invalid_guest_state) {
handle_invalid_guest_state(vcpu, kvm_run);
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
- vcpu->arch.interrupt_window_open =
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+ vmx_update_window_states(vcpu);
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
(intr_info & INTR_INFO_VALID_MASK)) {
KVMTRACE_0D(NMI, vcpu, handler);
asm("int $2");
@@ -3455,6 +3571,11 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
+static int vmx_get_mt_mask_shift(void)
+{
+ return VMX_EPT_MT_EPTE_SHIFT;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
+ .get_mt_mask_shift = vmx_get_mt_mask_shift,
};
static int __init vmx_init(void)
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void)
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
VMX_EPT_IGMT_BIT);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
- VMX_EPT_EXECUTABLE_MASK);
+ VMX_EPT_EXECUTABLE_MASK,
+ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
kvm_enable_tdp();
} else
kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1f8ff2f1fa2..cc17546a2406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,11 +34,13 @@
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
+#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
+#include <asm/mtrr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -86,6 +88,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
+ { "request_nmi", VCPU_STAT(request_nmi_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +96,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
{ "irq_injections", VCPU_STAT(irq_injections) },
+ { "nmi_injections", VCPU_STAT(nmi_injections) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -101,6 +105,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
+ { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
@@ -312,6 +317,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
return;
}
@@ -355,6 +361,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -449,7 +456,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS,
+ MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
};
static unsigned num_msrs_to_save;
@@ -648,10 +655,38 @@ static bool msr_mtrr_valid(unsigned msr)
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
+ u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
if (!msr_mtrr_valid(msr))
return 1;
- vcpu->arch.mtrr[msr - 0x200] = data;
+ if (msr == MSR_MTRRdefType) {
+ vcpu->arch.mtrr_state.def_type = data;
+ vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+ } else if (msr == MSR_MTRRfix64K_00000)
+ p[0] = data;
+ else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+ p[1 + msr - MSR_MTRRfix16K_80000] = data;
+ else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+ p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+ else if (msr == MSR_IA32_CR_PAT)
+ vcpu->arch.pat = data;
+ else { /* Variable MTRRs */
+ int idx, is_mtrr_mask;
+ u64 *pt;
+
+ idx = (msr - 0x200) / 2;
+ is_mtrr_mask = msr - 0x200 - 2 * idx;
+ if (!is_mtrr_mask)
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+ else
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+ *pt = data;
+ }
+
+ kvm_mmu_reset_context(vcpu);
return 0;
}
@@ -747,10 +782,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
+ u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
if (!msr_mtrr_valid(msr))
return 1;
- *pdata = vcpu->arch.mtrr[msr - 0x200];
+ if (msr == MSR_MTRRdefType)
+ *pdata = vcpu->arch.mtrr_state.def_type +
+ (vcpu->arch.mtrr_state.enabled << 10);
+ else if (msr == MSR_MTRRfix64K_00000)
+ *pdata = p[0];
+ else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+ *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+ else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+ *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+ else if (msr == MSR_IA32_CR_PAT)
+ *pdata = vcpu->arch.pat;
+ else { /* Variable MTRRs */
+ int idx, is_mtrr_mask;
+ u64 *pt;
+
+ idx = (msr - 0x200) / 2;
+ is_mtrr_mask = msr - 0x200 - 2 * idx;
+ if (!is_mtrr_mask)
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+ else
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+ *pdata = *pt;
+ }
+
return 0;
}
@@ -903,7 +965,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
- case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
@@ -929,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
r = !tdp_enabled;
break;
case KVM_CAP_IOMMU:
- r = intel_iommu_found();
+ r = iommu_found();
break;
default:
r = 0;
@@ -1188,6 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
+ entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times && *nent < maxnent; ++t) {
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1218,7 +1280,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; *nent < maxnent; ++i) {
- level_type = entry[i - 1].ecx & 0xff;
+ level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
@@ -1318,6 +1380,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+ vcpu_load(vcpu);
+ kvm_inject_nmi(vcpu);
+ vcpu_put(vcpu);
+
+ return 0;
+}
+
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@@ -1377,6 +1448,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_NMI: {
+ r = kvm_vcpu_ioctl_nmi(vcpu);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
@@ -1968,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
- kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+ kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
@@ -2404,8 +2482,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
- kvm_x86_ops->skip_emulated_instruction(vcpu);
-
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2541,7 +2617,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0);
+ PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
return 0;
out:
@@ -2729,7 +2805,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
- for (j = i + 1; j == i; j = (j + 1) % nent) {
+ for (j = i + 1; ; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2973,7 +3049,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
- r = kvm_x86_ops->vcpu_reset(vcpu);
+ r = kvm_arch_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3275,9 +3351,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
kvm_desct->padding = 0;
}
-static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
- u16 selector,
- struct descriptor_table *dtable)
+static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
+ u16 selector,
+ struct descriptor_table *dtable)
{
if (selector & 1 << 2) {
struct kvm_segment kvm_seg;
@@ -3302,7 +3378,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descritptor_dtable(vcpu, selector, &dtable);
+ get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7) {
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3321,7 +3397,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descritptor_dtable(vcpu, selector, &dtable);
+ get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7)
return 1;
@@ -3900,6 +3976,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
+ vcpu->arch.mtrr_state.have_fixed = 1;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
@@ -3925,6 +4002,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = false;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -4012,6 +4092,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4048,8 +4129,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- kvm_iommu_unmap_guest(kvm);
kvm_free_all_assigned_devices(kvm);
+ kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
@@ -4127,7 +4208,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
- || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
+ || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+ || vcpu->arch.nmi_pending;
}
static void vcpu_kick_intr(void *info)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ea051173b0da..d174db7a3370 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,6 +58,7 @@
#define SrcMem32 (4<<4) /* Memory operand (32-bit). */
#define SrcImm (5<<4) /* Immediate operand. */
#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
+#define SrcOne (7<<4) /* Implied '1' */
#define SrcMask (7<<4)
/* Generic ModRM decode. */
#define ModRM (1<<7)
@@ -70,17 +71,23 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
+/* Source 2 operand type */
+#define Src2None (0<<29)
+#define Src2CL (1<<29)
+#define Src2ImmByte (2<<29)
+#define Src2One (3<<29)
+#define Src2Mask (7<<29)
enum {
Group1_80, Group1_81, Group1_82, Group1_83,
Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
};
-static u16 opcode_table[256] = {
+static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- 0, 0, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -195,7 +202,7 @@ static u16 opcode_table[256] = {
ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
-static u16 twobyte_table[256] = {
+static u32 twobyte_table[256] = {
/* 0x00 - 0x0F */
0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -230,9 +237,14 @@ static u16 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+ DstMem | SrcReg | Src2ImmByte | ModRM,
+ DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+ DstMem | SrcReg | Src2ImmByte | ModRM,
+ DstMem | SrcReg | Src2CL | ModRM,
+ ModRM, 0,
/* 0xB0 - 0xB7 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
DstMem | SrcReg | ModRM | BitOp,
@@ -253,7 +265,7 @@ static u16 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-static u16 group_table[] = {
+static u32 group_table[] = {
[Group1_80*8] =
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -297,9 +309,9 @@ static u16 group_table[] = {
SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
};
-static u16 group2_table[] = {
+static u32 group2_table[] = {
[Group7*8] =
- SrcNone | ModRM, 0, 0, 0,
+ SrcNone | ModRM, 0, 0, SrcNone | ModRM,
SrcNone | ModRM | DstMem | Mov, 0,
SrcMem16 | ModRM | Mov, 0,
};
@@ -359,49 +371,48 @@ static u16 group2_table[] = {
"andl %"_msk",%"_LO32 _tmp"; " \
"orl %"_LO32 _tmp",%"_sav"; "
+#ifdef CONFIG_X86_64
+#define ON64(x) x
+#else
+#define ON64(x)
+#endif
+
+#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
+ do { \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "4", "2") \
+ _op _suffix " %"_x"3,%1; " \
+ _POST_EFLAGS("0", "4", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : _y ((_src).val), "i" (EFLAGS_MASK)); \
+ } while (0)
+
+
/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
- do { \
- unsigned long _tmp; \
- \
- switch ((_dst).bytes) { \
- case 2: \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "4", "2") \
- _op"w %"_wx"3,%1; " \
- _POST_EFLAGS("0", "4", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
- : _wy ((_src).val), "i" (EFLAGS_MASK)); \
- break; \
- case 4: \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "4", "2") \
- _op"l %"_lx"3,%1; " \
- _POST_EFLAGS("0", "4", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
- : _ly ((_src).val), "i" (EFLAGS_MASK)); \
- break; \
- case 8: \
- __emulate_2op_8byte(_op, _src, _dst, \
- _eflags, _qx, _qy); \
- break; \
- } \
+ do { \
+ unsigned long _tmp; \
+ \
+ switch ((_dst).bytes) { \
+ case 2: \
+ ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
+ break; \
+ case 4: \
+ ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
+ break; \
+ case 8: \
+ ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
+ break; \
+ } \
} while (0)
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
do { \
- unsigned long __tmp; \
+ unsigned long _tmp; \
switch ((_dst).bytes) { \
case 1: \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "4", "2") \
- _op"b %"_bx"3,%1; " \
- _POST_EFLAGS("0", "4", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (__tmp) \
- : _by ((_src).val), "i" (EFLAGS_MASK)); \
+ ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
break; \
default: \
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@@ -425,71 +436,68 @@ static u16 group2_table[] = {
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
"w", "r", _LO32, "r", "", "r")
-/* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags) \
- do { \
- unsigned long _tmp; \
- \
- switch ((_dst).bytes) { \
- case 1: \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "3", "2") \
- _op"b %1; " \
- _POST_EFLAGS("0", "3", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
- : "i" (EFLAGS_MASK)); \
- break; \
- case 2: \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "3", "2") \
- _op"w %1; " \
- _POST_EFLAGS("0", "3", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
- : "i" (EFLAGS_MASK)); \
- break; \
- case 4: \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "3", "2") \
- _op"l %1; " \
- _POST_EFLAGS("0", "3", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
- : "i" (EFLAGS_MASK)); \
- break; \
- case 8: \
- __emulate_1op_8byte(_op, _dst, _eflags); \
- break; \
- } \
+/* Instruction has three operands and one operand is stored in ECX register */
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
+ do { \
+ unsigned long _tmp; \
+ _type _clv = (_cl).val; \
+ _type _srcv = (_src).val; \
+ _type _dstv = (_dst).val; \
+ \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "5", "2") \
+ _op _suffix " %4,%1 \n" \
+ _POST_EFLAGS("0", "5", "2") \
+ : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
+ : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
+ ); \
+ \
+ (_cl).val = (unsigned long) _clv; \
+ (_src).val = (unsigned long) _srcv; \
+ (_dst).val = (unsigned long) _dstv; \
} while (0)
-/* Emulate an instruction with quadword operands (x86/64 only). */
-#if defined(CONFIG_X86_64)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \
- do { \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "4", "2") \
- _op"q %"_qx"3,%1; " \
- _POST_EFLAGS("0", "4", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : _qy ((_src).val), "i" (EFLAGS_MASK)); \
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
+ do { \
+ switch ((_dst).bytes) { \
+ case 2: \
+ __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+ "w", unsigned short); \
+ break; \
+ case 4: \
+ __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+ "l", unsigned int); \
+ break; \
+ case 8: \
+ ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+ "q", unsigned long)); \
+ break; \
+ } \
} while (0)
-#define __emulate_1op_8byte(_op, _dst, _eflags) \
- do { \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "3", "2") \
- _op"q %1; " \
- _POST_EFLAGS("0", "3", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
- : "i" (EFLAGS_MASK)); \
+#define __emulate_1op(_op, _dst, _eflags, _suffix) \
+ do { \
+ unsigned long _tmp; \
+ \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "3", "2") \
+ _op _suffix " %1; " \
+ _POST_EFLAGS("0", "3", "2") \
+ : "=m" (_eflags), "+m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK)); \
} while (0)
-#elif defined(__i386__)
-#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
-#define __emulate_1op_8byte(_op, _dst, _eflags)
-#endif /* __i386__ */
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op, _dst, _eflags) \
+ do { \
+ switch ((_dst).bytes) { \
+ case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
+ case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
+ case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
+ case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
+ } \
+ } while (0)
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip) \
@@ -1041,6 +1049,33 @@ done_prefixes:
c->src.bytes = 1;
c->src.val = insn_fetch(s8, 1, c->eip);
break;
+ case SrcOne:
+ c->src.bytes = 1;
+ c->src.val = 1;
+ break;
+ }
+
+ /*
+ * Decode and fetch the second source operand: register, memory
+ * or immediate.
+ */
+ switch (c->d & Src2Mask) {
+ case Src2None:
+ break;
+ case Src2CL:
+ c->src2.bytes = 1;
+ c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
+ break;
+ case Src2ImmByte:
+ c->src2.type = OP_IMM;
+ c->src2.ptr = (unsigned long *)c->eip;
+ c->src2.bytes = 1;
+ c->src2.val = insn_fetch(u8, 1, c->eip);
+ break;
+ case Src2One:
+ c->src2.bytes = 1;
+ c->src2.val = 1;
+ break;
}
/* Decode and fetch the destination operand: register or memory. */
@@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
c->regs[VCPU_REGS_RSP]);
}
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+static int emulate_pop(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
int rc;
- rc = ops->read_std(register_address(c, ss_base(ctxt),
- c->regs[VCPU_REGS_RSP]),
- &c->dst.val, c->dst.bytes, ctxt->vcpu);
+ rc = ops->read_emulated(register_address(c, ss_base(ctxt),
+ c->regs[VCPU_REGS_RSP]),
+ &c->src.val, c->src.bytes, ctxt->vcpu);
if (rc != 0)
return rc;
- register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+ return rc;
+}
+
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ c->src.bytes = c->dst.bytes;
+ rc = emulate_pop(ctxt, ops);
+ if (rc != 0)
+ return rc;
+ c->dst.val = c->src.val;
return 0;
}
@@ -1415,24 +1463,15 @@ special_insn:
emulate_1op("dec", c->dst, ctxt->eflags);
break;
case 0x50 ... 0x57: /* push reg */
- c->dst.type = OP_MEM;
- c->dst.bytes = c->op_bytes;
- c->dst.val = c->src.val;
- register_address_increment(c, &c->regs[VCPU_REGS_RSP],
- -c->op_bytes);
- c->dst.ptr = (void *) register_address(
- c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
+ emulate_push(ctxt);
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
- if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
- c->regs[VCPU_REGS_RSP]), c->dst.ptr,
- c->op_bytes, ctxt->vcpu)) != 0)
+ c->src.bytes = c->op_bytes;
+ rc = emulate_pop(ctxt, ops);
+ if (rc != 0)
goto done;
-
- register_address_increment(c, &c->regs[VCPU_REGS_RSP],
- c->op_bytes);
- c->dst.type = OP_NONE; /* Disable writeback. */
+ c->dst.val = c->src.val;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1591,7 +1630,9 @@ special_insn:
emulate_push(ctxt);
break;
case 0x9d: /* popf */
+ c->dst.type = OP_REG;
c->dst.ptr = (unsigned long *) &ctxt->eflags;
+ c->dst.bytes = c->op_bytes;
goto pop_instruction;
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1689,7 +1730,9 @@ special_insn:
emulate_grp2(ctxt);
break;
case 0xc3: /* ret */
+ c->dst.type = OP_REG;
c->dst.ptr = &c->eip;
+ c->dst.bytes = c->op_bytes;
goto pop_instruction;
case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
mov:
@@ -1778,7 +1821,7 @@ special_insn:
c->eip = saved_eip;
goto cannot_emulate;
}
- return 0;
+ break;
case 0xf4: /* hlt */
ctxt->vcpu->arch.halt_request = 1;
break;
@@ -1999,12 +2042,20 @@ twobyte_insn:
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
break;
+ case 0xa4: /* shld imm8, r, r/m */
+ case 0xa5: /* shld cl, r, r/m */
+ emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
+ break;
case 0xab:
bts: /* bts */
/* only subword offset */
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
break;
+ case 0xac: /* shrd imm8, r, r/m */
+ case 0xad: /* shrd cl, r, r/m */
+ emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
+ break;
case 0xae: /* clflush */
break;
case 0xb0 ... 0xb1: /* cmpxchg */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 50a779264bb1..a7ed208f81e3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -738,7 +738,7 @@ static void lguest_time_init(void)
/* We can't set cpumask in the initializer: damn C limitations! Set it
* here and register our timer device. */
- lguest_clockevent.cpumask = cpumask_of_cpu(0);
+ lguest_clockevent.cpumask = cpumask_of(0);
clockevents_register_device(&lguest_clockevent);
/* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4d44c5..df167f265622 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@ void __init time_init_hook(void)
**/
void mca_nmi_hook(void)
{
- /* If I recall correctly, there's a whole bunch of other things that
+ /*
+ * If I recall correctly, there's a whole bunch of other things that
* we can do to check for NMI problems, but that's all I know about
* at the moment.
*/
-
- printk("NMI generated from unknown source!\n");
+ pr_warning("NMI generated from unknown source!\n");
}
#endif
static __init int no_ipi_broadcast(char *str)
{
get_option(&str, &no_broadcast);
- printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
- "IPI Broadcast");
+ pr_info("Using %s mode\n",
+ no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
return 1;
}
-
__setup("no_ipi_broadcast=", no_ipi_broadcast);
static int __init print_ipi_mode(void)
{
- printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
- "Shortcut");
+ pr_info("Using IPI %s mode\n",
+ no_broadcast ? "No-Shortcut" : "Shortcut");
return 0;
}
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3624a364b7f3..bc4c7840b2a8 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
{ }
};
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
- return cpumask_of_cpu(cpu);
+ cpus_clear(*retmask);
+ cpu_set(cpu, *retmask);
}
static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 7b4e6d0d1690..4ba5ccaa1584 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
}
#endif
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b122e6..511d7941364f 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 2c6d234e0009..2821ffc188b5 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -24,7 +24,7 @@ static int probe_summit(void)
return 0;
}
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 52145007bd7e..9840b7ec749a 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1;
/* Used for the invalidate map that's also checked in the spinlock */
static volatile unsigned long smp_invalidate_needed;
-/* Bitmask of currently online CPUs - used by setup.c for
- /proc/cpuinfo, visible externally but still physical */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
* by scheduler but indexed physically */
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
/* This is for the new dynamic CPU boot code */
cpumask_t cpu_callin_map = CPU_MASK_NONE;
cpumask_t cpu_callout_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
/* The per processor IRQ masks (these are usually kept in sync) */
static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -364,9 +357,8 @@ void __init find_smp_config(void)
printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
/* initialize the CPU structures (moved from smp_boot_cpus) */
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++)
cpu_irq_affinity[i] = ~0;
- }
cpu_online_map = cpumask_of_cpu(boot_cpu_id);
/* The boot CPU must be extended */
@@ -679,7 +671,7 @@ void __init smp_boot_cpus(void)
/* loop over all the extended VIC CPUs and boot them. The
* Quad CPUs must be bootstrapped by their extended VIC cpu */
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
continue;
do_boot_cpu(i);
@@ -1234,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier)
* new values until the next timer interrupt in which they do process
* accounting.
*/
- for (i = 0; i < NR_CPUS; ++i)
+ for (i = 0; i < nr_cpu_ids; ++i)
per_cpu(prof_multiplier, i) = multiplier;
return 0;
@@ -1264,7 +1256,7 @@ void __init voyager_smp_intr_init(void)
int i;
/* initialize the per cpu irq mask to all disabled */
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
vic_irq_mask[i] = 0xFFFF;
VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8655b5bb0963..f99a6c6c432e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void)
#endif /* !CONFIG_NUMA */
#else
-# define permanent_kmaps_init(pgd_base) do { } while (0)
-# define set_highmem_pages_init() do { } while (0)
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
+{
+}
+static inline void set_highmem_pages_init(void)
+{
+}
#endif /* CONFIG_HIGHMEM */
void __init native_pagetable_setup_start(pgd_t *base)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d46..71a14f89f89e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
int rr, i;
rr = first_node(node_online_map);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
memnodemap[0] = 0;
node_set_online(0);
node_set(0, node_possible_map);
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
numa_set_node(i, 0);
e820_register_active_regions(0, start_pfn, last_pfn);
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14fe..09737c8af074 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
if (!node_online(i))
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
int node = early_cpu_to_node(i);
if (node == NUMA_NO_NODE)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b39771..9e5752fe4d15 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
#include <linux/irq.h>
#include <linux/dmi.h>
#include <asm/numa.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
struct pci_root_info {
char *name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e057665e55..9bb09823b362 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
#include <linux/pci.h>
#include <linux/topology.h>
#include <linux/cpu.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
#ifdef CONFIG_X86_64
#include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f089e2..62ddb73e09ed 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
#include <asm/segment.h>
#include <asm/io.h>
#include <asm/smp.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c8fbe9..bd13c3e4c6db 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/dmi.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
/*
* Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631ccbc25a..f6adf2c6d751 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
#include <linux/pci.h>
#include <asm/pci-direct.h>
#include <asm/io.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
/* Direct PCI access. This is used for PCI accesses in early boot before
the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc96b8e9..7d388d5cf548 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/init.h>
-#include "pci.h"
-
+#include <asm/pci_x86.h>
static void __devinit pci_fixup_i450nx(struct pci_dev *d)
{
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0cbbd3e..e51bf2cda4b0 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
#include <asm/pat.h>
#include <asm/e820.h>
+#include <asm/pci_x86.h>
-#include "pci.h"
static int
skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f81858..bec3b048e72b 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
#include <linux/pci.h>
#include <linux/init.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
/* arch_initcall has too random ordering, so call the initializers
in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe08bff..373b9afe6d44 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
#include <asm/io_apic.h>
#include <linux/irq.h>
#include <linux/acpi.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
#define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd481b39..f1065b129e9c 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
*/
#include <linux/init.h>
#include <linux/pci.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
/*
* Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a2234f8f3..89bf9242c80a 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
/* aperture is up to 256MB but BIOS may reserve less */
#define MMCONFIG_APER_MIN (2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761dce695..8b2d561046a3 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <asm/e820.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
/* Assume systems with more busses have correct MCFG */
#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a1994163c99d..30007ffc8e11 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
/* Static virtual mapping of the MMCONFIG aperture */
struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845d3186..2089354968a2 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
#include <linux/nodemask.h>
#include <mach_apic.h>
#include <asm/mpspec.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
#define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e803d5f..b889d824f7c6 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
#include <linux/init.h>
#include <asm/olpc.h>
#include <asm/geode.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
/*
* In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc6f729..b82cae970dfd 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/uaccess.h>
-#include "pci.h"
-#include "pci-functions.h"
-
+#include <asm/pci_x86.h>
+#include <asm/mach-default/pci-functions.h>
/* BIOS32 signature: "_32_" */
#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb19faca..16d0c0eb0d19 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
#include <linux/init.h>
#include <asm/setup.h>
+#include <asm/pci_x86.h>
#include <asm/visws/cobalt.h>
#include <asm/visws/lithium.h>
-#include "pci.h"
-
static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
static void pci_visws_disable_irq(struct pci_dev *dev) { }
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 773d68d3e912..503c240e26c7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info)
static void xen_drop_mm_ref(struct mm_struct *mm)
{
- cpumask_t mask;
+ cpumask_var_t mask;
unsigned cpu;
if (current->active_mm == mm) {
@@ -1094,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
}
/* Get the "official" set of cpus referring to our pagetable. */
- mask = mm->cpu_vm_mask;
+ if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+ for_each_online_cpu(cpu) {
+ if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+ && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+ continue;
+ smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+ }
+ return;
+ }
+ cpumask_copy(mask, &mm->cpu_vm_mask);
/* It's possible that a vcpu may have a stale reference to our
cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1103,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
if needed. */
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
- cpu_set(cpu, mask);
+ cpumask_set_cpu(cpu, mask);
}
- if (!cpus_empty(mask))
- smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+ if (!cpumask_empty(mask))
+ smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+ free_cpumask_var(mask);
}
#else
static void xen_drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index acd9b6705e02..c44e2069c7c7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
#include "xen-ops.h"
#include "mmu.h"
-cpumask_t xen_cpu_initialized_map;
+cpumask_var_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
{
int i, rc;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
if (rc >= 0) {
num_processors++;
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
if (xen_smp_intr_init(0))
BUG();
- xen_cpu_initialized_map = cpumask_of_cpu(0);
+ if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
+ panic("could not allocate xen_cpu_initialized_map\n");
+
+ cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
/* Restrict the possible_map according to max_cpus. */
while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
- for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
+ for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
continue;
cpu_clear(cpu, cpu_possible_map);
}
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
struct vcpu_guest_context *ctxt;
struct desc_struct *gdt;
- if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
+ if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const struct cpumask *mask,
+ enum ipi_vector vector)
{
unsigned cpu;
- cpus_and(mask, mask, cpu_online_map);
-
- for_each_cpu_mask_nr(cpu, mask)
+ for_each_cpu_and(cpu, mask, cpu_online_mask)
xen_send_IPI_one(cpu, vector);
}
-static void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
int cpu;
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
/* Make sure other vcpus get a chance to run if they need to. */
- for_each_cpu_mask_nr(cpu, mask) {
+ for_each_cpu(cpu, mask) {
if (xen_vcpu_stolen(cpu)) {
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
break;
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
static void xen_smp_send_call_function_single_ipi(int cpu)
{
- xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+ xen_send_IPI_mask(cpumask_of(cpu),
+ XEN_CALL_FUNCTION_SINGLE_VECTOR);
}
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db5949b..212ffe012b76 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
pfn_to_mfn(xen_start_info->console.domU.mfn);
} else {
#ifdef CONFIG_SMP
- xen_cpu_initialized_map = cpu_online_map;
+ BUG_ON(xen_cpu_initialized_map == NULL);
+ cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
#endif
xen_vcpu_restore();
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..14f240623497 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
*snap = state;
/* Add the appropriate number of ticks of stolen time,
- including any left-overs from last time. Passing NULL to
- account_steal_time accounts the time as stolen. */
+ including any left-overs from last time. */
stolen = runnable + offline + __get_cpu_var(residual_stolen);
if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
__get_cpu_var(residual_stolen) = stolen;
- account_steal_time(NULL, ticks);
+ account_steal_ticks(ticks);
/* Add the appropriate number of ticks of blocked time,
- including any left-overs from last time. Passing idle to
- account_steal_time accounts the time as idle/wait. */
+ including any left-overs from last time. */
blocked += __get_cpu_var(residual_blocked);
if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
__get_cpu_var(residual_blocked) = blocked;
- account_steal_time(idle_task(smp_processor_id()), ticks);
+ account_idle_ticks(ticks);
}
/*
@@ -437,7 +435,7 @@ void xen_setup_timer(int cpu)
evt = &per_cpu(xen_clock_events, cpu);
memcpy(evt, xen_clockevent, sizeof(*evt));
- evt->cpumask = cpumask_of_cpu(cpu);
+ evt->cpumask = cpumask_of(cpu);
evt->irq = irq;
setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e1afae8461f..c1f8faf0a2c5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
__cpuinit void xen_init_lock_cpu(int cpu);
void xen_uninit_lock_cpu(int cpu);
-extern cpumask_t xen_cpu_initialized_map;
+extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
#endif
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index 3df469dbe814..e07f5c9fcd35 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -21,7 +21,6 @@
#include <asm/uaccess.h>
-static struct fs_struct init_fs = INIT_FS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);