summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/fault.c')
-rw-r--r--arch/powerpc/mm/fault.c571
1 files changed, 299 insertions, 272 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3a7d580fdc59..4797d08581ce 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -45,43 +45,39 @@
#include <asm/siginfo.h>
#include <asm/debug.h>
-#include "icswx.h"
-
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline bool notify_page_fault(struct pt_regs *regs)
{
- int ret = 0;
+ bool ret = false;
+#ifdef CONFIG_KPROBES
/* kprobe_running() needs smp_processor_id() */
if (!user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 11))
- ret = 1;
+ ret = true;
preempt_enable();
}
+#endif /* CONFIG_KPROBES */
+
+ if (unlikely(debugger_fault_handler(regs)))
+ ret = true;
return ret;
}
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
- return 0;
-}
-#endif
/*
* Check whether the instruction at regs->nip is a store using
* an update addressing form which will update r1.
*/
-static int store_updates_sp(struct pt_regs *regs)
+static bool store_updates_sp(struct pt_regs *regs)
{
unsigned int inst;
if (get_user(inst, (unsigned int __user *)regs->nip))
- return 0;
+ return false;
/* check for 1 in the rA field */
if (((inst >> 16) & 0x1f) != 1)
- return 0;
+ return false;
/* check major opcode */
switch (inst >> 26) {
case 37: /* stwu */
@@ -89,7 +85,7 @@ static int store_updates_sp(struct pt_regs *regs)
case 45: /* sthu */
case 53: /* stfsu */
case 55: /* stfdu */
- return 1;
+ return true;
case 62: /* std or stdu */
return (inst & 3) == 1;
case 31:
@@ -101,18 +97,53 @@ static int store_updates_sp(struct pt_regs *regs)
case 439: /* sthux */
case 695: /* stfsux */
case 759: /* stfdux */
- return 1;
+ return true;
}
}
- return 0;
+ return false;
}
/*
* do_page_fault error handling helpers
*/
-#define MM_FAULT_RETURN 0
-#define MM_FAULT_CONTINUE -1
-#define MM_FAULT_ERR(sig) (sig)
+static int
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+{
+ /*
+ * If we are in kernel mode, bail out with a SEGV, this will
+ * be caught by the assembly which will restore the non-volatile
+ * registers before calling bad_page_fault()
+ */
+ if (!user_mode(regs))
+ return SIGSEGV;
+
+ _exception(SIGSEGV, regs, si_code, address);
+
+ return 0;
+}
+
+static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
+{
+ return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+}
+
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+{
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+ up_read(&mm->mmap_sem);
+
+ return __bad_area_nosemaphore(regs, address, si_code);
+}
+
+static noinline int bad_area(struct pt_regs *regs, unsigned long address)
+{
+ return __bad_area(regs, address, SEGV_MAPERR);
+}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
unsigned int fault)
@@ -121,7 +152,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
unsigned int lsb = 0;
if (!user_mode(regs))
- return MM_FAULT_ERR(SIGBUS);
+ return SIGBUS;
current->thread.trap_nr = BUS_ADRERR;
info.si_signo = SIGBUS;
@@ -142,25 +173,17 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
#endif
info.si_addr_lsb = lsb;
force_sig_info(SIGBUS, &info, current);
- return MM_FAULT_RETURN;
+ return 0;
}
static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
{
/*
- * Pagefault was interrupted by SIGKILL. We have no reason to
- * continue the pagefault.
+ * Kernel page fault interrupted by SIGKILL. We have no reason to
+ * continue processing.
*/
- if (fatal_signal_pending(current)) {
- /* Coming from kernel, we need to deal with uaccess fixups */
- if (user_mode(regs))
- return MM_FAULT_RETURN;
- return MM_FAULT_ERR(SIGKILL);
- }
-
- /* No fault: be happy */
- if (!(fault & VM_FAULT_ERROR))
- return MM_FAULT_CONTINUE;
+ if (fatal_signal_pending(current) && !user_mode(regs))
+ return SIGKILL;
/* Out of memory */
if (fault & VM_FAULT_OOM) {
@@ -169,18 +192,175 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* made us unable to handle the page fault gracefully.
*/
if (!user_mode(regs))
- return MM_FAULT_ERR(SIGKILL);
+ return SIGSEGV;
pagefault_out_of_memory();
- return MM_FAULT_RETURN;
+ } else {
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
+ return do_sigbus(regs, addr, fault);
+ else if (fault & VM_FAULT_SIGSEGV)
+ return bad_area_nosemaphore(regs, addr);
+ else
+ BUG();
+ }
+ return 0;
+}
+
+/* Is this a bad kernel fault ? */
+static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
+ unsigned long address)
+{
+ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
+ printk_ratelimited(KERN_CRIT "kernel tried to execute"
+ " exec-protected page (%lx) -"
+ "exploit attempt? (uid: %d)\n",
+ address, from_kuid(&init_user_ns,
+ current_uid()));
+ }
+ return is_exec || (address >= TASK_SIZE);
+}
+
+static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ struct vm_area_struct *vma,
+ bool store_update_sp)
+{
+ /*
+ * N.B. The POWER/Open ABI allows programs to access up to
+ * 288 bytes below the stack pointer.
+ * The kernel signal delivery code writes up to about 1.5kB
+ * below the stack pointer (r1) before decrementing it.
+ * The exec code can write slightly over 640kB to the stack
+ * before setting the user r1. Thus we allow the stack to
+ * expand to 1MB without further checks.
+ */
+ if (address + 0x100000 < vma->vm_end) {
+ /* get user regs even if this fault is in kernel mode */
+ struct pt_regs *uregs = current->thread.regs;
+ if (uregs == NULL)
+ return true;
+
+ /*
+ * A user-mode access to an address a long way below
+ * the stack pointer is only valid if the instruction
+ * is one which would update the stack pointer to the
+ * address accessed if the instruction completed,
+ * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+ * (or the byte, halfword, float or double forms).
+ *
+ * If we don't check this then any write to the area
+ * between the last mapped region and the stack will
+ * expand the stack rather than segfaulting.
+ */
+ if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+ return true;
+ }
+ return false;
+}
+
+static bool access_error(bool is_write, bool is_exec,
+ struct vm_area_struct *vma)
+{
+ /*
+ * Allow execution from readable areas if the MMU does not
+ * provide separate controls over reading and executing.
+ *
+ * Note: That code used to not be enabled for 4xx/BookE.
+ * It is now as I/D cache coherency for these is done at
+ * set_pte_at() time and I see no reason why the test
+ * below wouldn't be valid on those processors. This -may-
+ * break programs compiled with a really old ABI though.
+ */
+ if (is_exec) {
+ return !(vma->vm_flags & VM_EXEC) &&
+ (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
+ !(vma->vm_flags & (VM_READ | VM_WRITE)));
+ }
+
+ if (is_write) {
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return true;
+ return false;
}
- if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
- return do_sigbus(regs, addr, fault);
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ return true;
- /* We don't understand the fault code, this is fatal */
- BUG();
- return MM_FAULT_CONTINUE;
+ return false;
+}
+
+#ifdef CONFIG_PPC_SMLPAR
+static inline void cmo_account_page_fault(void)
+{
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ u32 page_ins;
+
+ preempt_disable();
+ page_ins = be32_to_cpu(get_lppaca()->page_ins);
+ page_ins += 1 << PAGE_FACTOR;
+ get_lppaca()->page_ins = cpu_to_be32(page_ins);
+ preempt_enable();
+ }
+}
+#else
+static inline void cmo_account_page_fault(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+
+#ifdef CONFIG_PPC_STD_MMU
+static void sanity_check_fault(bool is_write, unsigned long error_code)
+{
+ /*
+ * For hash translation mode, we should never get a
+ * PROTFAULT. Any update to pte to reduce access will result in us
+ * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+ * fault instead of DSISR_PROTFAULT.
+ *
+ * A pte update to relax the access will not result in a hash page table
+ * entry invalidate and hence can result in DSISR_PROTFAULT.
+ * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+ * the special !is_write in the below conditional.
+ *
+ * For platforms that doesn't supports coherent icache and do support
+ * per page noexec bit, we do setup things such that we do the
+ * sync between D/I cache via fault. But that is handled via low level
+ * hash fault code (hash_page_do_lazy_icache()) and we should not reach
+ * here in such case.
+ *
+ * For wrong access that can result in PROTFAULT, the above vma->vm_flags
+ * check should handle those and hence we should fall to the bad_area
+ * handling correctly.
+ *
+ * For embedded with per page exec support that doesn't support coherent
+ * icache we do get PROTFAULT and we handle that D/I cache sync in
+ * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON
+ * is conditional for server MMU.
+ *
+ * For radix, we can get prot fault for autonuma case, because radix
+ * page table will have them marked noaccess for user.
+ */
+ if (!radix_enabled() && !is_write)
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
+#else
+static void sanity_check_fault(bool is_write, unsigned long error_code) { }
+#endif /* CONFIG_PPC_STD_MMU */
+
+/*
+ * Define the correct "is_write" bit in error_code based
+ * on the processor family
+ */
+#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#define page_fault_is_write(__err) ((__err) & ESR_DST)
+#define page_fault_is_bad(__err) (0)
+#else
+#define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE)
+#if defined(CONFIG_PPC_8xx)
+#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
+#elif defined(CONFIG_PPC64)
+#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S)
+#else
+#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
+#endif
+#endif
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
@@ -195,91 +375,56 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int do_page_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
{
- enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- int code = SEGV_MAPERR;
- int is_write = 0;
- int trap = TRAP(regs);
- int is_exec = trap == 0x400;
- int fault;
- int rc = 0, store_update_sp = 0;
-
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- /*
- * Fortunately the bit assignments in SRR1 for an instruction
- * fault and DSISR for a data fault are mostly the same for the
- * bits we are interested in. But there are some bits which
- * indicate errors in DSISR but can validly be set in SRR1.
- */
- if (trap == 0x400)
- error_code &= 0x48200000;
- else
- is_write = error_code & DSISR_ISSTORE;
-#else
- is_write = error_code & ESR_DST;
-#endif /* CONFIG_4xx || CONFIG_BOOKE */
-
-#ifdef CONFIG_PPC_ICSWX
- /*
- * we need to do this early because this "data storage
- * interrupt" does not update the DAR/DEAR so we don't want to
- * look at it
- */
- if (error_code & ICSWX_DSI_UCT) {
- rc = acop_handle_fault(regs, address, error_code);
- if (rc)
- goto bail;
- }
-#endif /* CONFIG_PPC_ICSWX */
+ int is_exec = TRAP(regs) == 0x400;
+ int is_user = user_mode(regs);
+ int is_write = page_fault_is_write(error_code);
+ int fault, major = 0;
+ bool store_update_sp = false;
if (notify_page_fault(regs))
- goto bail;
+ return 0;
- if (unlikely(debugger_fault_handler(regs)))
- goto bail;
+ if (unlikely(page_fault_is_bad(error_code))) {
+ if (is_user) {
+ _exception(SIGBUS, regs, BUS_OBJERR, address);
+ return 0;
+ }
+ return SIGBUS;
+ }
+
+ /* Additional sanity check(s) */
+ sanity_check_fault(is_write, error_code);
/*
* The kernel should never take an execute fault nor should it
* take a page fault to a kernel address.
*/
- if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) {
- rc = SIGSEGV;
- goto bail;
- }
+ if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+ return SIGSEGV;
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
- defined(CONFIG_PPC_BOOK3S_64))
- if (error_code & DSISR_DABRMATCH) {
- /* breakpoint match */
- do_break(regs, address, error_code);
- goto bail;
+ /*
+ * If we're in an interrupt, have no user context or are running
+ * in a region with pagefaults disabled then we must not take the fault
+ */
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ if (is_user)
+ printk_ratelimited(KERN_ERR "Page fault in user mode"
+ " with faulthandler_disabled()=%d"
+ " mm=%p\n",
+ faulthandler_disabled(), mm);
+ return bad_area_nosemaphore(regs, address);
}
-#endif
/* We restore the interrupt state now */
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- if (faulthandler_disabled() || mm == NULL) {
- if (!user_mode(regs)) {
- rc = SIGSEGV;
- goto bail;
- }
- /* faulthandler_disabled() in user mode is really bad,
- as is current->mm == NULL. */
- printk(KERN_EMERG "Page fault in user mode with "
- "faulthandler_disabled() = %d mm = %p\n",
- faulthandler_disabled(), mm);
- printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
- regs->nip, regs->msr);
- die("Weird page fault", regs, SIGSEGV);
- }
-
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
@@ -287,11 +432,15 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* can result in fault, which will cause a deadlock when called with
* mmap_sem held
*/
- if (!is_exec && user_mode(regs))
+ if (is_write && is_user)
store_update_sp = store_updates_sp(regs);
- if (user_mode(regs))
+ if (is_user)
flags |= FAULT_FLAG_USER;
+ if (is_write)
+ flags |= FAULT_FLAG_WRITE;
+ if (is_exec)
+ flags |= FAULT_FLAG_INSTRUCTION;
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -308,9 +457,9 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* source. If this is invalid we can skip the address space check,
* thus avoiding the deadlock.
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
- if (!user_mode(regs) && !search_exception_tables(regs->nip))
- goto bad_area_nosemaphore;
+ if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+ if (!is_user && !search_exception_tables(regs->nip))
+ return bad_area_nosemaphore(regs, address);
retry:
down_read(&mm->mmap_sem);
@@ -324,122 +473,24 @@ retry:
}
vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
+ if (unlikely(!vma))
+ return bad_area(regs, address);
+ if (likely(vma->vm_start <= address))
goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+ return bad_area(regs, address);
- /*
- * N.B. The POWER/Open ABI allows programs to access up to
- * 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
- * below the stack pointer (r1) before decrementing it.
- * The exec code can write slightly over 640kB to the stack
- * before setting the user r1. Thus we allow the stack to
- * expand to 1MB without further checks.
- */
- if (address + 0x100000 < vma->vm_end) {
- /* get user regs even if this fault is in kernel mode */
- struct pt_regs *uregs = current->thread.regs;
- if (uregs == NULL)
- goto bad_area;
+ /* The stack is being expanded, check if it's valid */
+ if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp)))
+ return bad_area(regs, address);
- /*
- * A user-mode access to an address a long way below
- * the stack pointer is only valid if the instruction
- * is one which would update the stack pointer to the
- * address accessed if the instruction completed,
- * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
- * (or the byte, halfword, float or double forms).
- *
- * If we don't check this then any write to the area
- * between the last mapped region and the stack will
- * expand the stack rather than segfaulting.
- */
- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
+ /* Try to expand it */
+ if (unlikely(expand_stack(vma, address)))
+ return bad_area(regs, address);
good_area:
- code = SEGV_ACCERR;
-#if defined(CONFIG_6xx)
- if (error_code & 0x95700000)
- /* an error such as lwarx to I/O controller space,
- address matching DABR, eciwx, etc. */
- goto bad_area;
-#endif /* CONFIG_6xx */
-#if defined(CONFIG_8xx)
- /* The MPC8xx seems to always set 0x80000000, which is
- * "undefined". Of those that can be set, this is the only
- * one which seems bad.
- */
- if (error_code & 0x10000000)
- /* Guarded storage error. */
- goto bad_area;
-#endif /* CONFIG_8xx */
-
- if (is_exec) {
- /*
- * Allow execution from readable areas if the MMU does not
- * provide separate controls over reading and executing.
- *
- * Note: That code used to not be enabled for 4xx/BookE.
- * It is now as I/D cache coherency for these is done at
- * set_pte_at() time and I see no reason why the test
- * below wouldn't be valid on those processors. This -may-
- * break programs compiled with a really old ABI though.
- */
- if (!(vma->vm_flags & VM_EXEC) &&
- (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
- !(vma->vm_flags & (VM_READ | VM_WRITE))))
- goto bad_area;
- /* a write */
- } else if (is_write) {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- /* a read */
- } else {
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-#ifdef CONFIG_PPC_STD_MMU
- /*
- * For hash translation mode, we should never get a
- * PROTFAULT. Any update to pte to reduce access will result in us
- * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
- * fault instead of DSISR_PROTFAULT.
- *
- * A pte update to relax the access will not result in a hash page table
- * entry invalidate and hence can result in DSISR_PROTFAULT.
- * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
- * the special !is_write in the below conditional.
- *
- * For platforms that doesn't supports coherent icache and do support
- * per page noexec bit, we do setup things such that we do the
- * sync between D/I cache via fault. But that is handled via low level
- * hash fault code (hash_page_do_lazy_icache()) and we should not reach
- * here in such case.
- *
- * For wrong access that can result in PROTFAULT, the above vma->vm_flags
- * check should handle those and hence we should fall to the bad_area
- * handling correctly.
- *
- * For embedded with per page exec support that doesn't support coherent
- * icache we do get PROTFAULT and we handle that D/I cache sync in
- * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON
- * is conditional for server MMU.
- *
- * For radix, we can get prot fault for autonuma case, because radix
- * page table will have them marked noaccess for user.
- */
- if (!radix_enabled() && !is_write)
- WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
-#endif /* CONFIG_PPC_STD_MMU */
+ if (unlikely(access_error(is_write, is_exec, vma)))
+ return bad_area(regs, address);
/*
* If for any reason at all we couldn't handle the fault,
@@ -447,6 +498,7 @@ good_area:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
+ major |= fault & VM_FAULT_MAJOR;
/*
* Handle the retry right now, the mmap_sem has been released in that
@@ -464,64 +516,39 @@ good_area:
if (!fatal_signal_pending(current))
goto retry;
}
- /* We will enter mm_fault_error() below */
- } else
- up_read(&current->mm->mmap_sem);
-
- if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
- if (fault & VM_FAULT_SIGSEGV)
- goto bad_area_nosemaphore;
- rc = mm_fault_error(regs, address, fault);
- if (rc >= MM_FAULT_RETURN)
- goto bail;
- else
- rc = 0;
+
+ /*
+ * User mode? Just return to handle the fatal exception otherwise
+ * return to bad_page_fault
+ */
+ return is_user ? 0 : SIGBUS;
}
+ up_read(&current->mm->mmap_sem);
+
+ if (unlikely(fault & VM_FAULT_ERROR))
+ return mm_fault_error(regs, address, fault);
+
/*
* Major/minor page fault accounting.
*/
- if (fault & VM_FAULT_MAJOR) {
+ if (major) {
current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
-#ifdef CONFIG_PPC_SMLPAR
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- u32 page_ins;
-
- preempt_disable();
- page_ins = be32_to_cpu(get_lppaca()->page_ins);
- page_ins += 1 << PAGE_FACTOR;
- get_lppaca()->page_ins = cpu_to_be32(page_ins);
- preempt_enable();
- }
-#endif /* CONFIG_PPC_SMLPAR */
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ cmo_account_page_fault();
} else {
current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
-
- goto bail;
-
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses cause a SIGSEGV */
- if (user_mode(regs)) {
- _exception(SIGSEGV, regs, code, address);
- goto bail;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
}
+ return 0;
+}
+NOKPROBE_SYMBOL(__do_page_fault);
- if (is_exec && (error_code & DSISR_PROTFAULT))
- printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
- " page (%lx) - exploit attempt? (uid: %d)\n",
- address, from_kuid(&init_user_ns, current_uid()));
-
- rc = SIGSEGV;
-
-bail:
+int do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ enum ctx_state prev_state = exception_enter();
+ int rc = __do_page_fault(regs, address, error_code);
exception_exit(prev_state);
return rc;
}