summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2005-10-30 02:16:32 +0100
committerLinus Torvalds <torvalds@g5.osdl.org>2005-10-30 05:40:41 +0100
commitc34d1b4d165c67b966bca4aba026443d7ff161eb (patch)
tree27ffca9daba2a6b16d29bd508faf3e68bda2aad1
parent[PATCH] mm: rmap with inner ptlock (diff)
downloadlinux-c34d1b4d165c67b966bca4aba026443d7ff161eb.tar.xz
linux-c34d1b4d165c67b966bca4aba026443d7ff161eb.zip
[PATCH] mm: kill check_user_page_readable
check_user_page_readable is a problematic variant of follow_page. It's used only by oprofile's i386 and arm backtrace code, at interrupt time, to establish whether a userspace stackframe is currently readable. This is problematic, because we want to push the page_table_lock down inside follow_page, and later split it; whereas oprofile is doing a spin_trylock on it (in the i386 case, forgotten in the arm case), and needs that to pin perhaps two pages spanned by the stackframe (which might be covered by different locks when we split). I think oprofile is going about this in the wrong way: it doesn't need to know the area is readable (neither i386 nor arm uses read protection of user pages), it doesn't need to pin the memory, it should simply __copy_from_user_inatomic, and see if that succeeds or not. Sorry, but I've not got around to devising the sparse __user annotations for this. Then we can eliminate check_user_page_readable, and return to a single follow_page without the __follow_page variants. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/arm/oprofile/backtrace.c46
-rw-r--r--arch/i386/oprofile/backtrace.c38
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/memory.c29
4 files changed, 26 insertions, 88 deletions
diff --git a/arch/arm/oprofile/backtrace.c b/arch/arm/oprofile/backtrace.c
index df35c452a8bf..7c22c12618cc 100644
--- a/arch/arm/oprofile/backtrace.c
+++ b/arch/arm/oprofile/backtrace.c
@@ -49,42 +49,22 @@ static struct frame_tail* kernel_backtrace(struct frame_tail *tail)
static struct frame_tail* user_backtrace(struct frame_tail *tail)
{
- struct frame_tail buftail;
+ struct frame_tail buftail[2];
- /* hardware pte might not be valid due to dirty/accessed bit emulation
- * so we use copy_from_user and benefit from exception fixups */
- if (copy_from_user(&buftail, tail, sizeof(struct frame_tail)))
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+ return NULL;
+ if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
return NULL;
- oprofile_add_trace(buftail.lr);
+ oprofile_add_trace(buftail[0].lr);
/* frame pointers should strictly progress back up the stack
* (towards higher addresses) */
- if (tail >= buftail.fp)
+ if (tail >= buftail[0].fp)
return NULL;
- return buftail.fp-1;
-}
-
-/* Compare two addresses and see if they're on the same page */
-#define CMP_ADDR_EQUAL(x,y,offset) ((((unsigned long) x) >> PAGE_SHIFT) \
- == ((((unsigned long) y) + offset) >> PAGE_SHIFT))
-
-/* check that the page(s) containing the frame tail are present */
-static int pages_present(struct frame_tail *tail)
-{
- struct mm_struct * mm = current->mm;
-
- if (!check_user_page_readable(mm, (unsigned long)tail))
- return 0;
-
- if (CMP_ADDR_EQUAL(tail, tail, 8))
- return 1;
-
- if (!check_user_page_readable(mm, ((unsigned long)tail) + 8))
- return 0;
-
- return 1;
+ return buftail[0].fp-1;
}
/*
@@ -118,7 +98,6 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs)
void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct frame_tail *tail;
- unsigned long last_address = 0;
tail = ((struct frame_tail *) regs->ARM_fp) - 1;
@@ -132,13 +111,6 @@ void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
return;
}
- while (depth-- && tail && !((unsigned long) tail & 3)) {
- if ((!CMP_ADDR_EQUAL(last_address, tail, 0)
- || !CMP_ADDR_EQUAL(last_address, tail, 8))
- && !pages_present(tail))
- return;
- last_address = (unsigned long) tail;
+ while (depth-- && tail && !((unsigned long) tail & 3))
tail = user_backtrace(tail);
- }
}
-
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
index 65dfd2edb671..21654be3f73f 100644
--- a/arch/i386/oprofile/backtrace.c
+++ b/arch/i386/oprofile/backtrace.c
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
+#include <asm/uaccess.h>
struct frame_head {
struct frame_head * ebp;
@@ -21,26 +22,22 @@ struct frame_head {
static struct frame_head *
dump_backtrace(struct frame_head * head)
{
- oprofile_add_trace(head->ret);
+ struct frame_head bufhead[2];
- /* frame pointers should strictly progress back up the stack
- * (towards higher addresses) */
- if (head >= head->ebp)
+ /* Also check accessibility of one struct frame_head beyond */
+ if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+ return NULL;
+ if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
return NULL;
- return head->ebp;
-}
-
-/* check that the page(s) containing the frame head are present */
-static int pages_present(struct frame_head * head)
-{
- struct mm_struct * mm = current->mm;
+ oprofile_add_trace(bufhead[0].ret);
- /* FIXME: only necessary once per page */
- if (!check_user_page_readable(mm, (unsigned long)head))
- return 0;
+ /* frame pointers should strictly progress back up the stack
+ * (towards higher addresses) */
+ if (head >= bufhead[0].ebp)
+ return NULL;
- return check_user_page_readable(mm, (unsigned long)(head + 1));
+ return bufhead[0].ebp;
}
/*
@@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
return;
}
-#ifdef CONFIG_SMP
- if (!spin_trylock(&current->mm->page_table_lock))
- return;
-#endif
-
- while (depth-- && head && pages_present(head))
+ while (depth-- && head)
head = dump_backtrace(head);
-
-#ifdef CONFIG_SMP
- spin_unlock(&current->mm->page_table_lock);
-#endif
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 972e2ce8e07c..aa8de20e2e80 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -944,7 +944,6 @@ extern struct page * vmalloc_to_page(void *addr);
extern unsigned long vmalloc_to_pfn(void *addr);
extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
int write);
-extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
int remap_pfn_range(struct vm_area_struct *, unsigned long,
unsigned long, unsigned long, pgprot_t);
diff --git a/mm/memory.c b/mm/memory.c
index 622a4ef5409f..51f7c0a220d4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -809,8 +809,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
* Do a quick page-table lookup for a single page.
* mm->page_table_lock must be held.
*/
-static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
- int read, int write, int accessed)
+struct page *follow_page(struct mm_struct *mm, unsigned long address, int write)
{
pgd_t *pgd;
pud_t *pud;
@@ -846,16 +845,12 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
if (pte_present(pte)) {
if (write && !pte_write(pte))
goto out;
- if (read && !pte_read(pte))
- goto out;
pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- if (accessed) {
- if (write && !pte_dirty(pte) &&!PageDirty(page))
- set_page_dirty(page);
- mark_page_accessed(page);
- }
+ if (write && !pte_dirty(pte) &&!PageDirty(page))
+ set_page_dirty(page);
+ mark_page_accessed(page);
return page;
}
}
@@ -864,22 +859,6 @@ out:
return NULL;
}
-inline struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write)
-{
- return __follow_page(mm, address, 0, write, 1);
-}
-
-/*
- * check_user_page_readable() can be called frm niterrupt context by oprofile,
- * so we need to avoid taking any non-irq-safe locks
- */
-int check_user_page_readable(struct mm_struct *mm, unsigned long address)
-{
- return __follow_page(mm, address, 1, 0, 0) != NULL;
-}
-EXPORT_SYMBOL(check_user_page_readable);
-
static inline int
untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
unsigned long address)