From b5810039a54e5babf428e9a1e89fc1940fabff11 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 29 Oct 2005 18:16:12 -0700
Subject: [PATCH] core remove PageReserved

Remove PageReserved() calls from core code by tightening VM_RESERVED
handling in mm/ to cover PageReserved functionality.

PageReserved special casing is removed from get_page and put_page.

All setting and clearing of PageReserved is retained, and it is now flagged
in the page_alloc checks to help ensure we don't introduce any refcount
based freeing of Reserved pages.

MAP_PRIVATE, PROT_WRITE of VM_RESERVED regions is tentatively being
deprecated.  We never completely handled it correctly anyway, and is be
reintroduced in future if required (Hugh has a proof of concept).

Once PageReserved() calls are removed from kernel/power/swsusp.c, and all
arch/ and driver code, the Set and Clear calls, and the PG_reserved bit can
be trivially removed.

Last real user of PageReserved is swsusp, which uses PageReserved to
determine whether a struct page points to valid memory or not.  This still
needs to be addressed (a generic page_is_ram() should work).

A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and
thus mapcounted and count towards shared rss).  These writes to the struct
page could cause excessive cacheline bouncing on big systems.  There are a
number of ways this could be addressed if it is an issue.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Refcount bug fix for filemap_xip.c

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc64/mm/generic.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/sparc64/mm/generic.c')

diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index c954d91f01d0..afc01cec701f 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -127,6 +127,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
-- 
cgit v1.2.3


From b462705ac679f6195d1b23a752cda592d9107495 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Sat, 29 Oct 2005 18:16:24 -0700
Subject: [PATCH] mm: arches skip ptlock

Convert those few architectures which are calling pud_alloc, pmd_alloc,
pte_alloc_map on a user mm, not to take the page_table_lock first, nor drop it
after.  Each of these can continue to use pte_alloc_map, no need to change
over to pte_alloc_map_lock, they're neither racy nor swappable.

In the sparc64 io_remap_pfn_range, flush_tlb_range then falls outside of the
page_table_lock: that's okay, on sparc64 it's like flush_tlb_mm, and that has
always been called from outside of page_table_lock in dup_mmap.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/arm/mm/mm-armv.c     | 14 --------------
 arch/arm26/mm/memc.c      | 15 ---------------
 arch/sparc/mm/generic.c   |  4 +---
 arch/sparc64/mm/generic.c |  6 ++----
 arch/um/kernel/skas/mmu.c |  3 ---
 5 files changed, 3 insertions(+), 39 deletions(-)

(limited to 'arch/sparc64/mm/generic.c')

diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index 61bc2fa0511e..60f3e039bac2 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -179,11 +179,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
 	if (!vectors_high()) {
-		/*
-		 * This lock is here just to satisfy pmd_alloc and pte_lock
-		 */
-		spin_lock(&mm->page_table_lock);
-
 		/*
 		 * On ARM, first page must always be allocated since it
 		 * contains the machine vectors.
@@ -201,23 +196,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		set_pte(new_pte, *init_pte);
 		pte_unmap_nested(init_pte);
 		pte_unmap(new_pte);
-
-		spin_unlock(&mm->page_table_lock);
 	}
 
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
index d6b008b8db76..34def6397c3c 100644
--- a/arch/arm26/mm/memc.c
+++ b/arch/arm26/mm/memc.c
@@ -78,12 +78,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	if (!new_pgd)
 		goto no_pgd;
 
-	/*
-	 * This lock is here just to satisfy pmd_alloc and pte_lock
-         * FIXME: I bet we could avoid taking it pretty much altogether
-	 */
-	spin_lock(&mm->page_table_lock);
-
 	/*
 	 * On ARM, first page must always be allocated since it contains
 	 * the machine vectors.
@@ -113,23 +107,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
 		(PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
 
-	spin_unlock(&mm->page_table_lock);
-
 	/* update MEMC tables */
 	cpu_memc_update_all(new_pgd);
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 659c9a71f867..9604893ffdbd 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -81,9 +81,8 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
 
-	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pmd_t *pmd = pmd_alloc(current->mm, dir, from);
+		pmd_t *pmd = pmd_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pmd)
 			break;
@@ -93,7 +92,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	}
-	spin_unlock(&mm->page_table_lock);
 
 	flush_tlb_range(vma, beg, end);
 	return error;
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index afc01cec701f..112c316e7cd2 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -135,9 +135,8 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
 
-	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pud_t *pud = pud_alloc(current->mm, dir, from);
+		pud_t *pud = pud_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pud)
 			break;
@@ -147,8 +146,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	}
-	flush_tlb_range(vma, beg, end);
-	spin_unlock(&mm->page_table_lock);
 
+	flush_tlb_range(vma, beg, end);
 	return error;
 }
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 240143b616a2..02cf36e0331a 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -28,7 +28,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	pmd_t *pmd;
 	pte_t *pte;
 
-	spin_lock(&mm->page_table_lock);
 	pgd = pgd_offset(mm, proc);
 	pud = pud_alloc(mm, pgd, proc);
 	if (!pud)
@@ -63,7 +62,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
 	*pte = pte_mkexec(*pte);
 	*pte = pte_wrprotect(*pte);
-	spin_unlock(&mm->page_table_lock);
 	return(0);
 
  out_pmd:
@@ -71,7 +69,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
  out_pte:
 	pmd_free(pmd);
  out:
-	spin_unlock(&mm->page_table_lock);
 	return(-ENOMEM);
 }
 
-- 
cgit v1.2.3