From c26f91a3df1999ec1b3298372d73f90cbab81106 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 22 Mar 2010 09:32:26 +0100 Subject: x86: Remove excessive early_res debug output Commit 08677214e318297 ("x86: Make 64 bit use early_res instead of bootmem before slab") introduced early_res replacement for bootmem, but left code in __free_pages_memory() which dumps all the ranges that are being freed, without any additional information, causing some noise in dmesg during bootup. Just remove printing of the ranges, which doesn't provide anything useful anyway. While at it, remove other commented-out KERN_DEBUG messages in the NO_BOOTMEM code as well. Signed-off-by: Jiri Kosina Found-OK-by: Andrew Morton Cc: Johannes Weiner Cc: Yinghai Lu LKML-Reference: Signed-off-by: Ingo Molnar --- mm/bootmem.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'mm') diff --git a/mm/bootmem.c b/mm/bootmem.c index d7c791ef0036..9b134460b016 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -180,19 +180,12 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end) end_aligned = end & ~(BITS_PER_LONG - 1); if (end_aligned <= start_aligned) { -#if 1 - printk(KERN_DEBUG " %lx - %lx\n", start, end); -#endif for (i = start; i < end; i++) __free_pages_bootmem(pfn_to_page(i), 0); return; } -#if 1 - printk(KERN_DEBUG " %lx %lx - %lx %lx\n", - start, start_aligned, end_aligned, end); -#endif for (i = start; i < start_aligned; i++) __free_pages_bootmem(pfn_to_page(i), 0); @@ -428,9 +421,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, { #ifdef CONFIG_NO_BOOTMEM free_early(physaddr, physaddr + size); -#if 0 - printk(KERN_DEBUG "free %lx %lx\n", physaddr, size); -#endif #else unsigned long start, end; @@ -456,9 +446,6 @@ void __init free_bootmem(unsigned long addr, unsigned long size) { #ifdef CONFIG_NO_BOOTMEM free_early(addr, addr + size); -#if 0 - printk(KERN_DEBUG "free %lx %lx\n", addr, size); -#endif #else unsigned long start, 
end; -- cgit v1.2.3 From 5cfb80a73b5a52fb19d8b0611203e4dd58e8e9a2 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Tue, 23 Mar 2010 13:35:11 -0700 Subject: memcg: disable move charge in no mmu case In commit 02491447 ("memcg: move charges of anonymous swap"), I tried to disable move charge feature in no mmu case by enclosing all the related functions with "#ifdef CONFIG_MMU", but the commit places these ifdefs in wrong place. (it seems that it's mangled while handling some fixes...) This patch fixes it up. Signed-off-by: Daisuke Nishimura Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7973b5221fb8..00dda352144c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3946,28 +3946,6 @@ one_by_one: } return ret; } -#else /* !CONFIG_MMU */ -static int mem_cgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct task_struct *p, - bool threadgroup) -{ - return 0; -} -static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct task_struct *p, - bool threadgroup) -{ -} -static void mem_cgroup_move_task(struct cgroup_subsys *ss, - struct cgroup *cont, - struct cgroup *old_cont, - struct task_struct *p, - bool threadgroup) -{ -} -#endif /** * is_target_pte_for_mc - check a pte whether it is valid for move charge @@ -4330,6 +4308,28 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, } mem_cgroup_clear_mc(); } +#else /* !CONFIG_MMU */ +static int mem_cgroup_can_attach(struct cgroup_subsys *ss, + struct cgroup *cgroup, + struct task_struct *p, + bool threadgroup) +{ + return 0; +} +static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, + struct cgroup *cgroup, + struct task_struct *p, + bool threadgroup) +{ +} +static void 
mem_cgroup_move_task(struct cgroup_subsys *ss, + struct cgroup *cont, + struct cgroup *old_cont, + struct task_struct *p, + bool threadgroup) +{ +} +#endif struct cgroup_subsys mem_cgroup_subsys = { .name = "memory", -- cgit v1.2.3 From e7bbcdf3747e3919c31cfa87853c69d178bce548 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Mar 2010 13:35:12 -0700 Subject: memcontrol: fix potential null deref There was a potential null deref introduced in c62b1a3b31b5 ("memcg: use generic percpu instead of private implementation"). Signed-off-by: Dan Carpenter Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 00dda352144c..9ed760dc7448 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3691,8 +3691,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void) else mem = vmalloc(size); - if (mem) - memset(mem, 0, size); + if (!mem) + return NULL; + + memset(mem, 0, size); mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu); if (!mem->stat) { if (size < PAGE_SIZE) -- cgit v1.2.3 From 3fa30460ea502133a18a07b14452cd660906f16f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Mar 2010 13:35:21 -0700 Subject: nommu: fix an incorrect comment in the do_mmap_shared_file() Fix an incorrect comment in the do_mmap_shared_file(). If a mapping is requested MAP_SHARED, then a private copy cannot be made and still provide correct semantics. 
Signed-off-by: David Howells Reported-by: Dave Hudson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/nommu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/mm/nommu.c b/mm/nommu.c index 605ace8982a8..e4b8f4d28a3f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1040,10 +1040,9 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) if (ret != -ENOSYS) return ret; - /* getting an ENOSYS error indicates that direct mmap isn't - * possible (as opposed to tried but failed) so we'll fall - * through to making a private copy of the data and mapping - * that if we can */ + /* getting -ENOSYS indicates that direct mmap isn't possible (as + * opposed to tried but failed) so we can only give a suitable error as + * it's not possible to make a private copy if MAP_SHARED was given */ return -ENODEV; } -- cgit v1.2.3 From cb53237513bd1e090cce120efe12ede72c932b5f Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 23 Mar 2010 13:35:26 -0700 Subject: mm/ksm.c is doing an unneeded _notify in write_protect_page. ksm.c's write_protect_page implements a lockless means of verifying a page does not have any users of the page which are not accounted for via other kernel tracking means. It does this by removing the writable pte with TLB flushes, checking the page_count against the total known users, and then using set_pte_at_notify to make it a read-only entry. An unneeded mmu_notifier callout is made in the case where the known users does not match the page_count. In that event, we are inserting the identical pte and there is no need for the set_pte_at_notify, but rather the simpler set_pte_at suffices. 
Signed-off-by: Robin Holt Acked-by: Izik Eidus Acked-by: Andrea Arcangeli Acked-by: Hugh Dickins Cc: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/ksm.c b/mm/ksm.c index a93f1b7f508c..8cdfc2a1e8bf 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -751,7 +751,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, * page */ if (page_mapcount(page) + 1 + swapped != page_count(page)) { - set_pte_at_notify(mm, addr, ptep, entry); + set_pte_at(mm, addr, ptep, entry); goto out_unlock; } entry = pte_wrprotect(entry); -- cgit v1.2.3 From 413b43deab8377819aba1dbad2abf0c15d59b491 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Tue, 23 Mar 2010 13:35:28 -0700 Subject: tmpfs: fix oops on mounts with mpol=default Fix an 'oops' when a tmpfs mount point is mounted with the mpol=default mempolicy. Upon remounting a tmpfs mount point with 'mpol=default' option, the mount code crashed with a null pointer dereference. The initial problem report was on 2.6.27, but the problem exists in mainline 2.6.34-rc as well. On examining the code, we see that mpol_new returns NULL if default mempolicy was requested. This 'NULL' mempolicy is accessed to store the node mask resulting in oops. The following patch fixes it. 
Signed-off-by: Ravikiran Thirumalai Signed-off-by: KOSAKI Motohiro Cc: Christoph Lameter Cc: Mel Gorman Acked-by: Lee Schermerhorn Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 643f66e10187..745ce90308a6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2215,10 +2215,15 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) goto out; mode = MPOL_PREFERRED; break; - + case MPOL_DEFAULT: + /* + * Insist on a empty nodelist + */ + if (!nodelist) + err = 0; + goto out; /* * case MPOL_BIND: mpol_new() enforces non-empty nodemask. - * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags. */ } -- cgit v1.2.3 From d69b2e63e9172afb4d07c305601b79a55509ac4c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 23 Mar 2010 13:35:30 -0700 Subject: tmpfs: mpol=bind:0 don't cause mount error. Currently, the following mount operation causes a mount error: % mount -t tmpfs -ompol=bind:0 none /tmp This is because commit 71fe804b6d5 (mempolicy: use struct mempolicy pointer in shmem_sb_info) corrupted the MPOL_BIND parse code. This patch restores the needed code. Signed-off-by: KOSAKI Motohiro Cc: Ravikiran Thirumalai Cc: Christoph Lameter Cc: Mel Gorman Acked-by: Lee Schermerhorn Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 745ce90308a6..10db44f95749 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2222,9 +2222,13 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) if (!nodelist) err = 0; goto out; - /* - * case MPOL_BIND: mpol_new() enforces non-empty nodemask. 
- */ + case MPOL_BIND: + /* + * Insist on a nodelist + */ + if (!nodelist) + goto out; + err = 0; } mode_flags = 0; -- cgit v1.2.3 From 12821f5fb942e795f8009ece14bde868893bd811 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 23 Mar 2010 13:35:31 -0700 Subject: tmpfs: handle MPOL_LOCAL mount option properly Commit 71fe804b6d5 (mempolicy: use struct mempolicy pointer in shmem_sb_info) added the mpol=local mount option, but the feature has been broken since it was introduced, because the code always returns 1 (i.e. mount failure). This patch fixes it. Signed-off-by: KOSAKI Motohiro Cc: Ravikiran Thirumalai Cc: Christoph Lameter Cc: Mel Gorman Acked-by: Lee Schermerhorn Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 10db44f95749..fb71790398f0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2214,6 +2214,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) if (nodelist) goto out; mode = MPOL_PREFERRED; + err = 0; break; case MPOL_DEFAULT: /* -- cgit v1.2.3 From 926f2ae04f183098cf9a30521776fb2759c8afeb Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 23 Mar 2010 13:35:32 -0700 Subject: tmpfs: cleanup mpol_parse_str() mpol_parse_str() has been the source of many bugs related to its 'err' variable, because the code is ugly and unfriendly to review. This patch simplifies it. 
Signed-off-by: KOSAKI Motohiro Cc: Ravikiran Thirumalai Cc: Christoph Lameter Cc: Mel Gorman Acked-by: Lee Schermerhorn Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index fb71790398f0..6cdfa1df57f6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2195,8 +2195,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) char *rest = nodelist; while (isdigit(*rest)) rest++; - if (!*rest) - err = 0; + if (*rest) + goto out; } break; case MPOL_INTERLEAVE: @@ -2205,7 +2205,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) */ if (!nodelist) nodes = node_states[N_HIGH_MEMORY]; - err = 0; break; case MPOL_LOCAL: /* @@ -2214,7 +2213,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) if (nodelist) goto out; mode = MPOL_PREFERRED; - err = 0; break; case MPOL_DEFAULT: /* @@ -2229,7 +2227,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) */ if (!nodelist) goto out; - err = 0; } mode_flags = 0; @@ -2243,13 +2240,14 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) else if (!strcmp(flags, "relative")) mode_flags |= MPOL_F_RELATIVE_NODES; else - err = 1; + goto out; } new = mpol_new(mode, mode_flags, &nodes); if (IS_ERR(new)) - err = 1; - else { + goto out; + + { int ret; NODEMASK_SCRATCH(scratch); if (scratch) { @@ -2260,13 +2258,15 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) ret = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); if (ret) { - err = 1; mpol_put(new); - } else if (no_context) { - /* save for contextualization */ - new->w.user_nodemask = nodes; + goto out; } } + err = 0; + if (no_context) { + /* save for contextualization */ + new->w.user_nodemask = nodes; + } out: /* Restore string for error message */ -- cgit v1.2.3 From 
298359c5bf06c04258d7cf552426e198c47e83c1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 23 Mar 2010 13:35:37 -0700 Subject: exit: fix oops in sync_mm_rss In 2.6.34-rc1, removing the vhost_net module causes an oops in sync_mm_rss (called from do_exit) when the workqueue is destroyed. This does not happen on net-next, or with vhost on top of 2.6.33. The issue seems to be introduced by 34e55232e59f7b19050267a05ff1226e5cd122a5 ("mm: avoid false sharing of mm_counter") which added sync_mm_rss() that is passed task->mm, and dereferences it without checking. If task is a kernel thread, mm might be NULL. I think this might also happen e.g. with aio. This patch fixes the oops by calling sync_mm_rss when task->mm is set to NULL. I also added BUG_ON to detect any other cases where counters get incremented while mm is NULL. The oops I observed looks like this: BUG: unable to handle kernel NULL pointer dereference at 00000000000002a8 IP: [] sync_mm_rss+0x33/0x6f PGD 0 Oops: 0002 [#1] SMP last sysfs file: /sys/devices/system/cpu/cpu7/cache/index2/shared_cpu_map CPU 2 Modules linked in: vhost_net(-) tun bridge stp sunrpc ipv6 cpufreq_ondemand acpi_cpufreq freq_table kvm_intel kvm i5000_edac edac_core rtc_cmos bnx2 button i2c_i801 i2c_core rtc_core e1000e sg joydev ide_cd_mod serio_raw pcspkr rtc_lib cdrom virtio_net virtio_blk virtio_pci virtio_ring virtio af_packet e1000 shpchp aacraid uhci_hcd ohci_hcd ehci_hcd [last unloaded: microcode] Pid: 2046, comm: vhost Not tainted 2.6.34-rc1-vhost #25 System Planar/IBM System x3550 -[7978B3G]- RIP: 0010:[] [] sync_mm_rss+0x33/0x6f RSP: 0018:ffff8802379b7e60 EFLAGS: 00010202 RAX: 0000000000000008 RBX: ffff88023f2390c0 RCX: 0000000000000000 RDX: ffff88023f2396b0 RSI: 0000000000000000 RDI: ffff88023f2390c0 RBP: ffff8802379b7e60 R08: 0000000000000000 R09: 0000000000000000 R10: ffff88023aecfbc0 R11: 0000000000013240 R12: 0000000000000000 R13: ffffffff81051a6c R14: ffffe8ffffc0f540 R15: 0000000000000000 FS: 0000000000000000(0000) 
GS:ffff880001e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000002a8 CR3: 000000023af23000 CR4: 00000000000406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process vhost (pid: 2046, threadinfo ffff8802379b6000, task ffff88023f2390c0) Stack: ffff8802379b7ee0 ffffffff81040687 ffffe8ffffc0f558 ffffffffa00a3e2d <0> 0000000000000000 ffff88023f2390c0 ffffffff81055817 ffff8802379b7e98 <0> ffff8802379b7e98 0000000100000286 ffff8802379b7ee0 ffff88023ad47d78 Call Trace: [] do_exit+0x147/0x6c4 [] ? handle_rx_net+0x0/0x17 [vhost_net] [] ? autoremove_wake_function+0x0/0x39 [] ? worker_thread+0x0/0x229 [] kthreadd+0x0/0xf2 [] kernel_thread_helper+0x4/0x10 [] ? kthread+0x0/0x87 [] ? kernel_thread_helper+0x0/0x10 Code: 00 8b 87 6c 02 00 00 85 c0 74 14 48 98 f0 48 01 86 a0 02 00 00 c7 87 6c 02 00 00 00 00 00 00 8b 87 70 02 00 00 85 c0 74 14 48 98 48 01 86 a8 02 00 00 c7 87 70 02 00 00 00 00 00 00 8b 87 74 RIP [] sync_mm_rss+0x33/0x6f RSP CR2: 00000000000002a8 ---[ end trace 41603ba922beddd2 ]--- Fixing recursive fault but reboot is needed! (note: handle_rx_net is a work item using workqueue in question). sync_mm_rss+0x33/0x6f gave me a hint. I also tried reverting 34e55232e59f7b19050267a05ff1226e5cd122a5 and the oops goes away. The module in question calls use_mm and later unuse_mm from a kernel thread. It is when this kernel thread is destroyed that the crash happens. Signed-off-by: Michael S. 
Tsirkin Andrea Arcangeli Reviewed-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 1 + mm/mmu_context.c | 1 + 2 files changed, 2 insertions(+) (limited to 'mm') diff --git a/mm/memory.c b/mm/memory.c index 5b7f2002e54b..bc9ba5a1f5b9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -130,6 +130,7 @@ void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm) for (i = 0; i < NR_MM_COUNTERS; i++) { if (task->rss_stat.count[i]) { + BUG_ON(!mm); add_mm_counter(mm, i, task->rss_stat.count[i]); task->rss_stat.count[i] = 0; } diff --git a/mm/mmu_context.c b/mm/mmu_context.c index 0777654147c9..9e82e937000e 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -53,6 +53,7 @@ void unuse_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); + sync_mm_rss(tsk, mm); tsk->mm = NULL; /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); -- cgit v1.2.3 From c6b6ef8bb05af632889c5536513b9f4004961f73 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Tue, 23 Mar 2010 13:35:41 -0700 Subject: mempolicy: fix get_mempolicy() for relative and static nodes Discovered while testing other mempolicy changes: get_mempolicy() does not handle static/relative mode flags correctly. Return the value that the user specified so that it can be restored via set_mempolicy() if desired. 
Signed-off-by: Lee Schermerhorn Cc: Hugh Dickins Cc: Ravikiran Thirumalai Cc: KOSAKI Motohiro Cc: Christoph Lameter Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6cdfa1df57f6..8034abd3a135 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -806,9 +806,13 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, err = 0; if (nmask) { - task_lock(current); - get_policy_nodemask(pol, nmask); - task_unlock(current); + if (mpol_store_user_nodemask(pol)) { + *nmask = pol->w.user_nodemask; + } else { + task_lock(current); + get_policy_nodemask(pol, nmask); + task_unlock(current); + } } out: -- cgit v1.2.3 From 7561e8ca0dfaf6fca3feef982830de3b65300e5b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 25 Mar 2010 16:48:38 +0000 Subject: NOMMU: Revert 'nommu: get_user_pages(): pin last page on non-page-aligned start' Revert the following patch: commit c08c6e1f54c85fc299cf9f88cf330d6dd28a9a1d Author: Steven J. Magnani Date: Fri Mar 5 13:42:24 2010 -0800 nommu: get_user_pages(): pin last page on non-page-aligned start As it assumes that the mappings begin at the start of pages - something that isn't necessarily true on NOMMU systems. On NOMMU systems, it is possible for a mapping to only occupy part of the page, and not necessarily touch either end of it; in fact it's also possible for multiple non-overlapping mappings to coexist on one page (consider direct mappings of ROMFS files, for example). Signed-off-by: David Howells Acked-by: Steven J. 
Magnani Signed-off-by: Linus Torvalds --- mm/nommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/nommu.c b/mm/nommu.c index e4b8f4d28a3f..089982f5a4cf 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -146,7 +146,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); for (i = 0; i < nr_pages; i++) { - vma = find_extend_vma(mm, start); + vma = find_vma(mm, start); if (!vma) goto finish_or_fault; @@ -764,7 +764,7 @@ EXPORT_SYMBOL(find_vma); */ struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) { - return find_vma(mm, addr & PAGE_MASK); + return find_vma(mm, addr); } /* -- cgit v1.2.3 From e1ee65d85904c5dd4b9cea1b15d5e85e20eae8a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 25 Mar 2010 16:48:44 +0000 Subject: NOMMU: Fix __get_user_pages() to pin last page on offset buffers Fix __get_user_pages() to make it pin the last page on a buffer that doesn't begin at the start of a page, but is a multiple of PAGE_SIZE in size. The problem is that __get_user_pages() advances the pointer too much when it iterates to the next page if the page it's currently looking at isn't used from the first byte. This can cause the end of a short VMA to be reached prematurely, resulting in the last page being lost. Signed-off-by: Steven J. Magnani Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/nommu.c b/mm/nommu.c index 089982f5a4cf..63fa17d121f0 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -162,7 +162,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } if (vmas) vmas[i] = vma; - start += PAGE_SIZE; + start = (start + PAGE_SIZE) & PAGE_MASK; } return i; -- cgit v1.2.3