diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 4 | ||||
-rw-r--r-- | mm/memory.c | 89 | ||||
-rw-r--r-- | mm/mmap.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 4 | ||||
-rw-r--r-- | mm/page-writeback.c | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 228 | ||||
-rw-r--r-- | mm/readahead.c | 31 | ||||
-rw-r--r-- | mm/slab.c | 131 | ||||
-rw-r--r-- | mm/swap.c | 1 | ||||
-rw-r--r-- | mm/swap_state.c | 1 | ||||
-rw-r--r-- | mm/swapfile.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 6 |
14 files changed, 263 insertions, 245 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 1a4473fcb2ca..ae9ce6b73e8a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -126,9 +126,11 @@ comment "Memory hotplug is currently incompatible with Software Suspend" # Default to 4 for wider testing, though 8 might be more appropriate. # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. # PA-RISC's debug spinlock_t is too large for the 32-bit struct page. +# ARM26 and SPARC32 and PPC64 may use one page for multiple page tables. # config SPLIT_PTLOCK_CPUS int default "4096" if ARM && !CPU_CACHE_VIPT default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT + default "4096" if ARM26 || SPARC32 || PPC64 default "4" diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c9b43360fd33..728e9bda12ea 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -103,6 +103,9 @@ static int __init hugetlb_init(void) unsigned long i; struct page *page; + if (HPAGE_SHIFT == 0) + return 0; + for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&hugepage_freelists[i]); @@ -234,7 +237,6 @@ unsigned long hugetlb_total_pages(void) { return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); } -EXPORT_SYMBOL(hugetlb_total_pages); /* * We cannot handle pagefaults against hugetlb pages at all. They cause diff --git a/mm/memory.c b/mm/memory.c index 0f60baf6f69b..2998cfc12f5b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -549,10 +549,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, return 0; } -static void zap_pte_range(struct mmu_gather *tlb, +static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, - struct zap_details *details) + long *zap_work, struct zap_details *details) { struct mm_struct *mm = tlb->mm; pte_t *pte; @@ -563,10 +563,15 @@ static void zap_pte_range(struct mmu_gather *tlb, pte = pte_offset_map_lock(mm, pmd, addr, &ptl); do { pte_t ptent = *pte; - if (pte_none(ptent)) + if (pte_none(ptent)) { + (*zap_work)--; continue; + } if (pte_present(ptent)) { struct page *page = NULL; + + (*zap_work) -= PAGE_SIZE; + if (!(vma->vm_flags & VM_RESERVED)) { unsigned long pfn = pte_pfn(ptent); if (unlikely(!pfn_valid(pfn))) @@ -624,16 +629,18 @@ static void zap_pte_range(struct mmu_gather *tlb, if (!pte_file(ptent)) free_swap_and_cache(pte_to_swp_entry(ptent)); pte_clear_full(mm, addr, pte, tlb->fullmm); - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); add_mm_rss(mm, file_rss, anon_rss); pte_unmap_unlock(pte - 1, ptl); + + return addr; } -static inline void zap_pmd_range(struct mmu_gather *tlb, +static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, - struct zap_details *details) + long *zap_work, struct zap_details *details) { pmd_t *pmd; unsigned long next; @@ -641,16 +648,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) + if (pmd_none_or_clear_bad(pmd)) { + (*zap_work)--; continue; - zap_pte_range(tlb, vma, pmd, addr, next, details); - } while (pmd++, addr = next, addr != end); + } + next = zap_pte_range(tlb, vma, pmd, addr, next, + zap_work, details); + } while (pmd++, addr = next, (addr != end && *zap_work > 0)); + + return addr; } -static inline void zap_pud_range(struct mmu_gather *tlb, +static inline unsigned long zap_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, - struct zap_details *details) + long *zap_work, struct zap_details *details) { pud_t *pud; unsigned long next; @@ -658,15 +670,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb, pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) + if (pud_none_or_clear_bad(pud)) { + (*zap_work)--; continue; - zap_pmd_range(tlb, vma, pud, addr, next, details); - } while (pud++, addr = next, addr != end); + } + next = zap_pmd_range(tlb, vma, pud, addr, next, + zap_work, details); + } while (pud++, addr = next, (addr != end && *zap_work > 0)); + + return addr; } -static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +static unsigned long unmap_page_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, unsigned long end, - struct zap_details *details) + long *zap_work, struct zap_details *details) { pgd_t *pgd; unsigned long next; @@ -679,11 +697,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) + if (pgd_none_or_clear_bad(pgd)) { + (*zap_work)--; continue; - zap_pud_range(tlb, vma, pgd, addr, next, details); - } while (pgd++, addr = next, addr != end); + } + next = zap_pud_range(tlb, vma, pgd, addr, next, + zap_work, details); + } while (pgd++, addr = next, (addr != end && *zap_work > 0)); tlb_end_vma(tlb, vma); + + return addr; } #ifdef CONFIG_PREEMPT @@ -724,7 +747,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *details) { - unsigned long zap_bytes = ZAP_BLOCK_SIZE; + long zap_work = ZAP_BLOCK_SIZE; unsigned long tlb_start = 0; /* For tlb_finish_mmu */ int tlb_start_valid = 0; unsigned long start = start_addr; @@ -745,27 +768,25 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, *nr_accounted += (end - start) >> PAGE_SHIFT; while (start != end) { - unsigned long block; - if (!tlb_start_valid) { tlb_start = start; tlb_start_valid = 1; } - if (is_vm_hugetlb_page(vma)) { - block = end - start; + if (unlikely(is_vm_hugetlb_page(vma))) { unmap_hugepage_range(vma, start, end); - } else { - block = min(zap_bytes, end - start); - unmap_page_range(*tlbp, vma, start, - start + block, details); + zap_work -= (end - start) / + (HPAGE_SIZE / PAGE_SIZE); + start = end; + } else + start = unmap_page_range(*tlbp, vma, + start, end, &zap_work, details); + + if (zap_work > 0) { + BUG_ON(start != end); + break; } - start += block; - zap_bytes -= block; - if ((long)zap_bytes > 0) - continue; - tlb_finish_mmu(*tlbp, tlb_start, start); if (need_resched() || @@ -779,7 +800,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); tlb_start_valid = 0; - zap_bytes = ZAP_BLOCK_SIZE; + zap_work = ZAP_BLOCK_SIZE; } } out: diff --git a/mm/mmap.c b/mm/mmap.c index 320dda1778c3..6c997b159600 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -155,10 +155,6 @@ int __vm_enough_memory(long pages, int cap_sys_admin) return -ENOMEM; } -EXPORT_SYMBOL(sysctl_overcommit_memory); -EXPORT_SYMBOL(sysctl_overcommit_ratio); -EXPORT_SYMBOL(sysctl_max_map_count); -EXPORT_SYMBOL(vm_committed_space); EXPORT_SYMBOL(__vm_enough_memory); /* diff --git a/mm/nommu.c b/mm/nommu.c index d1e076a487cb..6deb6ab3d6ad 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -44,10 +44,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int heap_stack_gap = 0; EXPORT_SYMBOL(mem_map); -EXPORT_SYMBOL(sysctl_max_map_count); -EXPORT_SYMBOL(sysctl_overcommit_memory); -EXPORT_SYMBOL(sysctl_overcommit_ratio); -EXPORT_SYMBOL(vm_committed_space); EXPORT_SYMBOL(__vm_enough_memory); /* list of shareable VMAs */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0166ea15c9ee..74138c9a22b9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -750,7 +750,6 @@ int clear_page_dirty_for_io(struct page *page) } return TestClearPageDirty(page); } -EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 259a71bacca4..104e69ca55e0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -67,7 +67,6 @@ long nr_swap_pages; int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; EXPORT_SYMBOL(totalram_pages); -EXPORT_SYMBOL(nr_swap_pages); /* * Used by page_zone() to look up the address of the struct zone whose @@ -736,9 +735,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) } local_irq_restore(flags); put_cpu(); - } - - if (page == NULL) { + } else { spin_lock_irqsave(&zone->lock, flags); page = __rmqueue(zone, order); spin_unlock_irqrestore(&zone->lock, flags); @@ -758,20 +755,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) return page; } +#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ +#define ALLOC_HARDER 0x02 /* try to alloc harder */ +#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */ +#define ALLOC_CPUSET 0x08 /* check for correct cpuset */ + /* * Return 1 if free pages are above 'mark'. This takes into account the order * of the allocation. */ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int can_try_harder, gfp_t gfp_high) + int classzone_idx, int alloc_flags) { /* free_pages my go negative - that's OK */ long min = mark, free_pages = z->free_pages - (1 << order) + 1; int o; - if (gfp_high) + if (alloc_flags & ALLOC_HIGH) min -= min / 2; - if (can_try_harder) + if (alloc_flags & ALLOC_HARDER) min -= min / 4; if (free_pages <= min + z->lowmem_reserve[classzone_idx]) @@ -789,14 +791,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, return 1; } -static inline int -should_reclaim_zone(struct zone *z, gfp_t gfp_mask) +/* + * get_page_from_freeliest goes through the zonelist trying to allocate + * a page. + */ +static struct page * +get_page_from_freelist(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, int alloc_flags) { - if (!z->reclaim_pages) - return 0; - if (gfp_mask & __GFP_NORECLAIM) - return 0; - return 1; + struct zone **z = zonelist->zones; + struct page *page = NULL; + int classzone_idx = zone_idx(*z); + + /* + * Go through the zonelist once, looking for a zone with enough free. + * See also cpuset_zone_allowed() comment in kernel/cpuset.c. + */ + do { + if ((alloc_flags & ALLOC_CPUSET) && + !cpuset_zone_allowed(*z, gfp_mask)) + continue; + + if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { + if (!zone_watermark_ok(*z, order, (*z)->pages_low, + classzone_idx, alloc_flags)) + continue; + } + + page = buffered_rmqueue(*z, order, gfp_mask); + if (page) { + zone_statistics(zonelist, *z); + break; + } + } while (*(++z) != NULL); + return page; } /* @@ -807,105 +835,75 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { const gfp_t wait = gfp_mask & __GFP_WAIT; - struct zone **zones, *z; + struct zone **z; struct page *page; struct reclaim_state reclaim_state; struct task_struct *p = current; - int i; - int classzone_idx; int do_retry; - int can_try_harder; + int alloc_flags; int did_some_progress; might_sleep_if(wait); - /* - * The caller may dip into page reserves a bit more if the caller - * cannot run direct reclaim, or is the caller has realtime scheduling - * policy - */ - can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait; + z = zonelist->zones; /* the list of zones suitable for gfp_mask */ - zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ - - if (unlikely(zones[0] == NULL)) { + if (unlikely(*z == NULL)) { /* Should this ever happen?? */ return NULL; } +restart: + page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, + zonelist, ALLOC_CPUSET); + if (page) + goto got_pg; - classzone_idx = zone_idx(zones[0]); + do + wakeup_kswapd(*z, order); + while (*(++z)); -restart: /* - * Go through the zonelist once, looking for a zone with enough free. - * See also cpuset_zone_allowed() comment in kernel/cpuset.c. + * OK, we're below the kswapd watermark and have kicked background + * reclaim. Now things get more complex, so set up alloc_flags according + * to how we want to proceed. + * + * The caller may dip into page reserves a bit more if the caller + * cannot run direct reclaim, or if the caller has realtime scheduling + * policy. */ - for (i = 0; (z = zones[i]) != NULL; i++) { - int do_reclaim = should_reclaim_zone(z, gfp_mask); - - if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) - continue; - - /* - * If the zone is to attempt early page reclaim then this loop - * will try to reclaim pages and check the watermark a second - * time before giving up and falling back to the next zone. - */ -zone_reclaim_retry: - if (!zone_watermark_ok(z, order, z->pages_low, - classzone_idx, 0, 0)) { - if (!do_reclaim) - continue; - else { - zone_reclaim(z, gfp_mask, order); - /* Only try reclaim once */ - do_reclaim = 0; - goto zone_reclaim_retry; - } - } - - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; - } - - for (i = 0; (z = zones[i]) != NULL; i++) - wakeup_kswapd(z, order); + alloc_flags = 0; + if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait) + alloc_flags |= ALLOC_HARDER; + if (gfp_mask & __GFP_HIGH) + alloc_flags |= ALLOC_HIGH; + if (wait) + alloc_flags |= ALLOC_CPUSET; /* * Go through the zonelist again. Let __GFP_HIGH and allocations - * coming from realtime tasks to go deeper into reserves + * coming from realtime tasks go deeper into reserves. * * This is the last chance, in general, before the goto nopage. * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ - for (i = 0; (z = zones[i]) != NULL; i++) { - if (!zone_watermark_ok(z, order, z->pages_min, - classzone_idx, can_try_harder, - gfp_mask & __GFP_HIGH)) - continue; - - if (wait && !cpuset_zone_allowed(z, gfp_mask)) - continue; - - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; - } + page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags); + if (page) + goto got_pg; /* This allocation should allow future memory freeing. */ if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) { if (!(gfp_mask & __GFP_NOMEMALLOC)) { +nofail_alloc: /* go through the zonelist yet again, ignoring mins */ - for (i = 0; (z = zones[i]) != NULL; i++) { - if (!cpuset_zone_allowed(z, gfp_mask)) - continue; - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; + page = get_page_from_freelist(gfp_mask, order, + zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET); + if (page) + goto got_pg; + if (gfp_mask & __GFP_NOFAIL) { + blk_congestion_wait(WRITE, HZ/50); + goto nofail_alloc; } } goto nopage; @@ -923,7 +921,7 @@ rebalance: reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - did_some_progress = try_to_free_pages(zones, gfp_mask); + did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask); p->reclaim_state = NULL; p->flags &= ~PF_MEMALLOC; @@ -931,19 +929,10 @@ rebalance: cond_resched(); if (likely(did_some_progress)) { - for (i = 0; (z = zones[i]) != NULL; i++) { - if (!zone_watermark_ok(z, order, z->pages_min, - classzone_idx, can_try_harder, - gfp_mask & __GFP_HIGH)) - continue; - - if (!cpuset_zone_allowed(z, gfp_mask)) - continue; - - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; - } + page = get_page_from_freelist(gfp_mask, order, + zonelist, alloc_flags); + if (page) + goto got_pg; } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { /* * Go through the zonelist yet one more time, keep @@ -951,18 +940,10 @@ rebalance: * a parallel oom killing, we must fail if we're still * under heavy pressure. */ - for (i = 0; (z = zones[i]) != NULL; i++) { - if (!zone_watermark_ok(z, order, z->pages_high, - classzone_idx, 0, 0)) - continue; - - if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) - continue; - - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; - } + page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, + zonelist, ALLOC_CPUSET); + if (page) + goto got_pg; out_of_memory(gfp_mask, order); goto restart; @@ -995,9 +976,7 @@ nopage: dump_stack(); show_mem(); } - return NULL; got_pg: - zone_statistics(zonelist, z); return page; } @@ -1334,7 +1313,7 @@ void show_free_areas(void) } else printk("\n"); - for_each_cpu(cpu) { + for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; pageset = zone_pcp(zone, cpu); @@ -2426,13 +2405,18 @@ void setup_per_zone_pages_min(void) } for_each_zone(zone) { + unsigned long tmp; spin_lock_irqsave(&zone->lru_lock, flags); + tmp = (pages_min * zone->present_pages) / lowmem_pages; if (is_highmem(zone)) { /* - * Often, highmem doesn't need to reserve any pages. - * But the pages_min/low/high values are also used for - * batching up page reclaim activity so we need a - * decent value here. + * __GFP_HIGH and PF_MEMALLOC allocations usually don't + * need highmem pages, so cap pages_min to a small + * value here. + * + * The (pages_high-pages_low) and (pages_low-pages_min) + * deltas controls asynch page reclaim, and so should + * not be capped for highmem. */ int min_pages; @@ -2443,19 +2427,15 @@ void setup_per_zone_pages_min(void) min_pages = 128; zone->pages_min = min_pages; } else { - /* if it's a lowmem zone, reserve a number of pages + /* + * If it's a lowmem zone, reserve a number of pages * proportionate to the zone's size. */ - zone->pages_min = (pages_min * zone->present_pages) / - lowmem_pages; + zone->pages_min = tmp; } - /* - * When interpreting these watermarks, just keep in mind that: - * zone->pages_min == (zone->pages_min * 4) / 4; - */ - zone->pages_low = (zone->pages_min * 5) / 4; - zone->pages_high = (zone->pages_min * 6) / 4; + zone->pages_low = zone->pages_min + tmp / 4; + zone->pages_high = zone->pages_min + tmp / 2; spin_unlock_irqrestore(&zone->lru_lock, flags); } } diff --git a/mm/readahead.c b/mm/readahead.c index d0b50034e245..72e7adbb87c7 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -254,7 +254,7 @@ out: */ static int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, - unsigned long offset, unsigned long nr_to_read) + pgoff_t offset, unsigned long nr_to_read) { struct inode *inode = mapping->host; struct page *page; @@ -274,7 +274,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, */ read_lock_irq(&mapping->tree_lock); for (page_idx = 0; page_idx < nr_to_read; page_idx++) { - unsigned long page_offset = offset + page_idx; + pgoff_t page_offset = offset + page_idx; if (page_offset > end_index) break; @@ -311,7 +311,7 @@ out: * memory at once. */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - unsigned long offset, unsigned long nr_to_read) + pgoff_t offset, unsigned long nr_to_read) { int ret = 0; @@ -368,7 +368,7 @@ static inline int check_ra_success(struct file_ra_state *ra, * request queues. */ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, - unsigned long offset, unsigned long nr_to_read) + pgoff_t offset, unsigned long nr_to_read) { if (bdi_read_congested(mapping->backing_dev_info)) return -1; @@ -385,7 +385,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, */ static int blockable_page_cache_readahead(struct address_space *mapping, struct file *filp, - unsigned long offset, unsigned long nr_to_read, + pgoff_t offset, unsigned long nr_to_read, struct file_ra_state *ra, int block) { int actual; @@ -430,14 +430,27 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp, return ret; } -/* - * page_cache_readahead is the main function. If performs the adaptive +/** + * page_cache_readahead - generic adaptive readahead + * @mapping: address_space which holds the pagecache and I/O vectors + * @ra: file_ra_state which holds the readahead state + * @filp: passed on to ->readpage() and ->readpages() + * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units + * @req_size: hint: total size of the read which the caller is performing in + * PAGE_CACHE_SIZE units + * + * page_cache_readahead() is the main function. If performs the adaptive * readahead window size management and submits the readahead I/O. + * + * Note that @filp is purely used for passing on to the ->readpage[s]() + * handler: it may refer to a different file from @mapping (so we may not use + * @filp->f_mapping or @filp->f_dentry->d_inode here). + * Also, @ra may not be equal to &@filp->f_ra. + * */ unsigned long page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, - struct file *filp, unsigned long offset, - unsigned long req_size) + struct file *filp, pgoff_t offset, unsigned long req_size) { unsigned long max, newsize; int sequential; diff --git a/mm/slab.c b/mm/slab.c index 22bfb0b2ac8b..e5ec26e0c460 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -368,7 +368,7 @@ static inline void kmem_list3_init(struct kmem_list3 *parent) * manages a cache. */ -struct kmem_cache_s { +struct kmem_cache { /* 1) per-cpu data, touched during every alloc/free */ struct array_cache *array[NR_CPUS]; unsigned int batchcount; @@ -434,7 +434,7 @@ struct kmem_cache_s { /* Optimization question: fewer reaps means less * probability for unnessary cpucache drain/refill cycles. * - * OTHO the cpuarrays can contain lots of objects, + * OTOH the cpuarrays can contain lots of objects, * which could lock up otherwise freeable slabs. */ #define REAPTIMEOUT_CPUC (2*HZ) @@ -565,14 +565,29 @@ static void **dbg_userword(kmem_cache_t *cachep, void *objp) #define BREAK_GFP_ORDER_LO 0 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; -/* Macros for storing/retrieving the cachep and or slab from the +/* Functions for storing/retrieving the cachep and or slab from the * global 'mem_map'. These are used to find the slab an obj belongs to. * With kfree(), these are used to find the cache which an obj belongs to. */ -#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x)) -#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next) -#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x)) -#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev) +static inline void page_set_cache(struct page *page, struct kmem_cache *cache) +{ + page->lru.next = (struct list_head *)cache; +} + +static inline struct kmem_cache *page_get_cache(struct page *page) +{ + return (struct kmem_cache *)page->lru.next; +} + +static inline void page_set_slab(struct page *page, struct slab *slab) +{ + page->lru.prev = (struct list_head *)slab; +} + +static inline struct slab *page_get_slab(struct page *page) +{ + return (struct slab *)page->lru.prev; +} /* These are the default caches for kmalloc. Custom caches can have other sizes. */ struct cache_sizes malloc_sizes[] = { @@ -1190,11 +1205,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) int i; flags |= cachep->gfpflags; - if (likely(nodeid == -1)) { - page = alloc_pages(flags, cachep->gfporder); - } else { - page = alloc_pages_node(nodeid, flags, cachep->gfporder); - } + page = alloc_pages_node(nodeid, flags, cachep->gfporder); if (!page) return NULL; addr = page_address(page); @@ -1368,7 +1379,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) /* Print some data about the neighboring objects, if they * exist: */ - struct slab *slabp = GET_PAGE_SLAB(virt_to_page(objp)); + struct slab *slabp = page_get_slab(virt_to_page(objp)); int objnr; objnr = (objp-slabp->s_mem)/cachep->objsize; @@ -1502,6 +1513,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, { size_t left_over, slab_size, ralign; kmem_cache_t *cachep = NULL; + struct list_head *p; /* * Sanity checks... these are all serious usage bugs. @@ -1516,6 +1528,35 @@ kmem_cache_create (const char *name, size_t size, size_t align, BUG(); } + down(&cache_chain_sem); + + list_for_each(p, &cache_chain) { + kmem_cache_t *pc = list_entry(p, kmem_cache_t, next); + mm_segment_t old_fs = get_fs(); + char tmp; + int res; + + /* + * This happens when the module gets unloaded and doesn't + * destroy its slab cache and no-one else reuses the vmalloc + * area of the module. Print a warning. + */ + set_fs(KERNEL_DS); + res = __get_user(tmp, pc->name); + set_fs(old_fs); + if (res) { + printk("SLAB: cache with size %d has lost its name\n", + pc->objsize); + continue; + } + + if (!strcmp(pc->name,name)) { + printk("kmem_cache_create: duplicate cache %s\n", name); + dump_stack(); + goto oops; + } + } + #if DEBUG WARN_ON(strchr(name, ' ')); /* It confuses parsers */ if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { @@ -1592,7 +1633,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* Get cache's description obj. */ cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL); if (!cachep) - goto opps; + goto oops; memset(cachep, 0, sizeof(kmem_cache_t)); #if DEBUG @@ -1686,7 +1727,7 @@ next: printk("kmem_cache_create: couldn't create cache %s.\n", name); kmem_cache_free(&cache_cache, cachep); cachep = NULL; - goto opps; + goto oops; } slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t) + sizeof(struct slab), align); @@ -1781,43 +1822,14 @@ next: cachep->limit = BOOT_CPUCACHE_ENTRIES; } - /* Need the semaphore to access the chain. */ - down(&cache_chain_sem); - { - struct list_head *p; - mm_segment_t old_fs; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - list_for_each(p, &cache_chain) { - kmem_cache_t *pc = list_entry(p, kmem_cache_t, next); - char tmp; - /* This happens when the module gets unloaded and doesn't - destroy its slab cache and noone else reuses the vmalloc - area of the module. Print a warning. */ - if (__get_user(tmp,pc->name)) { - printk("SLAB: cache with size %d has lost its name\n", - pc->objsize); - continue; - } - if (!strcmp(pc->name,name)) { - printk("kmem_cache_create: duplicate cache %s\n",name); - up(&cache_chain_sem); - unlock_cpu_hotplug(); - BUG(); - } - } - set_fs(old_fs); - } - /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); - up(&cache_chain_sem); unlock_cpu_hotplug(); -opps: +oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); + up(&cache_chain_sem); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2137,8 +2149,8 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) i = 1 << cachep->gfporder; page = virt_to_page(objp); do { - SET_PAGE_CACHE(page, cachep); - SET_PAGE_SLAB(page, slabp); + page_set_cache(page, cachep); + page_set_slab(page, slabp); page++; } while (--i); } @@ -2268,14 +2280,14 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, kfree_debugcheck(objp); page = virt_to_page(objp); - if (GET_PAGE_CACHE(page) != cachep) { + if (page_get_cache(page) != cachep) { printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n", - GET_PAGE_CACHE(page),cachep); + page_get_cache(page),cachep); printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); - printk(KERN_ERR "%p is %s.\n", GET_PAGE_CACHE(page), GET_PAGE_CACHE(page)->name); + printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name); WARN_ON(1); } - slabp = GET_PAGE_SLAB(page); + slabp = page_get_slab(page); if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { @@ -2627,7 +2639,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n struct slab *slabp; unsigned int objnr; - slabp = GET_PAGE_SLAB(virt_to_page(objp)); + slabp = page_get_slab(virt_to_page(objp)); l3 = cachep->nodelists[node]; list_del(&slabp->list); objnr = (objp - slabp->s_mem) / cachep->objsize; @@ -2743,7 +2755,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) #ifdef CONFIG_NUMA { struct slab *slabp; - slabp = GET_PAGE_SLAB(virt_to_page(objp)); + slabp = page_get_slab(virt_to_page(objp)); if (unlikely(slabp->nodeid != numa_node_id())) { struct array_cache *alien = NULL; int nodeid = slabp->nodeid; @@ -2829,7 +2841,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) page = virt_to_page(ptr); if (unlikely(!PageSlab(page))) goto out; - if (unlikely(GET_PAGE_CACHE(page) != cachep)) + if (unlikely(page_get_cache(page) != cachep)) goto out; return 1; out: @@ -3025,7 +3037,7 @@ void kfree(const void *objp) return; local_irq_save(flags); kfree_debugcheck(objp); - c = GET_PAGE_CACHE(virt_to_page(objp)); + c = page_get_cache(virt_to_page(objp)); __cache_free(c, (void*)objp); local_irq_restore(flags); } @@ -3262,6 +3274,7 @@ static void drain_array_locked(kmem_cache_t *cachep, /** * cache_reap - Reclaim memory from caches. + * @unused: unused parameter * * Called from workqueue/eventd every few seconds. * Purpose: @@ -3278,7 +3291,7 @@ static void cache_reap(void *unused) if (down_trylock(&cache_chain_sem)) { /* Give up. Setup the next iteration. */ - schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id()); + schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); return; } @@ -3347,7 +3360,7 @@ next: up(&cache_chain_sem); drain_remote_pages(); /* Setup the next iteration */ - schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id()); + schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); } #ifdef CONFIG_PROC_FS @@ -3594,7 +3607,7 @@ unsigned int ksize(const void *objp) if (unlikely(objp == NULL)) return 0; - return obj_reallen(GET_PAGE_CACHE(virt_to_page(objp))); + return obj_reallen(page_get_cache(virt_to_page(objp))); } diff --git a/mm/swap.c b/mm/swap.c index 154ae13d8b7e..d09cf7f03e76 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -413,7 +413,6 @@ void vm_acct_memory(long pages) } preempt_enable(); } -EXPORT_SYMBOL(vm_acct_memory); #ifdef CONFIG_HOTPLUG_CPU static void lru_drain_cache(unsigned int cpu) diff --git a/mm/swap_state.c b/mm/swap_state.c index dfd9a46755b8..0df9a57b1de8 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -40,7 +40,6 @@ struct address_space swapper_space = { .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info, }; -EXPORT_SYMBOL(swapper_space); #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) diff --git a/mm/swapfile.c b/mm/swapfile.c index 8970c0b74194..edafeace301f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -36,8 +36,6 @@ unsigned int nr_swapfiles; long total_swap_pages; static int swap_overflow; -EXPORT_SYMBOL(total_swap_pages); - static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 54a90e83cb31..729eb3eec75f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -457,7 +457,7 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) * @size: allocation size * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages - * @node node to use for allocation or -1 + * @node: node to use for allocation or -1 * * Allocate enough pages to cover @size from the page level * allocator with @gfp_mask flags. Map them into contiguous @@ -507,7 +507,7 @@ EXPORT_SYMBOL(vmalloc); * vmalloc_node - allocate memory on a specific node * * @size: allocation size - * @node; numa node + * @node: numa node * * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. diff --git a/mm/vmscan.c b/mm/vmscan.c index 135bf8ca96ee..28130541270f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1074,7 +1074,7 @@ loop_again: continue; if (!zone_watermark_ok(zone, order, - zone->pages_high, 0, 0, 0)) { + zone->pages_high, 0, 0)) { end_zone = i; goto scan; } @@ -1111,7 +1111,7 @@ scan: if (nr_pages == 0) { /* Not software suspend */ if (!zone_watermark_ok(zone, order, - zone->pages_high, end_zone, 0, 0)) + zone->pages_high, end_zone, 0)) all_zones_ok = 0; } zone->temp_priority = priority; @@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order) return; pgdat = zone->zone_pgdat; - if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0)) + if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) return; if (pgdat->kswapd_max_order < order) pgdat->kswapd_max_order = order; |