diff options
author | Mark Brown <broonie@kernel.org> | 2015-11-10 20:01:58 +0100 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2015-11-10 20:01:58 +0100 |
commit | 2a148b6f2726ced30f796435f61d6e915c979784 (patch) | |
tree | 48d233fa5639677b16b10a3442029f3a2e40ef92 /mm | |
parent | ASoC: rsnd: fixup SCU_SYS_INT_EN1 address (diff) | |
parent | Merge remote-tracking branches 'asoc/topic/wm8904', 'asoc/topic/wm8955' and '... (diff) | |
download | linux-2a148b6f2726ced30f796435f61d6e915c979784.tar.xz linux-2a148b6f2726ced30f796435f61d6e915c979784.zip |
Merge tag 'asoc-v4.3-rc6' into asoc-fix-rcar
ASoC: Updates for v4.4
Not much core work here, a few small tweaks to interfaces but mainly the
changes here are driver ones. Highlights include:
- Updates to the topology userspace interface
- Big updates to the Renesas support from Morimoto-san
- Most of the support for Intel Sky Lake systems.
- New drivers for Asahi Kasei Microdevices AK4613, Allwinner A10,
Cirrus Logic WM8998, Dialog DA7219, Nuvoton NAU8825 and Rockchip
S/PDIF.
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 36 | ||||
-rw-r--r-- | mm/cma.c | 4 | ||||
-rw-r--r-- | mm/dmapool.c | 2 | ||||
-rw-r--r-- | mm/filemap.c | 43 | ||||
-rw-r--r-- | mm/huge_memory.c | 3 | ||||
-rw-r--r-- | mm/hugetlb.c | 8 | ||||
-rw-r--r-- | mm/kasan/kasan.c | 3 | ||||
-rw-r--r-- | mm/memcontrol.c | 67 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 14 | ||||
-rw-r--r-- | mm/mmap.c | 11 | ||||
-rw-r--r-- | mm/page-writeback.c | 54 | ||||
-rw-r--r-- | mm/readahead.c | 8 | ||||
-rw-r--r-- | mm/slab.c | 13 | ||||
-rw-r--r-- | mm/vmscan.c | 2 | ||||
-rw-r--r-- | mm/vmstat.c | 7 |
16 files changed, 168 insertions, 109 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2df8ddcb0ca0..619984fc07ec 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -480,6 +480,10 @@ static void cgwb_release_workfn(struct work_struct *work) release_work); struct backing_dev_info *bdi = wb->bdi; + spin_lock_irq(&cgwb_lock); + list_del_rcu(&wb->bdi_node); + spin_unlock_irq(&cgwb_lock); + wb_shutdown(wb); css_put(wb->memcg_css); @@ -575,6 +579,7 @@ static int cgwb_create(struct backing_dev_info *bdi, ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb); if (!ret) { atomic_inc(&bdi->usage_cnt); + list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); css_get(memcg_css); @@ -676,7 +681,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { struct radix_tree_iter iter; - struct bdi_writeback_congested *congested, *congested_n; + struct rb_node *rbn; void **slot; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); @@ -686,9 +691,11 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - rbtree_postorder_for_each_entry_safe(congested, congested_n, - &bdi->cgwb_congested_tree, rb_node) { - rb_erase(&congested->rb_node, &bdi->cgwb_congested_tree); + while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { + struct bdi_writeback_congested *congested = + rb_entry(rbn, struct bdi_writeback_congested, rb_node); + + rb_erase(rbn, &bdi->cgwb_congested_tree); congested->bdi = NULL; /* mark @congested unlinked */ } @@ -764,15 +771,22 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } int bdi_init(struct backing_dev_info *bdi) { + int ret; + bdi->dev = NULL; bdi->min_ratio = 0; bdi->max_ratio = 100; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); + INIT_LIST_HEAD(&bdi->wb_list); init_waitqueue_head(&bdi->wb_waitq); - return cgwb_bdi_init(bdi); 
+ ret = cgwb_bdi_init(bdi); + + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); + + return ret; } EXPORT_SYMBOL(bdi_init); @@ -823,7 +837,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) synchronize_rcu_expedited(); } -void bdi_destroy(struct backing_dev_info *bdi) +void bdi_unregister(struct backing_dev_info *bdi) { /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); @@ -835,9 +849,19 @@ void bdi_destroy(struct backing_dev_info *bdi) device_unregister(bdi->dev); bdi->dev = NULL; } +} +void bdi_exit(struct backing_dev_info *bdi) +{ + WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); } + +void bdi_destroy(struct backing_dev_info *bdi) +{ + bdi_unregister(bdi); + bdi_exit(bdi); +} EXPORT_SYMBOL(bdi_destroy); /* @@ -361,7 +361,7 @@ err: * This function allocates part of contiguous memory on specific * contiguous memory area. */ -struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align) +struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align) { unsigned long mask, offset, pfn, start = 0; unsigned long bitmap_maxno, bitmap_no, bitmap_count; @@ -371,7 +371,7 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align) if (!cma || !cma->count) return NULL; - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, + pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma, count, align); if (!count) diff --git a/mm/dmapool.c b/mm/dmapool.c index 71a8998cd03a..312a716fa14c 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma) list_for_each_entry(page, &pool->page_list, page_list) { if (dma < page->dma) continue; - if (dma < (page->dma + pool->allocation)) + if ((dma - page->dma) < pool->allocation) return page; } return NULL; diff --git a/mm/filemap.c b/mm/filemap.c index 72940fb38666..327910c2400c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2473,6 
+2473,26 @@ ssize_t generic_perform_write(struct file *file, iov_iter_count(i)); again: + /* + * Bring in the user page that we will copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. + * + * Not only is this an optimisation, but it is also required + * to check that the address is actually valid, when atomic + * usercopies are used, below. + */ + if (unlikely(iov_iter_fault_in_readable(i, bytes))) { + status = -EFAULT; + break; + } + + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); if (unlikely(status < 0)) @@ -2480,17 +2500,8 @@ again: if (mapping_writably_mapped(mapping)) flush_dcache_page(page); - /* - * 'page' is now locked. If we are trying to copy from a - * mapping of 'page' in userspace, the copy might fault and - * would need PageUptodate() to complete. But, page can not be - * made Uptodate without acquiring the page lock, which we hold. - * Deadlock. Avoid with pagefault_disable(). Fix up below with - * iov_iter_fault_in_readable(). - */ - pagefault_disable(); + copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - pagefault_enable(); flush_dcache_page(page); status = a_ops->write_end(file, mapping, pos, bytes, copied, @@ -2513,24 +2524,12 @@ again: */ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, iov_iter_single_seg_count(i)); - /* - * This is the fallback to recover if the copy from - * userspace above faults. - */ - if (unlikely(iov_iter_fault_in_readable(i, bytes))) { - status = -EFAULT; - break; - } goto again; } pos += copied; written += copied; balance_dirty_pages_ratelimited(mapping); - if (fatal_signal_pending(current)) { - status = -EINTR; - break; - } } while (iov_iter_count(i)); return written ? 
written : status; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4b06b8db9df2..bbac913f96bc 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2206,7 +2206,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, address += PAGE_SIZE) { pte_t pteval = *_pte; - if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { + if (pte_none(pteval) || (pte_present(pteval) && + is_zero_pfn(pte_pfn(pteval)))) { if (!userfaultfd_armed(vma) && ++none_or_zero <= khugepaged_max_ptes_none) continue; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 999fb0aef8f1..9cc773483624 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3202,6 +3202,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, continue; /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these * areas. 
This is because a future no-page fault on this VMA diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 7b28e9cdf1c7..8da211411b57 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr) if (unlikely(*shadow_addr)) { u16 shadow_first_bytes = *(u16 *)shadow_addr; - s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; if (unlikely(shadow_first_bytes)) return true; - if (likely(!last_byte)) + if (likely(IS_ALIGNED(addr, 8))) return false; return memory_is_poisoned_1(addr + 15); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6ddaeba34e09..c57c4423c688 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) } /* + * Return page count for single (non recursive) @memcg. + * * Implementation Note: reading percpu statistics for memcg. * * Both of vmstat[] and percpu_counter has threshold and do periodic * synchronization to implement "quick" read. There are trade-off between * reading cost and precision of value. Then, we may have a chance to implement - * a periodic synchronizion of counter in memcg's counter. + * a periodic synchronization of counter in memcg's counter. * * But this _read() function is used for user interface now. The user accounts * memory usage by memory cgroup and he _always_ requires exact value because @@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) * * If there are kernel internal actions which can make use of some not-exact * value, and reading all cpu value can be performance bottleneck in some - * common workload, threashold and synchonization as vmstat[] should be + * common workload, threshold and synchronization as vmstat[] should be * implemented. 
*/ -static long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static unsigned long +mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { long val = 0; int cpu; + /* Per-cpu values can be negative, use a signed accumulator */ for_each_possible_cpu(cpu) val += per_cpu(memcg->stat->count[idx], cpu); + /* + * Summing races with updates, so val may be negative. Avoid exposing + * transient negative values. + */ + if (val < 0) + val = 0; return val; } @@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i], + pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], K(mem_cgroup_read_stat(iter, i))); } @@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { struct mem_cgroup *iter; - long val = 0; + unsigned long val = 0; - /* Per-cpu values can be negative, use a signed accumulator */ for_each_mem_cgroup_tree(iter, memcg) val += mem_cgroup_read_stat(iter, idx); - if (val < 0) /* race ? 
*/ - val = 0; return val; } @@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i], + seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); } @@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v) (u64)memsw * PAGE_SIZE); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { - long long val = 0; + unsigned long long val = 0; if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; for_each_mem_cgroup_tree(mi, memcg) val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; - seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val); + seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val); } for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { @@ -3381,6 +3387,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, ret = page_counter_memparse(args, "-1", &threshold); if (ret) return ret; + threshold <<= PAGE_SHIFT; mutex_lock(&memcg->thresholds_lock); @@ -3734,44 +3741,43 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question - * @pavail: out parameter for number of available pages + * @pfilepages: out parameter for number of file pages + * @pheadroom: out parameter for number of allocatable pages according to memcg * @pdirty: out parameter for number of dirty pages * @pwriteback: out parameter for number of pages under writeback * - * Determine the numbers of available, dirty, and writeback pages in @wb's - * memcg. Dirty and writeback are self-explanatory. Available is a bit - * more involved. + * Determine the numbers of file, headroom, dirty, and writeback pages in + * @wb's memcg. File, dirty and writeback are self-explanatory. Headroom + * is a bit more involved. 
* - * A memcg's headroom is "min(max, high) - used". The available memory is - * calculated as the lowest headroom of itself and the ancestors plus the - * number of pages already being used for file pages. Note that this - * doesn't consider the actual amount of available memory in the system. - * The caller should further cap *@pavail accordingly. + * A memcg's headroom is "min(max, high) - used". In the hierarchy, the + * headroom is calculated as the lowest headroom of itself and the + * ancestors. Note that this doesn't consider the actual amount of + * available memory in the system. The caller should further cap + * *@pheadroom accordingly. */ -void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, - unsigned long *pdirty, unsigned long *pwriteback) +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, + unsigned long *pwriteback) { struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - unsigned long head_room = PAGE_COUNTER_MAX; - unsigned long file_pages; *pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ *pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | + (1 << LRU_ACTIVE_FILE)); + *pheadroom = PAGE_COUNTER_MAX; - file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | - (1 << LRU_ACTIVE_FILE)); while ((parent = parent_mem_cgroup(memcg))) { unsigned long ceiling = min(memcg->memory.limit, memcg->high); unsigned long used = page_counter_read(&memcg->memory); - head_room = min(head_room, ceiling - min(ceiling, used)); + *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); memcg = parent; } - - *pavail = file_pages + head_room; } #else /* CONFIG_CGROUP_WRITEBACK */ @@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if 
(memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; - spin_lock_init(&memcg->pcp_counter_lock); return memcg; out_free_stat: diff --git a/mm/memory.c b/mm/memory.c index 9cb27470fee9..deb679c31f2a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2426,6 +2426,8 @@ void unmap_mapping_range(struct address_space *mapping, if (details.last_index < details.first_index) details.last_index = ULONG_MAX; + + /* DAX uses i_mmap_lock to serialise file truncate vs page fault */ i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); diff --git a/mm/migrate.c b/mm/migrate.c index c3cb566af3e2..842ecd7aaf7f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (PageSwapBacked(page)) SetPageSwapBacked(newpage); + /* + * Indirectly called below, migrate_page_copy() copies PG_dirty and thus + * needs newpage's memcg set to transfer memcg dirty page accounting. + * So perform memcg migration in two steps: + * 1. set newpage->mem_cgroup (here) + * 2. 
clear page->mem_cgroup (below) + */ + set_page_memcg(newpage, page_memcg(page)); + mapping = page_mapping(page); if (!mapping) rc = migrate_page(mapping, newpage, page, mode); @@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page, rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc != MIGRATEPAGE_SUCCESS) { + set_page_memcg(newpage, NULL); newpage->mapping = NULL; } else { - mem_cgroup_migrate(page, newpage, false); + set_page_memcg(page, NULL); if (page_was_mapped) remove_migration_ptes(page, newpage); page->mapping = NULL; @@ -1075,7 +1085,7 @@ out: if (rc != MIGRATEPAGE_SUCCESS && put_new_page) put_new_page(new_hpage, private); else - put_page(new_hpage); + putback_active_hugepage(new_hpage); if (result) { if (rc) diff --git a/mm/mmap.c b/mm/mmap.c index 971dd2cb77d2..79bcc9f92e48 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { - WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops"); - /* Update tracking information for the gap following the new vma. */ if (vma->vm_next) vma_gap_update(vma->vm_next); @@ -1492,13 +1490,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) int vma_wants_writenotify(struct vm_area_struct *vma) { vm_flags_t vm_flags = vma->vm_flags; + const struct vm_operations_struct *vm_ops = vma->vm_ops; /* If it was private or non-writable, the write bit is already clear */ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) return 0; /* The backer wishes to know when pages are first written to? 
*/ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) + if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite)) return 1; /* The open routine did something to the protections that pgprot_modify @@ -1638,12 +1637,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ WARN_ON_ONCE(addr != vma->vm_start); - /* All file mapping must have ->vm_ops set */ - if (!vma->vm_ops) { - static const struct vm_operations_struct dummy_ops = {}; - vma->vm_ops = &dummy_ops; - } - addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0a931cdd4f6b..2c90357c34ea 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -145,9 +145,6 @@ struct dirty_throttle_control { unsigned long pos_ratio; }; -#define DTC_INIT_COMMON(__wb) .wb = (__wb), \ - .wb_completions = &(__wb)->completions - /* * Length of period for aging writeout fractions of bdis. This is an * arbitrarily chosen number. The longer the period, the slower fractions will @@ -157,12 +154,16 @@ struct dirty_throttle_control { #ifdef CONFIG_CGROUP_WRITEBACK -#define GDTC_INIT(__wb) .dom = &global_wb_domain, \ - DTC_INIT_COMMON(__wb) +#define GDTC_INIT(__wb) .wb = (__wb), \ + .dom = &global_wb_domain, \ + .wb_completions = &(__wb)->completions + #define GDTC_INIT_NO_WB .dom = &global_wb_domain -#define MDTC_INIT(__wb, __gdtc) .dom = mem_cgroup_wb_domain(__wb), \ - .gdtc = __gdtc, \ - DTC_INIT_COMMON(__wb) + +#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \ + .dom = mem_cgroup_wb_domain(__wb), \ + .wb_completions = &(__wb)->memcg_completions, \ + .gdtc = __gdtc static bool mdtc_valid(struct dirty_throttle_control *dtc) { @@ -213,7 +214,8 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, #else /* CONFIG_CGROUP_WRITEBACK */ -#define GDTC_INIT(__wb) DTC_INIT_COMMON(__wb) +#define GDTC_INIT(__wb) .wb = (__wb), \ + .wb_completions = &(__wb)->completions #define GDTC_INIT_NO_WB #define MDTC_INIT(__wb, __gdtc) @@ -682,13 
+684,19 @@ static unsigned long hard_dirty_limit(struct wb_domain *dom, return max(thresh, dom->dirty_limit); } -/* memory available to a memcg domain is capped by system-wide clean memory */ -static void mdtc_cap_avail(struct dirty_throttle_control *mdtc) +/* + * Memory which can be further allocated to a memcg domain is capped by + * system-wide clean memory excluding the amount being used in the domain. + */ +static void mdtc_calc_avail(struct dirty_throttle_control *mdtc, + unsigned long filepages, unsigned long headroom) { struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc); - unsigned long clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); + unsigned long clean = filepages - min(filepages, mdtc->dirty); + unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); + unsigned long other_clean = global_clean - min(global_clean, clean); - mdtc->avail = min(mdtc->avail, clean); + mdtc->avail = filepages + min(headroom, other_clean); } /** @@ -1562,16 +1570,16 @@ static void balance_dirty_pages(struct address_space *mapping, } if (mdtc) { - unsigned long writeback; + unsigned long filepages, headroom, writeback; /* * If @wb belongs to !root memcg, repeat the same * basic calculations for the memcg domain. 
*/ - mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, - &writeback); - mdtc_cap_avail(mdtc); + mem_cgroup_wb_stats(wb, &filepages, &headroom, + &mdtc->dirty, &writeback); mdtc->dirty += writeback; + mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); @@ -1893,10 +1901,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) return true; if (mdtc) { - unsigned long writeback; + unsigned long filepages, headroom, writeback; - mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, &writeback); - mdtc_cap_avail(mdtc); + mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, + &writeback); + mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); /* ditto, ignore writeback */ if (mdtc->dirty > mdtc->bg_thresh) @@ -1956,7 +1965,6 @@ void laptop_mode_timer_fn(unsigned long data) int nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); struct bdi_writeback *wb; - struct wb_iter iter; /* * We want to write everything out, not just down to the dirty @@ -1965,10 +1973,12 @@ void laptop_mode_timer_fn(unsigned long data) if (!bdi_has_dirty_io(&q->backing_dev_info)) return; - bdi_for_each_wb(wb, &q->backing_dev_info, &iter, 0) + rcu_read_lock(); + list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node) if (wb_has_dirty_io(wb)) wb_start_writeback(wb, nr_pages, true, WB_REASON_LAPTOP_TIMER); + rcu_read_unlock(); } /* diff --git a/mm/readahead.c b/mm/readahead.c index 60cd846a9a44..24682f6f4cfd 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -89,8 +89,8 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, while (!list_empty(pages)) { page = list_to_page(pages); list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, - page->index, GFP_KERNEL)) { + if (add_to_page_cache_lru(page, mapping, page->index, + GFP_KERNEL & mapping_gfp_mask(mapping))) { read_cache_pages_invalidate_page(mapping, page); continue; } @@ -127,8 +127,8 @@ static int read_pages(struct 
address_space *mapping, struct file *filp, for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_to_page(pages); list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, - page->index, GFP_KERNEL)) { + if (!add_to_page_cache_lru(page, mapping, page->index, + GFP_KERNEL & mapping_gfp_mask(mapping))) { mapping->a_ops->readpage(filp, page); } page_cache_release(page); diff --git a/mm/slab.c b/mm/slab.c index c77ebe6cc87c..4fcc5dd8d5a6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { + /* + * To activate debug pagealloc, off-slab management is necessary + * requirement. In early phase of initialization, small sized slab + * doesn't get initialized so it would not be possible. So, we need + * to check size >= 256. It guarantees that all necessary small + * sized slab is initialized in current slab initialization sequence. 
+ */ + if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && + size >= 256 && cachep->object_size > cache_line_size() && + ALIGN(size, cachep->align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 2d978b28a410..7f63a9381f71 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -175,7 +175,7 @@ static bool sane_reclaim(struct scan_control *sc) if (!memcg) return true; #ifdef CONFIG_CGROUP_WRITEBACK - if (memcg->css.cgroup) + if (cgroup_on_dfl(memcg->css.cgroup)) return true; #endif return false; diff --git a/mm/vmstat.c b/mm/vmstat.c index 4f5cd974e11a..fbf14485a049 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1363,15 +1363,16 @@ static cpumask_var_t cpu_stat_off; static void vmstat_update(struct work_struct *w) { - if (refresh_cpu_vm_stats()) + if (refresh_cpu_vm_stats()) { /* * Counters were updated so we expect more updates * to occur in the future. Keep on running the * update worker thread. */ - schedule_delayed_work(this_cpu_ptr(&vmstat_work), + schedule_delayed_work_on(smp_processor_id(), + this_cpu_ptr(&vmstat_work), round_jiffies_relative(sysctl_stat_interval)); - else { + } else { /* * We did not update any counters so the app may be in * a mode where it does not cause counter updates. |