diff options
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r-- | mm/vmstat.c | 169 |
1 files changed, 162 insertions, 7 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c index 9a4441bbeef2..4bb13e72ac97 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -30,6 +30,8 @@ #include "internal.h" +#define NUMA_STATS_THRESHOLD (U16_MAX - 2) + #ifdef CONFIG_VM_EVENT_COUNTERS DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; EXPORT_PER_CPU_SYMBOL(vm_event_states); @@ -87,8 +89,10 @@ void vm_events_fold_cpu(int cpu) * vm_stat contains the global counters */ atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; +atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp; atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; EXPORT_SYMBOL(vm_zone_stat); +EXPORT_SYMBOL(vm_numa_stat); EXPORT_SYMBOL(vm_node_stat); #ifdef CONFIG_SMP @@ -604,6 +608,32 @@ EXPORT_SYMBOL(dec_node_page_state); * Fold a differential into the global counters. * Returns the number of counters updated. */ +#ifdef CONFIG_NUMA +static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) +{ + int i; + int changes = 0; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + if (zone_diff[i]) { + atomic_long_add(zone_diff[i], &vm_zone_stat[i]); + changes++; + } + + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + if (numa_diff[i]) { + atomic_long_add(numa_diff[i], &vm_numa_stat[i]); + changes++; + } + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + if (node_diff[i]) { + atomic_long_add(node_diff[i], &vm_node_stat[i]); + changes++; + } + return changes; +} +#else static int fold_diff(int *zone_diff, int *node_diff) { int i; @@ -622,6 +652,7 @@ static int fold_diff(int *zone_diff, int *node_diff) } return changes; } +#endif /* CONFIG_NUMA */ /* * Update the zone counters for the current cpu. @@ -645,6 +676,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets) struct zone *zone; int i; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; +#ifdef CONFIG_NUMA + int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; +#endif int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; int changes = 0; @@ -666,6 +700,18 @@ static int refresh_cpu_vm_stats(bool do_pagesets) } } #ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { + int v; + + v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0); + if (v) { + + atomic_long_add(v, &zone->vm_numa_stat[i]); + global_numa_diff[i] += v; + __this_cpu_write(p->expire, 3); + } + } + if (do_pagesets) { cond_resched(); /* @@ -712,7 +758,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets) } } +#ifdef CONFIG_NUMA + changes += fold_diff(global_zone_diff, global_numa_diff, + global_node_diff); +#else changes += fold_diff(global_zone_diff, global_node_diff); +#endif return changes; } @@ -727,6 +778,9 @@ void cpu_vm_stats_fold(int cpu) struct zone *zone; int i; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; +#ifdef CONFIG_NUMA + int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; +#endif int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; for_each_populated_zone(zone) { @@ -743,6 +797,18 @@ void cpu_vm_stats_fold(int cpu) atomic_long_add(v, &zone->vm_stat[i]); global_zone_diff[i] += v; } + +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + if (p->vm_numa_stat_diff[i]) { + int v; + + v = p->vm_numa_stat_diff[i]; + p->vm_numa_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_numa_stat[i]); + global_numa_diff[i] += v; + } +#endif } for_each_online_pgdat(pgdat) { @@ -761,7 +827,11 @@ void cpu_vm_stats_fold(int cpu) } } +#ifdef CONFIG_NUMA + fold_diff(global_zone_diff, global_numa_diff, global_node_diff); +#else fold_diff(global_zone_diff, global_node_diff); +#endif } /* @@ -779,10 +849,36 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) atomic_long_add(v, &zone->vm_stat[i]); atomic_long_add(v, &vm_zone_stat[i]); } + +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + if (pset->vm_numa_stat_diff[i]) { + int v = pset->vm_numa_stat_diff[i]; + + pset->vm_numa_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_numa_stat[i]); + atomic_long_add(v, &vm_numa_stat[i]); + } +#endif } #endif #ifdef CONFIG_NUMA +void __inc_numa_state(struct zone *zone, + enum numa_stat_item item) +{ + struct per_cpu_pageset __percpu *pcp = zone->pageset; + u16 __percpu *p = pcp->vm_numa_stat_diff + item; + u16 v; + + v = __this_cpu_inc_return(*p); + + if (unlikely(v > NUMA_STATS_THRESHOLD)) { + zone_numa_state_add(v, zone, item); + __this_cpu_write(*p, 0); + } +} + /* * Determine the per node value of a stat item. This function * is called frequently in a NUMA machine, so try to be as @@ -802,6 +898,23 @@ unsigned long sum_zone_node_page_state(int node, } /* + * Determine the per node value of a numa stat item. To avoid deviation, + * the per cpu stat number in vm_numa_stat_diff[] is also included. + */ +unsigned long sum_zone_numa_state(int node, + enum numa_stat_item item) +{ + struct zone *zones = NODE_DATA(node)->node_zones; + int i; + unsigned long count = 0; + + for (i = 0; i < MAX_NR_ZONES; i++) + count += zone_numa_state_snapshot(zones + i, item); + + return count; +} + +/* * Determine the per node value of a stat item. */ unsigned long node_page_state(struct pglist_data *pgdat, @@ -870,6 +983,9 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in { unsigned long requested = 1UL << order; + if (WARN_ON_ONCE(order >= MAX_ORDER)) + return 0; + if (!info->free_blocks_total) return 0; @@ -934,6 +1050,9 @@ const char * const vmstat_text[] = { #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", #endif + "nr_free_cma", + + /* enum numa_stat_item counters */ #ifdef CONFIG_NUMA "numa_hit", "numa_miss", @@ -942,7 +1061,6 @@ const char * const vmstat_text[] = { "numa_local", "numa_other", #endif - "nr_free_cma", /* Node-based counters */ "nr_inactive_anon", @@ -1071,6 +1189,8 @@ const char * const vmstat_text[] = { #endif "thp_zero_page_alloc", "thp_zero_page_alloc_failed", + "thp_swpout", + "thp_swpout_fallback", #endif #ifdef CONFIG_MEMORY_BALLOON "balloon_inflate", @@ -1093,11 +1213,14 @@ const char * const vmstat_text[] = { "vmacache_find_hits", "vmacache_full_flushes", #endif +#ifdef CONFIG_SWAP + "swap_ra", + "swap_ra_hit", +#endif #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ - #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ defined(CONFIG_PROC_FS) static void *frag_start(struct seq_file *m, loff_t *pos) @@ -1250,7 +1373,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, seq_putc(m, '\n'); } -/* Print out the free pages at each order for each migratetype */ +/* Print out the number of pageblocks for each migratetype */ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg) { int mtype; @@ -1375,7 +1498,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { seq_printf(m, "\n %-12s %lu", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], + vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS], node_page_state(pgdat, i)); } } @@ -1412,6 +1536,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "\n %-12s %lu", vmstat_text[i], zone_page_state(zone, i)); +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + seq_printf(m, "\n %-12s %lu", + vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], + zone_numa_state_snapshot(zone, i)); +#endif + seq_printf(m, "\n pagesets"); for_each_online_cpu(i) { struct per_cpu_pageset *pageset; @@ -1488,6 +1619,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) if (*pos >= ARRAY_SIZE(vmstat_text)) return NULL; stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + + NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) + NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); @@ -1500,9 +1632,15 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) if (!v) return ERR_PTR(-ENOMEM); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) - v[i] = global_page_state(i); + v[i] = global_zone_page_state(i); v += NR_VM_ZONE_STAT_ITEMS; +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + v[i] = global_numa_state(i); + v += NR_VM_NUMA_STAT_ITEMS; +#endif + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) v[i] = global_node_page_state(i); v += NR_VM_NODE_STAT_ITEMS; @@ -1589,7 +1727,7 @@ int vmstat_refresh(struct ctl_table *table, int write, * which can equally be echo'ed to or cat'ted from (by root), * can be used to update the stats just before reading them. * - * Oh, and since global_page_state() etc. are so careful to hide + * Oh, and since global_zone_page_state() etc. are so careful to hide * transiently negative values, report an error here if any of * the stats is negative, so we know to go looking for imbalance. */ @@ -1604,6 +1742,16 @@ int vmstat_refresh(struct ctl_table *table, int write, err = -EINVAL; } } +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { + val = atomic_long_read(&vm_numa_stat[i]); + if (val < 0) { + pr_warn("%s: %s %ld\n", + __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val); + err = -EINVAL; + } + } +#endif if (err) return err; if (write) @@ -1645,13 +1793,20 @@ static bool need_update(int cpu) struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1); +#ifdef CONFIG_NUMA + BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2); +#endif + /* * The fast way of checking if there are any vmstat diffs. * This works because the diffs are byte sized items. */ if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) return true; - +#ifdef CONFIG_NUMA + if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) + return true; +#endif } return false; } |