diff options
Diffstat (limited to 'kernel/sched/debug.c')
-rw-r--r-- | kernel/sched/debug.c | 435 |
1 files changed, 229 insertions, 206 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 486f403a778b..9c882f20803e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -8,8 +8,6 @@ */ #include "sched.h" -static DEFINE_SPINLOCK(sched_debug_lock); - /* * This allows printing both to /proc/sched_debug and * to the console @@ -169,245 +167,258 @@ static const struct file_operations sched_feat_fops = { .release = single_release, }; -__read_mostly bool sched_debug_enabled; +#ifdef CONFIG_SMP -static __init int sched_init_debug(void) +static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) { - debugfs_create_file("sched_features", 0644, NULL, NULL, - &sched_feat_fops); + char buf[16]; - debugfs_create_bool("sched_debug", 0644, NULL, - &sched_debug_enabled); + if (cnt > 15) + cnt = 15; - return 0; + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + if (kstrtouint(buf, 10, &sysctl_sched_tunable_scaling)) + return -EINVAL; + + if (sched_update_scaling()) + return -EINVAL; + + *ppos += cnt; + return cnt; } -late_initcall(sched_init_debug); -#ifdef CONFIG_SMP +static int sched_scaling_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", sysctl_sched_tunable_scaling); + return 0; +} -#ifdef CONFIG_SYSCTL +static int sched_scaling_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_scaling_show, NULL); +} -static struct ctl_table sd_ctl_dir[] = { - { - .procname = "sched_domain", - .mode = 0555, - }, - {} +static const struct file_operations sched_scaling_fops = { + .open = sched_scaling_open, + .write = sched_scaling_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; -static struct ctl_table sd_ctl_root[] = { - { - .procname = "kernel", - .mode = 0555, - .child = sd_ctl_dir, - }, - {} -}; +#endif /* SMP */ -static struct ctl_table *sd_alloc_ctl_entry(int n) +#ifdef CONFIG_PREEMPT_DYNAMIC + +static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) { - struct ctl_table *entry = - kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); + char buf[16]; + int mode; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; - return entry; + buf[cnt] = 0; + mode = sched_dynamic_mode(strstrip(buf)); + if (mode < 0) + return mode; + + sched_dynamic_update(mode); + + *ppos += cnt; + + return cnt; } -static void sd_free_ctl_entry(struct ctl_table **tablep) +static int sched_dynamic_show(struct seq_file *m, void *v) { - struct ctl_table *entry; - - /* - * In the intermediate directories, both the child directory and - * procname are dynamically allocated and could fail but the mode - * will always be set. In the lowest directory the names are - * static strings and all have proc handlers. - */ - for (entry = *tablep; entry->mode; entry++) { - if (entry->child) - sd_free_ctl_entry(&entry->child); - if (entry->proc_handler == NULL) - kfree(entry->procname); + static const char * preempt_modes[] = { + "none", "voluntary", "full" + }; + int i; + + for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { + if (preempt_dynamic_mode == i) + seq_puts(m, "("); + seq_puts(m, preempt_modes[i]); + if (preempt_dynamic_mode == i) + seq_puts(m, ")"); + + seq_puts(m, " "); } - kfree(*tablep); - *tablep = NULL; + seq_puts(m, "\n"); + return 0; } -static void -set_table_entry(struct ctl_table *entry, - const char *procname, void *data, int maxlen, - umode_t mode, proc_handler *proc_handler) +static int sched_dynamic_open(struct inode *inode, struct file *filp) { - entry->procname = procname; - entry->data = data; - entry->maxlen = maxlen; - entry->mode = mode; - entry->proc_handler = proc_handler; + return single_open(filp, sched_dynamic_show, NULL); } -static int sd_ctl_doflags(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static const struct file_operations sched_dynamic_fops = { + .open = sched_dynamic_open, + .write = sched_dynamic_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_PREEMPT_DYNAMIC */ + +__read_mostly bool sched_debug_verbose; + +static const struct seq_operations sched_debug_sops; + +static int sched_debug_open(struct inode *inode, struct file *filp) { - unsigned long flags = *(unsigned long *)table->data; - size_t data_size = 0; - size_t len = 0; - char *tmp, *buf; - int idx; + return seq_open(filp, &sched_debug_sops); +} - if (write) - return 0; +static const struct file_operations sched_debug_fops = { + .open = sched_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; - for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { - char *name = sd_flag_debug[idx].name; +static struct dentry *debugfs_sched; - /* Name plus whitespace */ - data_size += strlen(name) + 1; - } +static __init int sched_init_debug(void) +{ + struct dentry __maybe_unused *numa; - if (*ppos > data_size) { - *lenp = 0; - return 0; - } + debugfs_sched = debugfs_create_dir("sched", NULL); - buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; + debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); + debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); +#ifdef CONFIG_PREEMPT_DYNAMIC + debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); +#endif - for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { - char *name = sd_flag_debug[idx].name; + debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); + debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); + debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity); - len += snprintf(buf + len, strlen(name) + 2, "%s ", name); - } + debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); + debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); - tmp = buf + *ppos; - len -= *ppos; +#ifdef CONFIG_SMP + debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); + debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); + debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); - if (len > *lenp) - len = *lenp; - if (len) - memcpy(buffer, tmp, len); - if (len < *lenp) { - ((char *)buffer)[len] = '\n'; - len++; - } + mutex_lock(&sched_domains_mutex); + update_sched_domain_debugfs(); + mutex_unlock(&sched_domains_mutex); +#endif - *lenp = len; - *ppos += len; +#ifdef CONFIG_NUMA_BALANCING + numa = debugfs_create_dir("numa_balancing", debugfs_sched); - kfree(buf); + debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay); + debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min); + debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max); + debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size); +#endif + + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); return 0; } +late_initcall(sched_init_debug); + +#ifdef CONFIG_SMP -static struct ctl_table * -sd_alloc_ctl_domain_table(struct sched_domain *sd) +static cpumask_var_t sd_sysctl_cpus; +static struct dentry *sd_dentry; + +static int sd_flags_show(struct seq_file *m, void *v) { - struct ctl_table *table = sd_alloc_ctl_entry(9); - - if (table == NULL) - return NULL; - - set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0444, sd_ctl_doflags); - set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring); - /* &table[8] is terminator */ - - return table; + unsigned long flags = *(unsigned int *)m->private; + int idx; + + for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { + seq_puts(m, sd_flag_debug[idx].name); + seq_puts(m, " "); + } + seq_puts(m, "\n"); + + return 0; } -static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu) +static int sd_flags_open(struct inode *inode, struct file *file) { - struct ctl_table *entry, *table; - struct sched_domain *sd; - int domain_num = 0, i; - char buf[32]; - - for_each_domain(cpu, sd) - domain_num++; - entry = table = sd_alloc_ctl_entry(domain_num + 1); - if (table == NULL) - return NULL; - - i = 0; - for_each_domain(cpu, sd) { - snprintf(buf, 32, "domain%d", i); - entry->procname = kstrdup(buf, GFP_KERNEL); - entry->mode = 0555; - entry->child = sd_alloc_ctl_domain_table(sd); - entry++; - i++; - } - return table; + return single_open(file, sd_flags_show, inode->i_private); } -static cpumask_var_t sd_sysctl_cpus; -static struct ctl_table_header *sd_sysctl_header; +static const struct file_operations sd_flags_fops = { + .open = sd_flags_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -void register_sched_domain_sysctl(void) +static void register_sd(struct sched_domain *sd, struct dentry *parent) { - static struct ctl_table *cpu_entries; - static struct ctl_table **cpu_idx; - static bool init_done = false; - char buf[32]; - int i; +#define SDM(type, mode, member) \ + debugfs_create_##type(#member, mode, parent, &sd->member) - if (!cpu_entries) { - cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1); - if (!cpu_entries) - return; + SDM(ulong, 0644, min_interval); + SDM(ulong, 0644, max_interval); + SDM(u64, 0644, max_newidle_lb_cost); + SDM(u32, 0644, busy_factor); + SDM(u32, 0644, imbalance_pct); + SDM(u32, 0644, cache_nice_tries); + SDM(str, 0444, name); - WARN_ON(sd_ctl_dir[0].child); - sd_ctl_dir[0].child = cpu_entries; - } +#undef SDM - if (!cpu_idx) { - struct ctl_table *e = cpu_entries; - - cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL); - if (!cpu_idx) - return; + debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops); +} - /* deal with sparse possible map */ - for_each_possible_cpu(i) { - cpu_idx[i] = e; - e++; - } - } +void update_sched_domain_debugfs(void) +{ + int cpu, i; if (!cpumask_available(sd_sysctl_cpus)) { if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) return; - } - - if (!init_done) { - init_done = true; - /* init to possible to not have holes in @cpu_entries */ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); } - for_each_cpu(i, sd_sysctl_cpus) { - struct ctl_table *e = cpu_idx[i]; + if (!sd_dentry) + sd_dentry = debugfs_create_dir("domains", debugfs_sched); - if (e->child) - sd_free_ctl_entry(&e->child); + for_each_cpu(cpu, sd_sysctl_cpus) { + struct sched_domain *sd; + struct dentry *d_cpu; + char buf[32]; - if (!e->procname) { - snprintf(buf, 32, "cpu%d", i); - e->procname = kstrdup(buf, GFP_KERNEL); + snprintf(buf, sizeof(buf), "cpu%d", cpu); + debugfs_remove(debugfs_lookup(buf, sd_dentry)); + d_cpu = debugfs_create_dir(buf, sd_dentry); + + i = 0; + for_each_domain(cpu, sd) { + struct dentry *d_sd; + + snprintf(buf, sizeof(buf), "domain%d", i); + d_sd = debugfs_create_dir(buf, d_cpu); + + register_sd(sd, d_sd); + i++; } - e->mode = 0555; - e->child = sd_alloc_ctl_cpu_table(i); - __cpumask_clear_cpu(i, sd_sysctl_cpus); + __cpumask_clear_cpu(cpu, sd_sysctl_cpus); } - - WARN_ON(sd_sysctl_header); - sd_sysctl_header = register_sysctl_table(sd_ctl_root); } void dirty_sched_domain_sysctl(int cpu) @@ -416,13 +427,6 @@ void dirty_sched_domain_sysctl(int cpu) __cpumask_set_cpu(cpu, sd_sysctl_cpus); } -/* may be called multiple times per register */ -void unregister_sched_domain_sysctl(void) -{ - unregister_sysctl_table(sd_sysctl_header); - sd_sysctl_header = NULL; -} -#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_SMP */ #ifdef CONFIG_FAIR_GROUP_SCHED @@ -470,16 +474,37 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group #endif #ifdef CONFIG_CGROUP_SCHED +static DEFINE_SPINLOCK(sched_debug_lock); static char group_path[PATH_MAX]; -static char *task_group_path(struct task_group *tg) +static void task_group_path(struct task_group *tg, char *path, int plen) { - if (autogroup_path(tg, group_path, PATH_MAX)) - return group_path; + if (autogroup_path(tg, path, plen)) + return; - cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + cgroup_path(tg->css.cgroup, path, plen); +} - return group_path; +/* + * Only 1 SEQ_printf_task_group_path() caller can use the full length + * group_path[] for cgroup path. Other simultaneous callers will have + * to use a shorter stack buffer. A "..." suffix is appended at the end + * of the stack buffer so that it will show up in case the output length + * matches the given buffer size to indicate possible path name truncation. + */ +#define SEQ_printf_task_group_path(m, tg, fmt...) \ +{ \ + if (spin_trylock(&sched_debug_lock)) { \ + task_group_path(tg, group_path, sizeof(group_path)); \ + SEQ_printf(m, fmt, group_path); \ + spin_unlock(&sched_debug_lock); \ + } else { \ + char buf[128]; \ + char *bufend = buf + sizeof(buf) - 3; \ + task_group_path(tg, buf, bufend - buf); \ + strcpy(bufend - 1, "..."); \ + SEQ_printf(m, fmt, buf); \ + } \ } #endif @@ -506,7 +531,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED - SEQ_printf(m, " %s", task_group_path(task_group(p))); + SEQ_printf_task_group_path(m, task_group(p), " %s") #endif SEQ_printf(m, "\n"); @@ -543,7 +568,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, "\n"); - SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); + SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "cfs_rq[%d]:\n", cpu); @@ -614,7 +639,7 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { #ifdef CONFIG_RT_GROUP_SCHED SEQ_printf(m, "\n"); - SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); + SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "rt_rq[%d]:\n", cpu); @@ -666,7 +691,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long flags; #ifdef CONFIG_X86 { @@ -717,13 +741,11 @@ do { \ } #undef P - spin_lock_irqsave(&sched_debug_lock, flags); print_cfs_stats(m, cpu); print_rt_stats(m, cpu); print_dl_stats(m, cpu); print_rq(m, rq, cpu); - spin_unlock_irqrestore(&sched_debug_lock, flags); SEQ_printf(m, "\n"); } @@ -815,7 +837,7 @@ void sysrq_sched_debug_show(void) } /* - * This itererator needs some explanation. + * This iterator needs some explanation. * It returns 1 for the header position. * This means 2 is CPU 0. * In a hotplugged system some CPUs, including CPU 0, may be missing so we have @@ -860,15 +882,6 @@ static const struct seq_operations sched_debug_sops = { .show = sched_debug_show, }; -static int __init init_sched_debug_procfs(void) -{ - if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops)) - return -ENOMEM; - return 0; -} - -__initcall(init_sched_debug_procfs); - #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F)) #define __P(F) __PS(#F, F) #define P(F) __PS(#F, p->F) @@ -1033,3 +1046,13 @@ void proc_sched_set_task(struct task_struct *p) memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif } + +void resched_latency_warn(int cpu, u64 latency) +{ + static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1); + + WARN(__ratelimit(&latency_check_ratelimit), + "sched: CPU %d need_resched set for > %llu ns (%d ticks) " + "without schedule\n", + cpu, latency, cpu_rq(cpu)->ticks_without_resched); +} |