summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-12 21:15:10 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-12 21:15:10 +0100
commit92c020d08d83673ecd15a9069d4457378668da31 (patch)
tree3dbc5a9c1ab179f55be49e30e378cc4e650fc20e /arch/x86/kernel
parentMerge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kern... (diff)
parentsched/core: Use load_avg for selecting idlest group (diff)
downloadlinux-92c020d08d83673ecd15a9069d4457378668da31.tar.xz
linux-92c020d08d83673ecd15a9069d4457378668da31.zip
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "The main scheduler changes in this cycle were: - support Intel Turbo Boost Max Technology 3.0 (TBM3) by introducig a notion of 'better cores', which the scheduler will prefer to schedule single threaded workloads on. (Tim Chen, Srinivas Pandruvada) - enhance the handling of asymmetric capacity CPUs further (Morten Rasmussen) - improve/fix load handling when moving tasks between task groups (Vincent Guittot) - simplify and clean up the cputime code (Stanislaw Gruszka) - improve mass fork()ed task spread a.k.a. hackbench speedup (Vincent Guittot) - make struct kthread kmalloc()ed and related fixes (Oleg Nesterov) - add uaccess atomicity debugging (when using access_ok() in the wrong context), under CONFIG_DEBUG_ATOMIC_SLEEP=y (Peter Zijlstra) - implement various fixes, cleanups and other enhancements (Daniel Bristot de Oliveira, Martin Schwidefsky, Rafael J. Wysocki)" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits) sched/core: Use load_avg for selecting idlest group sched/core: Fix find_idlest_group() for fork kthread: Don't abuse kthread_create_on_cpu() in __kthread_create_worker() kthread: Don't use to_live_kthread() in kthread_[un]park() kthread: Don't use to_live_kthread() in kthread_stop() Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function" kthread: Make struct kthread kmalloc'ed x86/uaccess, sched/preempt: Verify access_ok() context sched/x86: Make CONFIG_SCHED_MC_PRIO=y easier to enable sched/x86: Change CONFIG_SCHED_ITMT to CONFIG_SCHED_MC_PRIO x86/sched: Use #include <linux/mutex.h> instead of #include <asm/mutex.h> cpufreq/intel_pstate: Use CPPC to get max performance acpi/bus: Set _OSC for diverse core support acpi/bus: Enable HWP CPPC objects x86/sched: Add SD_ASYM_PACKING flags to x86 ITMT CPU x86/sysctl: Add sysctl for ITMT scheduling feature x86: Enable Intel Turbo Boost Max Technology 3.0 x86/topology: Define x86's arch_update_cpu_topology sched: Extend scheduler's asym packing sched/fair: Clean up the tunable parameter definitions ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/itmt.c215
-rw-r--r--arch/x86/kernel/smpboot.c39
4 files changed, 253 insertions, 6 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 79076d75bdbf..05110c1097ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
+obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
ifdef CONFIG_FRAME_POINTER
obj-y += unwind_frame.o
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 51287cd90bf6..643818a7688b 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -906,14 +906,14 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
- cputime_t stime;
+ cputime_t stime, utime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
recalc:
- task_cputime(current, NULL, &stime);
+ task_cputime(current, &utime, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
} else if (jiffies_since_last_check > idle_period) {
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
new file mode 100644
index 000000000000..cb9c1ed1d391
--- /dev/null
+++ b/arch/x86/kernel/itmt.c
@@ -0,0 +1,215 @@
+/*
+ * itmt.c: Support Intel Turbo Boost Max Technology 3.0
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
+ * the maximum turbo frequencies of some cores in a CPU package may be
+ * higher than for the other cores in the same package. In that case,
+ * better performance can be achieved by making the scheduler prefer
+ * to run tasks on the CPUs with higher max turbo frequencies.
+ *
+ * This file provides functions and data structures for enabling the
+ * scheduler to favor scheduling on cores can be boosted to a higher
+ * frequency under ITMT.
+ */
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/cpuset.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/nodemask.h>
+
+static DEFINE_MUTEX(itmt_update_mutex);
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+
+/* Boolean to track if system has ITMT capabilities */
+static bool __read_mostly sched_itmt_capable;
+
+/*
+ * Boolean to control whether we want to move processes to cpu capable
+ * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
+ * Technology 3.0.
+ *
+ * It can be set via /proc/sys/kernel/sched_itmt_enabled
+ */
+unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+static int sched_itmt_update_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned int old_sysctl;
+ int ret;
+
+ mutex_lock(&itmt_update_mutex);
+
+ if (!sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return -EINVAL;
+ }
+
+ old_sysctl = sysctl_sched_itmt_enabled;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+
+ return ret;
+}
+
+static unsigned int zero;
+static unsigned int one = 1;
+static struct ctl_table itmt_kern_table[] = {
+ {
+ .procname = "sched_itmt_enabled",
+ .data = &sysctl_sched_itmt_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_itmt_update_handler,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {}
+};
+
+static struct ctl_table itmt_root_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = itmt_kern_table,
+ },
+ {}
+};
+
+static struct ctl_table_header *itmt_sysctl_header;
+
+/**
+ * sched_set_itmt_support() - Indicate platform supports ITMT
+ *
+ * This function is used by the OS to indicate to scheduler that the platform
+ * is capable of supporting the ITMT feature.
+ *
+ * The current scheme has the pstate driver detects if the system
+ * is ITMT capable and call sched_set_itmt_support.
+ *
+ * This must be done only after sched_set_itmt_core_prio
+ * has been called to set the cpus' priorities.
+ * It must not be called with cpu hot plug lock
+ * held as we need to acquire the lock to rebuild sched domains
+ * later.
+ *
+ * Return: 0 on success
+ */
+int sched_set_itmt_support(void)
+{
+ mutex_lock(&itmt_update_mutex);
+
+ if (sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return 0;
+ }
+
+ itmt_sysctl_header = register_sysctl_table(itmt_root_table);
+ if (!itmt_sysctl_header) {
+ mutex_unlock(&itmt_update_mutex);
+ return -ENOMEM;
+ }
+
+ sched_itmt_capable = true;
+
+ sysctl_sched_itmt_enabled = 1;
+
+ if (sysctl_sched_itmt_enabled) {
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+
+ return 0;
+}
+
+/**
+ * sched_clear_itmt_support() - Revoke platform's support of ITMT
+ *
+ * This function is used by the OS to indicate that it has
+ * revoked the platform's support of ITMT feature.
+ *
+ * It must not be called with cpu hot plug lock
+ * held as we need to acquire the lock to rebuild sched domains
+ * later.
+ */
+void sched_clear_itmt_support(void)
+{
+ mutex_lock(&itmt_update_mutex);
+
+ if (!sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return;
+ }
+ sched_itmt_capable = false;
+
+ if (itmt_sysctl_header) {
+ unregister_sysctl_table(itmt_sysctl_header);
+ itmt_sysctl_header = NULL;
+ }
+
+ if (sysctl_sched_itmt_enabled) {
+ /* disable sched_itmt if we are no longer ITMT capable */
+ sysctl_sched_itmt_enabled = 0;
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+}
+
+int arch_asym_cpu_priority(int cpu)
+{
+ return per_cpu(sched_core_priority, cpu);
+}
+
+/**
+ * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
+ * @prio: Priority of cpu core
+ * @core_cpu: The cpu number associated with the core
+ *
+ * The pstate driver will find out the max boost frequency
+ * and call this function to set a priority proportional
+ * to the max boost frequency. CPU with higher boost
+ * frequency will receive higher priority.
+ *
+ * No need to rebuild sched domain after updating
+ * the CPU priorities. The sched domains have no
+ * dependency on CPU priorities.
+ */
+void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+ int cpu, i = 1;
+
+ for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+ int smt_prio;
+
+ /*
+ * Ensure that the siblings are moved to the end
+ * of the priority chain and only used when
+ * all other high priority cpus are out of capacity.
+ */
+ smt_prio = prio * smp_num_siblings / i;
+ per_cpu(sched_core_priority, cpu) = smt_prio;
+ i++;
+ }
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b9f02383f372..118e792a7be6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -109,6 +109,17 @@ static bool logical_packages_frozen __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
+/* Flag to indicate if a complete sched domain rebuild is required */
+bool x86_topology_update;
+
+int arch_update_cpu_topology(void)
+{
+ int retval = x86_topology_update;
+
+ x86_topology_update = false;
+ return retval;
+}
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -471,22 +482,42 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
+static inline int x86_sched_itmt_flags(void)
+{
+ return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
+}
+
+#ifdef CONFIG_SCHED_MC
+static int x86_core_flags(void)
+{
+ return cpu_core_flags() | x86_sched_itmt_flags();
+}
+#endif
+#ifdef CONFIG_SCHED_SMT
+static int x86_smt_flags(void)
+{
+ return cpu_smt_flags() | x86_sched_itmt_flags();
+}
+#endif
+#endif
+
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ NULL, },
};
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },