From 8fcc9bc3eaa2ef8345e2b4b22e3a88804ac46337 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sat, 13 May 2017 13:46:30 +0800 Subject: Documentation/kernel-parameters.txt: Update 'memmap=' boot option description In commit: 9710f581bb4c ("x86, mm: Let "memmap=" take more entries one time") ... 'memmap=' was changed to adopt multiple, comma delimited values in a single entry, so update the related description. In the special case of only specifying size value without an offset, like memmap=nn[KMG], memmap behaves similarly to mem=nn[KMG], so update it too here. Furthermore, for memmap=nn[KMG]$ss[KMG], an escape character needs be added before '$' for some bootloaders. E.g in grub2, if we specify memmap=100M$5G as suggested by the documentation, "memmap=100MG" gets passed to the kernel. Clarify all this. Signed-off-by: Baoquan He Acked-by: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: douly.fnst@cn.fujitsu.com Cc: dyoung@redhat.com Cc: m.mizuma@jp.fujitsu.com Link: http://lkml.kernel.org/r/1494654390-23861-4-git-send-email-bhe@redhat.com [ Various spelling fixes. ] Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..4e4c3402412e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2127,6 +2127,12 @@ memmap=nn[KMG]@ss[KMG] [KNL] Force usage of a specific region of memory. Region of memory to be used is from ss to ss+nn. + If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG], + which limits max address to nn[KMG]. + Multiple different regions can be specified, + comma delimited. + Example: + memmap=100M@2G,100M#3G,1G!1024G memmap=nn[KMG]#ss[KMG] [KNL,ACPI] Mark specific memory as ACPI data. @@ -2139,6 +2145,9 @@ memmap=64K$0x18690000 or memmap=0x10000$0x18690000 + Some bootloaders may need an escape character before '$', + like Grub2, otherwise '$' and the following number + will be eaten. memmap=nn[KMG]!ss[KMG] [KNL,X86] Mark specific memory as protected. -- cgit v1.2.3 From 881ed593a323c832c2e9383effeb6a0c99859210 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Apr 2017 12:47:10 -0700 Subject: rcuperf: Add ability to performance-test call_rcu() and friends This commit upgrades rcuperf so that it can do performance testing on asynchronous grace-period primitives such as call_srcu(). There is a new rcuperf.gp_async module parameter that specifies this new behavior, with the pre-existing rcuperf.gp_exp testing expedited grace periods such as synchronize_rcu_expedited, and with the default being to test synchronous non-expedited grace periods such as synchronize_rcu(). There is also a new rcuperf.gp_async_max module parameter that specifies the maximum number of outstanding callbacks per writer kthread, defaulting to 1,000. When this limit is exceeded, the writer thread invokes the appropriate flavor of rcu_barrier() to wait for callbacks to drain. Signed-off-by: Paul E. McKenney [ paulmck: Removed the redundant initialization noted by Arnd Bergmann. ] --- Documentation/admin-guide/kernel-parameters.txt | 11 ++++ kernel/rcu/rcuperf.c | 69 +++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..3598464ca8ed 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3319,6 +3319,17 @@ This wake_up() will be accompanied by a WARN_ONCE() splat and an ftrace_dump(). + rcuperf.gp_async= [KNL] + Measure performance of asynchronous + grace-period primitives such as call_rcu(). + + rcuperf.gp_async_max= [KNL] + Specify the maximum number of outstanding + callbacks per writer thread. When a writer + thread exceeds this limit, it invokes the + corresponding flavor of rcu_barrier() to allow + previously posted callbacks to drain. + rcuperf.gp_exp= [KNL] Measure performance of expedited synchronous grace-period primitives. diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index ef5b1faac495..e1ce97bead94 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -59,6 +59,8 @@ MODULE_AUTHOR("Paul E. McKenney "); #define VERBOSE_PERFOUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0) +torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); +torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); torture_param(int, nreaders, -1, "Number of RCU reader threads"); @@ -86,13 +88,16 @@ static u64 t_rcu_perf_writer_started; static u64 t_rcu_perf_writer_finished; static unsigned long b_rcu_perf_writer_started; static unsigned long b_rcu_perf_writer_finished; +static DEFINE_PER_CPU(atomic_t, n_async_inflight); static int rcu_perf_writer_state; #define RTWS_INIT 0 -#define RTWS_EXP_SYNC 1 -#define RTWS_SYNC 2 -#define RTWS_IDLE 2 -#define RTWS_STOPPING 3 +#define RTWS_ASYNC 1 +#define RTWS_BARRIER 2 +#define RTWS_EXP_SYNC 3 +#define RTWS_SYNC 4 +#define RTWS_IDLE 5 +#define RTWS_STOPPING 6 #define MAX_MEAS 10000 #define MIN_MEAS 100 @@ -114,6 +119,8 @@ struct rcu_perf_ops { unsigned long (*started)(void); unsigned long (*completed)(void); unsigned long (*exp_completed)(void); + void (*async)(struct rcu_head *head, rcu_callback_t func); + void (*gp_barrier)(void); void (*sync)(void); void (*exp_sync)(void); const char *name; @@ -153,6 +160,8 @@ static struct rcu_perf_ops rcu_ops = { .started = rcu_batches_started, .completed = rcu_batches_completed, .exp_completed = rcu_exp_batches_completed, + .async = call_rcu, + .gp_barrier = rcu_barrier, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, .name = "rcu" @@ -181,6 +190,8 @@ static struct rcu_perf_ops rcu_bh_ops = { .started = rcu_batches_started_bh, .completed = rcu_batches_completed_bh, .exp_completed = rcu_exp_batches_completed_sched, + .async = call_rcu_bh, + .gp_barrier = rcu_barrier_bh, .sync = synchronize_rcu_bh, .exp_sync = synchronize_rcu_bh_expedited, .name = "rcu_bh" @@ -208,6 +219,16 @@ static unsigned long srcu_perf_completed(void) return srcu_batches_completed(srcu_ctlp); } +static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + call_srcu(srcu_ctlp, head, func); +} + +static void srcu_rcu_barrier(void) +{ + srcu_barrier(srcu_ctlp); +} + static void srcu_perf_synchronize(void) { synchronize_srcu(srcu_ctlp); @@ -226,6 +247,8 @@ static struct rcu_perf_ops srcu_ops = { .started = NULL, .completed = srcu_perf_completed, .exp_completed = srcu_perf_completed, + .async = srcu_call_rcu, + .gp_barrier = srcu_rcu_barrier, .sync = srcu_perf_synchronize, .exp_sync = srcu_perf_synchronize_expedited, .name = "srcu" @@ -254,6 +277,8 @@ static struct rcu_perf_ops sched_ops = { .started = rcu_batches_started_sched, .completed = rcu_batches_completed_sched, .exp_completed = rcu_exp_batches_completed_sched, + .async = call_rcu_sched, + .gp_barrier = rcu_barrier_sched, .sync = synchronize_sched, .exp_sync = synchronize_sched_expedited, .name = "sched" @@ -281,6 +306,8 @@ static struct rcu_perf_ops tasks_ops = { .readunlock = tasks_perf_read_unlock, .started = rcu_no_completed, .completed = rcu_no_completed, + .async = call_rcu_tasks, + .gp_barrier = rcu_barrier_tasks, .sync = synchronize_rcu_tasks, .exp_sync = synchronize_rcu_tasks, .name = "tasks" @@ -343,6 +370,15 @@ rcu_perf_reader(void *arg) return 0; } +/* + * Callback function for asynchronous grace periods from rcu_perf_writer(). + */ +static void rcu_perf_async_cb(struct rcu_head *rhp) +{ + atomic_dec(this_cpu_ptr(&n_async_inflight)); + kfree(rhp); +} + /* * RCU perf writer kthread. Repeatedly does a grace period. */ @@ -352,6 +388,7 @@ rcu_perf_writer(void *arg) int i = 0; int i_max; long me = (long)arg; + struct rcu_head *rhp = NULL; struct sched_param sp; bool started = false, done = false, alldone = false; u64 t; @@ -382,7 +419,23 @@ rcu_perf_writer(void *arg) do { wdp = &wdpp[i]; *wdp = ktime_get_mono_fast_ns(); - if (gp_exp) { + if (gp_async) { +retry: + if (!rhp) + rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); + if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) { + rcu_perf_writer_state = RTWS_ASYNC; + atomic_inc(this_cpu_ptr(&n_async_inflight)); + cur_ops->async(rhp, rcu_perf_async_cb); + rhp = NULL; + } else if (!kthread_should_stop()) { + rcu_perf_writer_state = RTWS_BARRIER; + cur_ops->gp_barrier(); + goto retry; + } else { + kfree(rhp); /* Because we are stopping. */ + } + } else if (gp_exp) { rcu_perf_writer_state = RTWS_EXP_SYNC; cur_ops->exp_sync(); } else { @@ -429,6 +482,10 @@ rcu_perf_writer(void *arg) i++; rcu_perf_wait_shutdown(); } while (!torture_must_stop()); + if (gp_async) { + rcu_perf_writer_state = RTWS_BARRIER; + cur_ops->gp_barrier(); + } rcu_perf_writer_state = RTWS_STOPPING; writer_n_durations[me] = i_max; torture_kthread_stopping("rcu_perf_writer"); @@ -460,6 +517,8 @@ rcu_perf_cleanup(void) VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); if (rcu_gp_is_normal() && gp_exp) VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + if (gp_exp && gp_async) + VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!"); if (torture_cleanup_begin()) return; -- cgit v1.2.3 From 820687a7b98a5031207893ff265f97c0a0ad403e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Apr 2017 15:12:56 -0700 Subject: rcuperf: Add writer_holdoff boot parameter This commit adds a writer_holdoff boot parameter to rcuperf, which is intended to be used to test Tree SRCU's auto-expediting. This boot parameter is in microseconds, and defaults to zero (that is, disabled). Set it to a bit larger than srcutree.exp_holdoff, keeping the nanosecond/microsecond conversion, to force Tree SRCU to auto-expedite more aggressively. This commit also adds documentation for this parameter, and fixes some alphabetization while in the neighborhood. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 11 ++++++++--- kernel/rcu/rcuperf.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3598464ca8ed..01b5ab92d251 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3357,17 +3357,22 @@ rcuperf.perf_runnable= [BOOT] Start rcuperf running at boot time. + rcuperf.perf_type= [KNL] + Specify the RCU implementation to test. + rcuperf.shutdown= [KNL] Shut the system down after performance tests complete. This is useful for hands-off automated testing. - rcuperf.perf_type= [KNL] - Specify the RCU implementation to test. - rcuperf.verbose= [KNL] Enable additional printk() statements. + rcuperf.writer_holdoff= [KNL] + Write-side holdoff between grace periods, + in microseconds. The default of zero says + no holdoff. + rcutorture.cbflood_inter_holdoff= [KNL] Set holdoff time (jiffies) between successive callback-flood tests. diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 49c8ed6bd2fd..d80f11d9f8bd 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -68,6 +68,7 @@ torture_param(int, nwriters, -1, "Number of RCU updater threads"); torture_param(bool, shutdown, !IS_ENABLED(MODULE), "Shutdown at end of performance tests."); torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); +torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); static char *perf_type = "rcu"; module_param(perf_type, charp, 0444); @@ -447,6 +448,8 @@ rcu_perf_writer(void *arg) } do { + if (writer_holdoff) + udelay(writer_holdoff); wdp = &wdpp[i]; *wdp = ktime_get_mono_fast_ns(); if (gp_async) { -- cgit v1.2.3 From c350c008297643dad3c395c2fd92230142da5cf6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 15:35:32 -0700 Subject: srcu: Prevent sdp->srcu_gp_seq_needed counter wrap If a given CPU never happens to ever start an SRCU grace period, the grace-period sequence counter might wrap. If this CPU were to decide to finally start a grace period, the state of its sdp->srcu_gp_seq_needed might make it appear that it has already requested this grace period, which would prevent starting the grace period. If no other CPU ever started a grace period again, this would look like a grace-period hang. Even if some other CPU took pity and started the needed grace period, the leaf rcu_node structure's ->srcu_data_have_cbs field won't have record of the fact that this CPU has a callback pending, which would look like a very localized grace-period hang. This might seem very unlikely, but SRCU grace periods can take less than a microsecond on small systems, which means that overflow can happen in much less than an hour on a 32-bit embedded system. And embedded systems are especially likely to have long-term idle CPUs. Therefore, it makes sense to prevent this scenario from happening. This commit therefore scans each srcu_data structure occasionally, with frequency controlled by the srcutree.counter_wrap_check kernel boot parameter. This parameter can be set to something like 255 in order to exercise the counter-wrap-prevention code. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ kernel/rcu/srcutree.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 01b5ab92d251..6671f9b60a86 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3810,6 +3810,15 @@ spia_pedr= spia_peddr= + srcutree.counter_wrap_check [KNL] + Specifies how frequently to check for + grace-period sequence counter wrap for the + srcu_data structure's ->srcu_gp_seq_needed field. + The greater the number of bits set in this kernel + parameter, the less frequently counter wrap will + be checked for. Note that the bottom two bits + are ignored. + srcutree.exp_holdoff [KNL] Specifies how many nanoseconds must elapse since the end of the last SRCU grace period for diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c6e2a4a1628b..cc06dbfc9692 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -45,6 +45,10 @@ static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF; module_param(exp_holdoff, ulong, 0444); +/* Overflow-check frequency. N bits roughly says every 2**N grace periods. */ +static ulong counter_wrap_check = (ULONG_MAX >> 2); +module_param(counter_wrap_check, ulong, 0444); + static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); @@ -496,10 +500,13 @@ static void srcu_gp_end(struct srcu_struct *sp) { unsigned long cbdelay; bool cbs; + int cpu; + unsigned long flags; unsigned long gpseq; int idx; int idxnext; unsigned long mask; + struct srcu_data *sdp; struct srcu_node *snp; /* Prevent more than one additional grace period. */ @@ -538,6 +545,17 @@ static void srcu_gp_end(struct srcu_struct *sp) smp_mb(); /* GP end before CB invocation. */ srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); } + + /* Occasionally prevent srcu_data counter wrap. */ + if (!(gpseq & counter_wrap_check)) + for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { + sdp = per_cpu_ptr(sp->sda, cpu); + spin_lock_irqsave(&sdp->lock, flags); + if (ULONG_CMP_GE(gpseq, + sdp->srcu_gp_seq_needed + 100)) + sdp->srcu_gp_seq_needed = gpseq; + spin_unlock_irqrestore(&sdp->lock, flags); + } } /* Callback initiation done, allow grace periods after next. */ -- cgit v1.2.3 From 90040c9e3015054db7efa0101afdd446d1167fe8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 10 May 2017 14:36:55 -0700 Subject: rcu: Remove *_SLOW_* Kconfig options The RCU_TORTURE_TEST_SLOW_PREINIT, RCU_TORTURE_TEST_SLOW_PREINIT_DELAY, RCU_TORTURE_TEST_SLOW_PREINIT_DELAY, RCU_TORTURE_TEST_SLOW_INIT, RCU_TORTURE_TEST_SLOW_INIT_DELAY, RCU_TORTURE_TEST_SLOW_CLEANUP, and RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY Kconfig options are only useful for torture testing, and there are the rcutree.gp_cleanup_delay, rcutree.gp_init_delay, and rcutree.gp_preinit_delay kernel boot parameters that rcutorture can use instead. The effect of these parameters is to artificially slow down grace period initialization and cleanup in order to make some types of race conditions happen more often. This commit therefore simplifies Tree RCU a bit by removing the Kconfig options and adding the corresponding kernel parameters to rcutorture's .boot files instead. However, this commit also leaves out the kernel parameters for TREE02, TREE04, and TREE07 in order to have about the same number of tests slowed as not slowed. TREE01, TREE03, TREE05, and TREE06 are slowed, and the rest are not slowed. Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 10 +-- kernel/rcu/tree.c | 26 ++------ kernel/rcu/tree_plugin.h | 6 +- lib/Kconfig.debug | 75 ---------------------- .../selftests/rcutorture/configs/rcu/TREE01 | 3 - .../selftests/rcutorture/configs/rcu/TREE01.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE02 | 3 - .../selftests/rcutorture/configs/rcu/TREE03 | 3 - .../selftests/rcutorture/configs/rcu/TREE03.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE04 | 3 - .../selftests/rcutorture/configs/rcu/TREE05 | 3 - .../selftests/rcutorture/configs/rcu/TREE05.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE06 | 3 - .../selftests/rcutorture/configs/rcu/TREE06.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE07 | 3 - .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 9 --- 16 files changed, 24 insertions(+), 135 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6671f9b60a86..f85bfe02f052 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3229,21 +3229,17 @@ rcutree.gp_cleanup_delay= [KNL] Set the number of jiffies to delay each step of - RCU grace-period cleanup. This only has effect - when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set. + RCU grace-period cleanup. rcutree.gp_init_delay= [KNL] Set the number of jiffies to delay each step of - RCU grace-period initialization. This only has - effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT - is set. + RCU grace-period initialization. rcutree.gp_preinit_delay= [KNL] Set the number of jiffies to delay each step of RCU grace-period pre-initialization, that is, the propagation of recent CPU-hotplug changes up - the rcu_node combining tree. This only has effect - when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set. + the rcu_node combining tree. rcutree.rcu_fanout_exact= [KNL] Disable autobalancing of the rcu_node combining diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cac24f5d3fd2..bbbddd85906b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -177,26 +177,12 @@ module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. */ -#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY; -module_param(gp_preinit_delay, int, 0644); -#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ -static const int gp_preinit_delay; -#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ - -#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT -static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; -module_param(gp_init_delay, int, 0644); -#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ -static const int gp_init_delay; -#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ - -#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY; -module_param(gp_cleanup_delay, int, 0644); -#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ -static const int gp_cleanup_delay; -#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ +static int gp_preinit_delay; +module_param(gp_preinit_delay, int, 0444); +static int gp_init_delay; +module_param(gp_init_delay, int, 0444); +static int gp_cleanup_delay; +module_param(gp_cleanup_delay, int, 0444); /* * Number of grace periods between delays, normalized by the duration of diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 116cf8339826..0553d9fed7d7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -109,11 +109,11 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tKick kthreads if too-long grace period.\n"); if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) pr_info("\tRCU callback double-/use-after-free debug enabled.\n"); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT)) + if (gp_preinit_delay) pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)) + if (gp_init_delay) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP)) + if (gp_cleanup_delay) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) pr_info("\tRCU debug extended QS entry/exit.\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e4587ebe52c7..960c5d2d3c03 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1373,81 +1373,6 @@ config RCU_TORTURE_TEST Say M if you want the RCU torture tests to build as a module. Say N if you are unsure. -config RCU_TORTURE_TEST_SLOW_PREINIT - bool "Slow down RCU grace-period pre-initialization to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period pre-initialization (the - propagation of CPU-hotplug changes up the rcu_node combining - tree) for a few jiffies between initializing each pair of - consecutive rcu_node structures. This helps to expose races - involving grace-period pre-initialization, in other words, it - makes your kernel less stable. It can also greatly increase - grace-period latency, especially on systems with large numbers - of CPUs. This is useful when torture-testing RCU, but in - almost no other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY - int "How much to slow down RCU grace-period pre-initialization" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_PREINIT - help - This option specifies the number of jiffies to wait between - each rcu_node structure pre-initialization step. - -config RCU_TORTURE_TEST_SLOW_INIT - bool "Slow down RCU grace-period initialization to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period initialization for a few - jiffies between initializing each pair of consecutive - rcu_node structures. This helps to expose races involving - grace-period initialization, in other words, it makes your - kernel less stable. It can also greatly increase grace-period - latency, especially on systems with large numbers of CPUs. - This is useful when torture-testing RCU, but in almost no - other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_INIT_DELAY - int "How much to slow down RCU grace-period initialization" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_INIT - help - This option specifies the number of jiffies to wait between - each rcu_node structure initialization. - -config RCU_TORTURE_TEST_SLOW_CLEANUP - bool "Slow down RCU grace-period cleanup to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period cleanup for a few jiffies - between cleaning up each pair of consecutive rcu_node - structures. This helps to expose races involving grace-period - cleanup, in other words, it makes your kernel less stable. - It can also greatly increase grace-period latency, especially - on systems with large numbers of CPUs. This is useful when - torture-testing RCU, but in almost no other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY - int "How much to slow down RCU grace-period cleanup" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_CLEANUP - help - This option specifies the number of jiffies to wait between - each rcu_node structure cleanup operation. - config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index cc6c5815236e..92ca49f90ef9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -17,6 +17,3 @@ CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index adc3abc82fb8..89705ed79596 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1 +1,4 @@ rcutorture.torture_type=rcu_bh maxcpus=8 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 1cecab330ba0..35e639e39366 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -19,8 +19,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 3b93ee544e70..7a17c503b382 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -17,6 +17,3 @@ CONFIG_RCU_BOOST=y CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 120c0c88d100..9ef3aed126e9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -1 +1,4 @@ rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 851c01ae2cea..27d22695d64c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -18,7 +18,4 @@ CONFIG_RCU_FANOUT_LEAF=3 CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index d4cdc0d74e16..1257d3227b1e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -19,6 +19,3 @@ CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot index 15b3e1a86f74..c7fd050dfcd9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot @@ -1,2 +1,5 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test_sched=1 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 9215827649bd..05a4eec3f27b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -21,6 +21,3 @@ CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot index dd90f28ed700..ad18b52a2cad 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot @@ -2,3 +2,6 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index 99f04e4c5162..b9ddd3beeb9a 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -17,6 +17,3 @@ CONFIG_RCU_FANOUT_LEAF=2 CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 364801b1a230..1dfec4657d95 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -28,9 +28,6 @@ CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations. CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not. -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios. -CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios. -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios. RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-sched: Do one with PREEMPT but not BOOST. @@ -78,12 +75,6 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE Always used in KVM testing. -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY -CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY - - Inspection suffices, ignore. - CONFIG_PREEMPT_RCU CONFIG_TREE_RCU CONFIG_TINY_RCU -- cgit v1.2.3 From 62a31ce1372a8a52050961904e1ca16bc06e6f45 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 5 Jun 2017 14:15:12 -0600 Subject: doc: Add coresight_cpu_debug.enable to kernel-parameters.txt Add coresight_cpu_debug.enable to kernel-parameters.txt, this flag is used to enable/disable the CPU sampling based debugging. Signed-off-by: Leo Yan Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..ff67ad7d2c55 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -649,6 +649,13 @@ /proc//coredump_filter. See also Documentation/filesystems/proc.txt. + coresight_cpu_debug.enable + [ARM,ARM64] + Format: + Enable/disable the CPU sampling based debugging. + 0: default value, disable debugging + 1: enable debugging at boot time + cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system -- cgit v1.2.3 From 182936eee7e6b1956881b51bc534a541dc71a101 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:41 +0100 Subject: KVM: arm64: Enable GICv3 Group-1 sysreg trapping via command-line Now that we're able to safely handle Group-1 sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.group1_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ virt/kvm/arm/vgic/vgic-v3.c | 11 +++++++++++ 2 files changed, 15 insertions(+) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..42fe395be6c8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1829,6 +1829,10 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.vgic_v3_group1_trap= + [KVM,ARM] Trap guest accesses to GICv3 group-1 + system registers + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 722bdebdfbb5..4b2b62b73470 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -435,6 +435,12 @@ out: DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +static int __init early_group1_trap_cfg(char *buf) +{ + return strtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -486,6 +492,11 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); + if (group1_trap) { + kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); + static_branch_enable(&vgic_v3_cpuif_trap); + } + kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; -- cgit v1.2.3 From e23f62f76acea232d4def5d4eb21709a2b575f14 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:46 +0100 Subject: KVM: arm64: Enable GICv3 Group-0 sysreg trapping via command-line Now that we're able to safely handle Group-0 sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.group0_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ virt/kvm/arm/vgic/vgic-v3.c | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 42fe395be6c8..88bdc421351f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1829,6 +1829,10 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.vgic_v3_group0_trap= + [KVM,ARM] Trap guest accesses to GICv3 group-0 + system registers + kvm-arm.vgic_v3_group1_trap= [KVM,ARM] Trap guest accesses to GICv3 group-1 system registers diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 39046ee5be25..828ca7f9a060 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -438,6 +438,12 @@ out: DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +static int __init early_group0_trap_cfg(char *buf) +{ + return strtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + static int __init early_group1_trap_cfg(char *buf) { return strtobool(buf, &group1_trap); -- cgit v1.2.3 From ff89511ef29b794d6a9c6b62f5ea76fc013cdae7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:53 +0100 Subject: KVM: arm64: Enable GICv3 common sysreg trapping via command-line Now that we're able to safely handle common sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.common_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/vgic/vgic-v3.c | 11 ++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 88bdc421351f..aa8341e73b35 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1837,6 +1837,10 @@ [KVM,ARM] Trap guest accesses to GICv3 group-1 system registers + kvm-arm.vgic_v3_common_trap= + [KVM,ARM] Trap guest accesses to GICv3 common + system registers + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c7f31a962cfc..6a1f87ff94e2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 35c00efc110b..91cf8b4f01f1 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -23,6 +23,7 @@ static bool group0_trap; static bool group1_trap; +static bool common_trap; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { @@ -265,6 +266,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr |= ICH_HCR_TALL0; if (group1_trap) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -450,6 +453,12 @@ static int __init early_group1_trap_cfg(char *buf) } early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); +static int __init early_common_trap_cfg(char *buf) +{ + return strtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -508,7 +517,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) } #endif - if (group0_trap || group1_trap) { + if (group0_trap || group1_trap || common_trap) { kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3 From e36361d70e9729ee2334fcb260014a3ff275c2e0 Mon Sep 17 00:00:00 2001 From: Andreas Färber Date: Mon, 19 Jun 2017 03:46:40 +0200 Subject: tty: serial: Add Actions Semi Owl UART earlycon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements an earlycon for Actions Semi S500/S900 SoCs. Based on LeMaker linux-actions tree. Signed-off-by: Andreas Färber Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 6 ++ drivers/tty/serial/Kconfig | 19 ++++ drivers/tty/serial/Makefile | 1 + drivers/tty/serial/owl-uart.c | 135 ++++++++++++++++++++++++ 4 files changed, 161 insertions(+) create mode 100644 drivers/tty/serial/owl-uart.c (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..38496249f8ba 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -945,6 +945,12 @@ must already be setup and configured. Options are not yet supported. + owl, + Start an early, polled-mode console on a serial port + of an Actions Semi SoC, such as S500 or S900, at the + specified address. The serial port must already be + setup and configured. Options are not yet supported. + smh Use ARM semihosting calls for early console. s3c2410, diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 07812a7ea2a4..1f096e2bb398 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1688,6 +1688,25 @@ config SERIAL_MVEBU_CONSOLE and warnings and which allows logins in single user mode) Otherwise, say 'N'. +config SERIAL_OWL + bool "Actions Semi Owl serial port support" + depends on ARCH_ACTIONS || COMPILE_TEST + select SERIAL_CORE + help + This driver is for Actions Semiconductor S500/S900 SoC's UART. + Say 'Y' here if you wish to use the on-board serial port. + Otherwise, say 'N'. + +config SERIAL_OWL_CONSOLE + bool "Console on Actions Semi Owl serial port" + depends on SERIAL_OWL=y + select SERIAL_CORE_CONSOLE + select SERIAL_EARLYCON + default y + help + Say 'Y' here if you wish to use Actions Semiconductor S500/S900 UART + as the system console. Only earlycon is implemented currently. + endmenu config SERIAL_MCTRL_GPIO diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 53c03e005132..fe88a75d9a59 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_SERIAL_STM32) += stm32-usart.o obj-$(CONFIG_SERIAL_MVEBU_UART) += mvebu-uart.o obj-$(CONFIG_SERIAL_PIC32) += pic32_uart.o obj-$(CONFIG_SERIAL_MPS2_UART) += mps2-uart.o +obj-$(CONFIG_SERIAL_OWL) += owl-uart.o # GPIOLIB helpers for modem control lines obj-$(CONFIG_SERIAL_MCTRL_GPIO) += serial_mctrl_gpio.o diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c new file mode 100644 index 000000000000..1b8008797a1b --- /dev/null +++ b/drivers/tty/serial/owl-uart.c @@ -0,0 +1,135 @@ +/* + * Actions Semi Owl family serial console + * + * Copyright 2013 Actions Semi Inc. + * Author: Actions Semi, Inc. + * + * Copyright (c) 2016-2017 Andreas Färber + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define OWL_UART_CTL 0x000 +#define OWL_UART_TXDAT 0x008 +#define OWL_UART_STAT 0x00c + +#define OWL_UART_CTL_TRFS_TX BIT(14) +#define OWL_UART_CTL_EN BIT(15) +#define OWL_UART_CTL_RXIE BIT(18) +#define OWL_UART_CTL_TXIE BIT(19) + +#define OWL_UART_STAT_RIP BIT(0) +#define OWL_UART_STAT_TIP BIT(1) +#define OWL_UART_STAT_TFFU BIT(6) +#define OWL_UART_STAT_TRFL_MASK (0x1f << 11) +#define OWL_UART_STAT_UTBB BIT(17) + +static inline void owl_uart_write(struct uart_port *port, u32 val, unsigned int off) +{ + writel(val, port->membase + off); +} + +static inline u32 owl_uart_read(struct uart_port *port, unsigned int off) +{ + return readl(port->membase + off); +} + +#ifdef CONFIG_SERIAL_OWL_CONSOLE + +static void owl_console_putchar(struct uart_port *port, int ch) +{ + if (!port->membase) + return; + + while (owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TFFU) + cpu_relax(); + + owl_uart_write(port, ch, OWL_UART_TXDAT); +} + +static void owl_uart_port_write(struct uart_port *port, const char *s, + u_int count) +{ + u32 old_ctl, val; + unsigned long flags; + int locked; + + local_irq_save(flags); + + if (port->sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock(&port->lock); + else { + spin_lock(&port->lock); + locked = 1; + } + + old_ctl = owl_uart_read(port, OWL_UART_CTL); + val = old_ctl | OWL_UART_CTL_TRFS_TX; + /* disable IRQ */ + val &= ~(OWL_UART_CTL_RXIE | OWL_UART_CTL_TXIE); + owl_uart_write(port, val, OWL_UART_CTL); + + uart_console_write(port, s, count, owl_console_putchar); + + /* wait until all contents have been sent out */ + while (owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TRFL_MASK) + cpu_relax(); + + /* clear IRQ pending */ + val = owl_uart_read(port, OWL_UART_STAT); + val |= OWL_UART_STAT_TIP | OWL_UART_STAT_RIP; + owl_uart_write(port, val, OWL_UART_STAT); + + owl_uart_write(port, old_ctl, OWL_UART_CTL); + + if (locked) + spin_unlock(&port->lock); + + local_irq_restore(flags); +} + +static void owl_uart_early_console_write(struct console *co, + const char *s, + u_int count) +{ + struct earlycon_device *dev = co->data; + + owl_uart_port_write(&dev->port, s, count); +} + +static int __init +owl_uart_early_console_setup(struct earlycon_device *device, const char *opt) +{ + if (!device->port.membase) + return -ENODEV; + + device->con->write = owl_uart_early_console_write; + + return 0; +} +OF_EARLYCON_DECLARE(owl, "actions,owl-uart", + owl_uart_early_console_setup); + +#endif /* CONFIG_SERIAL_OWL_CONSOLE */ -- cgit v1.2.3 From 33ce9549cfa1e71d77bc91a2e67e65d693e2e53f Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 24 Apr 2017 12:04:09 -0400 Subject: ima: extend the "ima_policy" boot command line to support multiple policies Add support for providing multiple builtin policies on the "ima_policy=" boot command line. Use "|" as the delimitor separating the policy names. Signed-off-by: Mimi Zohar --- Documentation/admin-guide/kernel-parameters.txt | 17 +++++++++++------ security/integrity/ima/ima_policy.c | 15 ++++++++++----- 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..9b4381fee877 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1477,12 +1477,17 @@ in crypto/hash_info.h. ima_policy= [IMA] - The builtin measurement policy to load during IMA - setup. Specyfing "tcb" as the value, measures all - programs exec'd, files mmap'd for exec, and all files - opened with the read mode bit set by either the - effective uid (euid=0) or uid=0. - Format: "tcb" + The builtin policies to load during IMA setup. + Format: "tcb | appraise_tcb" + + The "tcb" policy measures all programs exec'd, files + mmap'd for exec, and all files opened with the read + mode bit set by either the effective uid (euid=0) or + uid=0. + + The "appraise_tcb" policy appraises the integrity of + all files owned by root. (This is the equivalent + of ima_appraise_tcb.) ima_tcb [IMA] Deprecated. Use ima_policy= instead. Load a policy which meets the needs of the Trusted diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 3ab1067db624..0ddc41389a9c 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -170,19 +170,24 @@ static int __init default_measure_policy_setup(char *str) } __setup("ima_tcb", default_measure_policy_setup); +static bool ima_use_appraise_tcb __initdata; static int __init policy_setup(char *str) { - if (ima_policy) - return 1; + char *p; - if (strcmp(str, "tcb") == 0) - ima_policy = DEFAULT_TCB; + while ((p = strsep(&str, " |\n")) != NULL) { + if (*p == ' ') + continue; + if ((strcmp(p, "tcb") == 0) && !ima_policy) + ima_policy = DEFAULT_TCB; + else if (strcmp(p, "appraise_tcb") == 0) + ima_use_appraise_tcb = 1; + } return 1; } __setup("ima_policy=", policy_setup); -static bool ima_use_appraise_tcb __initdata; static int __init default_appraise_policy_setup(char *str) { ima_use_appraise_tcb = 1; -- cgit v1.2.3 From 503ceaef8e2e7dbbdb04a867acc6fe4c548ede7f Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 21 Apr 2017 18:58:27 -0400 Subject: ima: define a set of appraisal rules requiring file signatures The builtin "ima_appraise_tcb" policy should require file signatures for at least a few of the hooks (eg. kernel modules, firmware, and the kexec kernel image), but changing it would break the existing userspace/kernel ABI. This patch defines a new builtin policy named "secure_boot", which can be specified on the "ima_policy=" boot command line, independently or in conjunction with the "ima_appraise_tcb" policy, by specifing ima_policy="appraise_tcb | secure_boot". The new appraisal rules requiring file signatures will be added prior to the "ima_appraise_tcb" rules. Signed-off-by: Mimi Zohar Changelog: - Reference secure boot in the new builtin policy name. (Thiago Bauermann) --- Documentation/admin-guide/kernel-parameters.txt | 6 +++++- security/integrity/ima/ima_policy.c | 26 ++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9b4381fee877..e438a1fca554 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1478,7 +1478,7 @@ ima_policy= [IMA] The builtin policies to load during IMA setup. - Format: "tcb | appraise_tcb" + Format: "tcb | appraise_tcb | secure_boot" The "tcb" policy measures all programs exec'd, files mmap'd for exec, and all files opened with the read @@ -1489,6 +1489,10 @@ all files owned by root. (This is the equivalent of ima_appraise_tcb.) + The "secure_boot" policy appraises the integrity + of files (eg. kexec kernel image, kernel modules, + firmware, policy, etc) based on file signatures. + ima_tcb [IMA] Deprecated. Use ima_policy= instead. Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 0ddc41389a9c..3653c86c70df 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -153,6 +153,17 @@ static struct ima_rule_entry default_appraise_rules[] __ro_after_init = { #endif }; +static struct ima_rule_entry secure_boot_rules[] __ro_after_init = { + {.action = APPRAISE, .func = MODULE_CHECK, + .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, + {.action = APPRAISE, .func = FIRMWARE_CHECK, + .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, + {.action = APPRAISE, .func = KEXEC_KERNEL_CHECK, + .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, + {.action = APPRAISE, .func = POLICY_CHECK, + .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, +}; + static LIST_HEAD(ima_default_rules); static LIST_HEAD(ima_policy_rules); static LIST_HEAD(ima_temp_rules); @@ -171,6 +182,7 @@ static int __init default_measure_policy_setup(char *str) __setup("ima_tcb", default_measure_policy_setup); static bool ima_use_appraise_tcb __initdata; +static bool ima_use_secure_boot __initdata; static int __init policy_setup(char *str) { char *p; @@ -182,6 +194,8 @@ static int __init policy_setup(char *str) ima_policy = DEFAULT_TCB; else if (strcmp(p, "appraise_tcb") == 0) ima_use_appraise_tcb = 1; + else if (strcmp(p, "secure_boot") == 0) + ima_use_secure_boot = 1; } return 1; @@ -410,12 +424,14 @@ void ima_update_policy_flag(void) */ void __init ima_init_policy(void) { - int i, measure_entries, appraise_entries; + int i, measure_entries, appraise_entries, secure_boot_entries; /* if !ima_policy set entries = 0 so we load NO default rules */ measure_entries = ima_policy ? ARRAY_SIZE(dont_measure_rules) : 0; appraise_entries = ima_use_appraise_tcb ? ARRAY_SIZE(default_appraise_rules) : 0; + secure_boot_entries = ima_use_secure_boot ? + ARRAY_SIZE(secure_boot_rules) : 0; for (i = 0; i < measure_entries; i++) list_add_tail(&dont_measure_rules[i].list, &ima_default_rules); @@ -434,6 +450,14 @@ void __init ima_init_policy(void) break; } + /* + * Insert the appraise rules requiring file signatures, prior to + * any other appraise rules. + */ + for (i = 0; i < secure_boot_entries; i++) + list_add_tail(&secure_boot_rules[i].list, + &ima_default_rules); + for (i = 0; i < appraise_entries; i++) { list_add_tail(&default_appraise_rules[i].list, &ima_default_rules); -- cgit v1.2.3 From 787e30750d60b4384d34e2dbe0ebe07e09adde15 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Wed, 14 Jun 2017 12:24:12 +0200 Subject: docs-rst: fix broken links to dynamic-debug-howto in kernel-parameters Another place in lib/Kconfig.debug was already fixed in commit f8998c226587 ("lib/Kconfig.debug: correct documentation paths"). Fixes: 9d85025b0418 ("docs-rst: create an user's manual book") Signed-off-by: Steffen Maier Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..b426ad42ab5b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -720,7 +720,8 @@ See also Documentation/input/joystick-parport.txt ddebug_query= [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot - time. See Documentation/dynamic-debug-howto.txt for + time. See + Documentation/admin-guide/dynamic-debug-howto.rst for details. Deprecated, see dyndbg. debug [KNL] Enable kernel debugging (events log level). @@ -874,7 +875,8 @@ dyndbg[="val"] [KNL,DYNAMIC_DEBUG] module.dyndbg[="val"] Enable debug messages at boot time. See - Documentation/dynamic-debug-howto.txt for details. + Documentation/admin-guide/dynamic-debug-howto.rst + for details. nompx [X86] Disables Intel Memory Protection Extensions. See Documentation/x86/intel_mpx.txt for more -- cgit v1.2.3 From 7660a6fddcbae344de8583aa4092071312f110c3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 6 Jul 2017 15:36:40 -0700 Subject: mm: allow slab_nomerge to be set at build time Some hardened environments want to build kernels with slab_nomerge already set (so that they do not depend on remembering to set the kernel command line option). This is desired to reduce the risk of kernel heap overflows being able to overwrite objects from merged caches and changes the requirements for cache layout control, increasing the difficulty of these attacks. By keeping caches unmerged, these kinds of exploits can usually only damage objects in the same cache (though the risk to metadata exploitation is unchanged). Link: http://lkml.kernel.org/r/20170620230911.GA25238@beast Signed-off-by: Kees Cook Cc: Daniel Micay Cc: David Windsor Cc: Eric Biggers Cc: Christoph Lameter Cc: Jonathan Corbet Cc: Daniel Micay Cc: David Windsor Cc: Eric Biggers Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Mauro Carvalho Chehab Cc: "Paul E. McKenney" Cc: Arnd Bergmann Cc: Andy Lutomirski Cc: Nicolas Pitre Cc: Tejun Heo Cc: Daniel Mack Cc: Sebastian Andrzej Siewior Cc: Sergey Senozhatsky Cc: Helge Deller Cc: Rik van Riel Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 10 ++++++++-- init/Kconfig | 14 ++++++++++++++ mm/slab_common.c | 5 ++--- 3 files changed, 24 insertions(+), 5 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f24ee1c99412..34ae9663aefd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3760,8 +3760,14 @@ slab_nomerge [MM] Disable merging of slabs with similar size. May be necessary if there is some reason to distinguish - allocs to different slabs. Debug options disable - merging on their own. + allocs to different slabs, especially in hardened + environments where the risk of heap overflows and + layout control by attackers can usually be + frustrated by disabling merging. This will reduce + most of the exposure of a heap attack to a single + cache (risks via metadata attacks are mostly + unchanged). Debug options disable merging on their + own. For more information see Documentation/vm/slub.txt. slab_max_order= [MM, SLAB] diff --git a/init/Kconfig b/init/Kconfig index b0fcbb2c6f56..8514b25db21c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1548,6 +1548,20 @@ config SLOB endchoice +config SLAB_MERGE_DEFAULT + bool "Allow slab caches to be merged" + default y + help + For reduced kernel memory fragmentation, slab caches can be + merged when they share the same size and other characteristics. + This carries a risk of kernel heap overflows being able to + overwrite objects from merged caches (and more easily control + cache layout), which makes such heap attacks easier to exploit + by attackers. By keeping caches unmerged, these kinds of exploits + can usually only damage objects in the same cache. To disable + merging at runtime, "slab_nomerge" can be passed on the kernel + command line. + config SLAB_FREELIST_RANDOM default n depends on SLAB || SLUB diff --git a/mm/slab_common.c b/mm/slab_common.c index 01a0fe2eb332..904a83be82de 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -47,13 +47,12 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, /* * Merge control. If this is set then no merging of slab caches will occur. - * (Could be removed. This was introduced to pacify the merge skeptics.) */ -static int slab_nomerge; +static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); static int __init setup_slab_nomerge(char *str) { - slab_nomerge = 1; + slab_nomerge = true; return 1; } -- cgit v1.2.3 From f70029bbaacbfa8f082d2b4988717cba4e269f17 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:41:02 -0700 Subject: mm, memory_hotplug: drop CONFIG_MOVABLE_NODE Commit 20b2f52b73fe ("numa: add CONFIG_MOVABLE_NODE for movable-dedicated node") has introduced CONFIG_MOVABLE_NODE without a good explanation on why it is actually useful. It makes a lot of sense to make movable node semantic opt in but we already have that because the feature has to be explicitly enabled on the kernel command line. A config option on top only makes the configuration space larger without a good reason. It also adds an additional ifdefery that pollutes the code. Just drop the config option and make it de-facto always enabled. This shouldn't introduce any change to the semantic. Link: http://lkml.kernel.org/r/20170529114141.536-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Reza Arbab Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Jerome Glisse Cc: Yasuaki Ishimatsu Cc: Xishi Qiu Cc: Kani Toshimitsu Cc: Chen Yucong Cc: Joonsoo Kim Cc: Andi Kleen Cc: David Rientjes Cc: Daniel Kiper Cc: Igor Mammedov Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 7 +++++-- drivers/base/node.c | 4 ---- include/linux/memblock.h | 18 ----------------- include/linux/nodemask.h | 4 ---- mm/Kconfig | 26 ------------------------- mm/memblock.c | 2 -- mm/memory_hotplug.c | 4 ---- mm/page_alloc.c | 2 -- 8 files changed, 5 insertions(+), 62 deletions(-) (limited to 'Documentation/admin-guide/kernel-parameters.txt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 34ae9663aefd..dd7abbea8188 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2303,8 +2303,11 @@ that the amount of memory usable for all allocations is not too small. - movable_node [KNL] Boot-time switch to enable the effects - of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details. + movable_node [KNL] Boot-time switch to make hotplugable memory + NUMA nodes to be movable. This means that the memory + of such nodes will be usable only for movable + allocations which rules out almost all kernel + allocations. Use with caution! MTD_Partition= [MTD] Format: ,,, diff --git a/drivers/base/node.c b/drivers/base/node.c index 6b1ee371ee97..73d39bc58c42 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -639,9 +639,7 @@ static struct node_attr node_state_attr[] = { #ifdef CONFIG_HIGHMEM [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY), #endif -#ifdef CONFIG_MOVABLE_NODE [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY), -#endif [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), }; @@ -652,9 +650,7 @@ static struct attribute *node_state_attrs[] = { #ifdef CONFIG_HIGHMEM &node_state_attr[N_HIGH_MEMORY].attr.attr, #endif -#ifdef CONFIG_MOVABLE_NODE &node_state_attr[N_MEMORY].attr.attr, -#endif &node_state_attr[N_CPU].attr.attr, NULL }; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8098695e5d8d..1199e605d676 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -57,10 +57,8 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -#ifdef CONFIG_MOVABLE_NODE /* If movable_node boot option specified */ extern bool movable_node_enabled; -#endif /* CONFIG_MOVABLE_NODE */ #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit @@ -169,7 +167,6 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, i != (u64)ULLONG_MAX; \ __next_reserved_mem_region(&i, p_start, p_end)) -#ifdef CONFIG_MOVABLE_NODE static inline bool memblock_is_hotpluggable(struct memblock_region *m) { return m->flags & MEMBLOCK_HOTPLUG; @@ -179,16 +176,6 @@ static inline bool __init_memblock movable_node_is_enabled(void) { return movable_node_enabled; } -#else -static inline bool memblock_is_hotpluggable(struct memblock_region *m) -{ - return false; -} -static inline bool movable_node_is_enabled(void) -{ - return false; -} -#endif static inline bool memblock_is_mirror(struct memblock_region *m) { @@ -296,7 +283,6 @@ phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); -#ifdef CONFIG_MOVABLE_NODE /* * Set the allocation direction to bottom-up or top-down. */ @@ -314,10 +300,6 @@ static inline bool memblock_bottom_up(void) { return memblock.bottom_up; } -#else -static inline void __init memblock_set_bottom_up(bool enable) {} -static inline bool memblock_bottom_up(void) { return false; } -#endif /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f746e44d4046..cf0b91c3ec12 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -387,11 +387,7 @@ enum node_states { #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif -#ifdef CONFIG_MOVABLE_NODE N_MEMORY, /* The node has memory(regular, high, movable) */ -#else - N_MEMORY = N_HIGH_MEMORY, -#endif N_CPU, /* The node has one or more cpus */ NR_NODE_STATES }; diff --git a/mm/Kconfig b/mm/Kconfig index 9870baafb096..857f6ef368d4 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -149,32 +149,6 @@ config NO_BOOTMEM config MEMORY_ISOLATION bool -config MOVABLE_NODE - bool "Enable to assign a node which has only movable memory" - depends on HAVE_MEMBLOCK - depends on NO_BOOTMEM - depends on X86_64 || OF_EARLY_FLATTREE || MEMORY_HOTPLUG - depends on NUMA - default n - help - Allow a node to have only movable memory. Pages used by the kernel, - such as direct mapping pages cannot be migrated. So the corresponding - memory device cannot be hotplugged. This option allows the following - two things: - - When the system is booting, node full of hotpluggable memory can - be arranged to have only movable memory so that the whole node can - be hot-removed. (need movable_node boot option specified). - - After the system is up, the option allows users to online all the - memory of a node as movable memory so that the whole node can be - hot-removed. - - Users who don't use the memory hotplug feature are fine with this - option on since they don't specify movable_node boot option or they - don't online memory as movable. - - Say Y here if you want to hotplug a whole node. - Say N here if you want kernel to use memory on all nodes evenly. - # # Only be set on architectures that have completely implemented memory hotplug # feature. If you are not sure, don't touch it. diff --git a/mm/memblock.c b/mm/memblock.c index 7b8a5db76a2f..41eaeebb03dc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -54,9 +54,7 @@ struct memblock memblock __initdata_memblock = { }; int memblock_debug __initdata_memblock; -#ifdef CONFIG_MOVABLE_NODE bool movable_node_enabled __initdata_memblock = false; -#endif static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; static int memblock_memory_in_slab __initdata_memblock = 0; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 937319899e61..0dc8cf0a59d7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1572,11 +1572,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) static int __init cmdline_parse_movable_node(char *p) { -#ifdef CONFIG_MOVABLE_NODE movable_node_enabled = true; -#else - pr_warn("movable_node option not supported\n"); -#endif return 0; } early_param("movable_node", cmdline_parse_movable_node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a35add8d7c0b..bd65b60939b6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -113,9 +113,7 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = { #ifdef CONFIG_HIGHMEM [N_HIGH_MEMORY] = { { [0] = 1UL } }, #endif -#ifdef CONFIG_MOVABLE_NODE [N_MEMORY] = { { [0] = 1UL } }, -#endif [N_CPU] = { { [0] = 1UL } }, #endif /* NUMA */ }; -- cgit v1.2.3