From e6e78b004fa7e0ab455d46d27f218bf6ce178a18 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 30 Aug 2019 12:36:29 -0400 Subject: rcuperf: Add kfree_rcu() performance Tests This test runs kfree_rcu() in a loop to measure performance of the new kfree_rcu() batching functionality. The following table shows results when booting with arguments: rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000 rcuperf.kfree_rcu_test=1 rcuperf.kfree_no_batch=X

rcuperf.kfree_no_batch=X    # Grace Periods    Test Duration (s)
  X=1 (old behavior)              9133               11.5
  X=0 (new behavior)              1732               12.5

On a 16 CPU system with the above boot parameters, we see that the total number of grace periods that elapse during the test drops from 9133 when not batching to 1732 when batching (a 5X improvement). The kfree_rcu() flood itself slows down a bit when batching, though, as shown. Note that the active memory consumption during the kfree_rcu() flood does increase to around 200-250MB due to the batching (from around 50MB without batching). However, this memory consumption is relatively constant. In other words, the system is able to keep up with the kfree_rcu() load. The memory consumption comes down considerably if KFREE_DRAIN_JIFFIES is decreased from HZ/50 to HZ/80. A later patch will reduce memory consumption further by using multiple lists. Also, when running the test, please disable CONFIG_DEBUG_PREEMPT and CONFIG_PROVE_RCU for realistic comparisons with/without batching. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney --- Documentation/admin-guide/kernel-parameters.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ade4e6ec23e0..3ce270b56f3a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3978,6 +3978,23 @@ test until boot completes in order to avoid interference. + rcuperf.kfree_rcu_test= [KNL] + Set to measure performance of kfree_rcu() flooding. + + rcuperf.kfree_nthreads= [KNL] + The number of threads running loops of kfree_rcu(). + + rcuperf.kfree_alloc_num= [KNL] + Number of allocations and frees done in an iteration. + + rcuperf.kfree_loops= [KNL] + Number of loops doing rcuperf.kfree_alloc_num number + of allocations and frees. + + rcuperf.kfree_no_batch= [KNL] + Use the non-batching (less efficient) version of kfree_rcu(). + This is useful for comparing with the batched version. + rcuperf.nreaders= [KNL] Set number of RCU readers. The value -1 selects N, where N is the number of CPUs. A value -- cgit v1.2.3 From 189a6883dcf7fa70e17403ae4225c60ffc9e404b Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 30 Aug 2019 12:36:33 -0400 Subject: rcu: Remove kfree_call_rcu_nobatch() Now that the kfree_rcu() special-casing has been removed from tree RCU, this commit removes kfree_call_rcu_nobatch() since it is no longer needed. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. 
McKenney --- Documentation/admin-guide/kernel-parameters.txt | 4 ---- include/linux/rcutiny.h | 5 ----- include/linux/rcutree.h | 1 - kernel/rcu/rcuperf.c | 10 +--------- kernel/rcu/tree.c | 18 ++++-------------- 5 files changed, 5 insertions(+), 33 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3ce270b56f3a..ed83d6d90cc3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3991,10 +3991,6 @@ Number of loops doing rcuperf.kfree_alloc_num number of allocations and frees. - rcuperf.kfree_no_batch= [KNL] - Use the non-batching (less efficient) version of kfree_rcu(). - This is useful for comparing with the batched version. - rcuperf.nreaders= [KNL] Set number of RCU readers. The value -1 selects N, where N is the number of CPUs. A value diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 1bd166aab6f3..b2b2dc990da9 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) call_rcu(head, func); } -static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func) -{ - call_rcu(head, func); -} - void rcu_qs(void); static inline void rcu_softirq_qs(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 6a65d3a16dbd..2f787b9029d1 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu) void synchronize_rcu_expedited(void); void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index c1e25fd10f2a..da94b89cd531 100644 --- a/kernel/rcu/rcuperf.c +++ 
b/kernel/rcu/rcuperf.c @@ -593,7 +593,6 @@ rcu_perf_shutdown(void *arg) torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu()."); torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration."); torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees."); -torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu()."); static struct task_struct **kfree_reader_tasks; static int kfree_nrealthreads; @@ -632,14 +631,7 @@ kfree_perf_thread(void *arg) if (!alloc_ptr) return -ENOMEM; - if (!kfree_no_batch) { - kfree_rcu(alloc_ptr, rh); - } else { - rcu_callback_t cb; - - cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh); - kfree_call_rcu_nobatch(&(alloc_ptr->rh), cb); - } + kfree_rcu(alloc_ptr, rh); } cond_resched(); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a8dd612098bf..31d2d9255d95 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2763,8 +2763,10 @@ static void kfree_rcu_work(struct work_struct *work) rcu_lock_acquire(&rcu_callback_map); trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset); - /* Could be possible to optimize with kfree_bulk in future */ - kfree((void *)head - offset); + if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) { + /* Could be optimized with kfree_bulk() in future. */ + kfree((void *)head - offset); + } rcu_lock_release(&rcu_callback_map); cond_resched_tasks_rcu_qs(); @@ -2835,16 +2837,6 @@ static void kfree_rcu_monitor(struct work_struct *work) spin_unlock_irqrestore(&krcp->lock, flags); } -/* - * This version of kfree_call_rcu does not do batching of kfree_rcu() requests. - * Used only by rcuperf torture test for comparison with kfree_rcu_batch(). 
- */ -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func) -{ - __call_rcu(head, func); -} -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch); - /* * Queue a request for lazy invocation of kfree() after a grace period. * @@ -2864,8 +2856,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func) unsigned long flags; struct kfree_rcu_cpu *krcp; - head->func = func; - local_irq_save(flags); // For safely calling this_cpu_ptr(). krcp = this_cpu_ptr(&krc); if (krcp->initialized) -- cgit v1.2.3