tracing: Do not busy wait in buffer splice

On a !PREEMPT kernel, attempting to use trace-cmd results in a soft lockup: # trace-cmd record -e raw_syscalls:* -F false NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61] ... Call Trace: [<ffffffff8105b580>] ? __wake_up_common+0x90/0x90 [<ffffffff81092e25>] wait_on_pipe+0x35/0x40 [<ffffffff810936e3>] tracing_buffers_splice_read+0x2e3/0x3c0 [<ffffffff81093300>] ? tracing_stats_read+0x2a0/0x2a0 [<ffffffff812d10ab>] ? _raw_spin_unlock+0x2b/0x40 [<ffffffff810dc87b>] ? do_read_fault+0x21b/0x290 [<ffffffff810de56a>] ? handle_mm_fault+0x2ba/0xbd0 [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80 [<ffffffff810951e2>] ? trace_buffer_lock_reserve+0x22/0x60 [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80 [<ffffffff8112415d>] do_splice_to+0x6d/0x90 [<ffffffff81126971>] SyS_splice+0x7c1/0x800 [<ffffffff812d1edd>] tracesys_phase2+0xd3/0xd8 The problem is this: tracing_buffers_splice_read() calls ring_buffer_wait() to wait for data in the ring buffers. The buffers are not empty so ring_buffer_wait() returns immediately. But tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1, meaning it only wants to read a full page. When the full page is not available, tracing_buffers_splice_read() tries to wait again with ring_buffer_wait(), which again returns immediately, and so on. Fix this by adding a "full" argument to ring_buffer_wait() which will make ring_buffer_wait() wait until the writer has left the reader's page, i.e. until full-page reads will succeed. Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in Cc: stable@vger.kernel.org # 3.16+ Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function") Signed-off-by: Rabin Vincent <rabin@rab.in> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
author: Rabin Vincent <rabin@rab.in> 2014-11-10 19:46:34 +0100
committer: Steven Rostedt <rostedt@goodmis.org> 2014-11-10 22:45:43 +0100
commit: e30f53aad2202b5526c40c36d8eeac8bf290bde5 (patch)
tree: f9b21ea9ccbe93b781d8426af09cbd472cbdfef8 /kernel
parent: Linux 3.18-rc4 (diff)
download: linux-e30f53aad2202b5526c40c36d8eeac8bf290bde5.tar.xz
linux-e30f53aad2202b5526c40c36d8eeac8bf290bde5.zip
2 files changed, 61 insertions, 43 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2d75c94ae87d..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
+ * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 {
-	struct ring_buffer_per_cpu *cpu_buffer;
+	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
 	DEFINE_WAIT(wait);
 	struct rb_irq_work *work;
+	int ret = 0;
 
 	/*
 	 * Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 	}
 
 
-	prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+	while (true) {
+		prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-	/*
-	 * The events can happen in critical sections where
-	 * checking a work queue can cause deadlocks.
-	 * After adding a task to the queue, this flag is set
-	 * only to notify events to try to wake up the queue
-	 * using irq_work.
-	 *
-	 * We don't clear it even if the buffer is no longer
-	 * empty. The flag only causes the next event to run
-	 * irq_work to do the work queue wake up. The worse
-	 * that can happen if we race with !trace_empty() is that
-	 * an event will cause an irq_work to try to wake up
-	 * an empty queue.
-	 *
-	 * There's no reason to protect this flag either, as
-	 * the work queue and irq_work logic will do the necessary
-	 * synchronization for the wake ups. The only thing
-	 * that is necessary is that the wake up happens after
-	 * a task has been queued. It's OK for spurious wake ups.
-	 */
-	work->waiters_pending = true;
+		/*
+		 * The events can happen in critical sections where
+		 * checking a work queue can cause deadlocks.
+		 * After adding a task to the queue, this flag is set
+		 * only to notify events to try to wake up the queue
+		 * using irq_work.
+		 *
+		 * We don't clear it even if the buffer is no longer
+		 * empty. The flag only causes the next event to run
+		 * irq_work to do the work queue wake up. The worse
+		 * that can happen if we race with !trace_empty() is that
+		 * an event will cause an irq_work to try to wake up
+		 * an empty queue.
+		 *
+		 * There's no reason to protect this flag either, as
+		 * the work queue and irq_work logic will do the necessary
+		 * synchronization for the wake ups. The only thing
+		 * that is necessary is that the wake up happens after
+		 * a task has been queued. It's OK for spurious wake ups.
+		 */
+		work->waiters_pending = true;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
+			break;
+
+		if (cpu != RING_BUFFER_ALL_CPUS &&
+		    !ring_buffer_empty_cpu(buffer, cpu)) {
+			unsigned long flags;
+			bool pagebusy;
+
+			if (!full)
+				break;
+
+			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+			if (!pagebusy)
+				break;
+		}
 
-	if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
-	    (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
 		schedule();
+	}
 
 	finish_wait(&work->waiters, &wait);
-	return 0;
+
+	return ret;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a528392b1f4..15209335888d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static int wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter, bool full)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
 		return 0;
 
-	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
+				full);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, false);
 
 		mutex_lock(&iter->mutex);
 
 		if (ret)
 			return ret;
-
-		if (signal_pending(current))
-			return -EINTR;
 	}
 
 	return 1;
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			ret = wait_on_pipe(iter);
+			ret = wait_on_pipe(iter, false);
 			mutex_lock(&trace_types_lock);
 			if (ret) {
 				size = ret;
 				goto out_unlock;
 			}
-			if (signal_pending(current)) {
-				size = -EINTR;
-				goto out_unlock;
-			}
 			goto again;
 		}
 		size = 0;
@@ -5587,14 +5581,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		ret = wait_on_pipe(iter);
+		ret = wait_on_pipe(iter, true);
 		mutex_lock(&trace_types_lock);
 		if (ret)
 			goto out;
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			goto out;
-		}
+
 		goto again;
 	}
author	Rabin Vincent <rabin@rab.in>	2014-11-10 19:46:34 +0100
committer	Steven Rostedt <rostedt@goodmis.org>	2014-11-10 22:45:43 +0100
commit	e30f53aad2202b5526c40c36d8eeac8bf290bde5 (patch)
tree	f9b21ea9ccbe93b781d8426af09cbd472cbdfef8 /kernel
parent	Linux 3.18-rc4 (diff)
download	linux-e30f53aad2202b5526c40c36d8eeac8bf290bde5.tar.xz linux-e30f53aad2202b5526c40c36d8eeac8bf290bde5.zip