summaryrefslogtreecommitdiffstats
path: root/include/trace/events/sched.h
diff options
context:
space:
mode:
author: Valentin Schneider <valentin.schneider@arm.com> 2022-01-20 17:25:19 +0100
committer: Peter Zijlstra <peterz@infradead.org> 2022-03-01 16:18:39 +0100
commit: fa2c3254d7cfff5f7a916ab928a562d1165f17bb (patch)
tree: 678cc10a62564212f526fc4a65ea345fde95794e /include/trace/events/sched.h
parent: sched/rt: Plug rt_mutex_setprio() vs push_rt_task() race (diff)
download: linux-fa2c3254d7cfff5f7a916ab928a562d1165f17bb.tar.xz
linux-fa2c3254d7cfff5f7a916ab928a562d1165f17bb.zip
sched/tracing: Don't re-read p->state when emitting sched_switch event
As of commit

  c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")

the following sequence becomes possible:

                      p->__state = TASK_INTERRUPTIBLE;
                      __schedule()
                        deactivate_task(p);
  ttwu()
    READ !p->on_rq
    p->__state=TASK_WAKING
                        trace_sched_switch()
                          __trace_sched_switch_state()
                            task_state_index()
                              return 0;

TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in
the trace event.

Prevent this by pushing the value read from __schedule() down the trace
event.

Reported-by: Abhijeet Dharmapurikar <adharmap@quicinc.com>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com
Diffstat (limited to 'include/trace/events/sched.h')
-rw-r--r-- include/trace/events/sched.h | 11
1 files changed, 7 insertions, 4 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 94640482cfe7..65e786756321 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -187,7 +187,9 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p)
{
unsigned int state;
@@ -208,7 +210,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
* it for left shift operation to get the correct task->state
* mapping.
*/
- state = task_state_index(p);
+ state = __task_state_index(prev_state, p->exit_state);
return state ? (1 << (state - 1)) : state;
}
@@ -220,10 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt,
+ unsigned int prev_state,
struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(preempt, prev, next),
+ TP_ARGS(preempt, prev_state, prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
@@ -239,7 +242,7 @@ TRACE_EVENT(sched_switch,
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
- __entry->prev_state = __trace_sched_switch_state(preempt, prev);
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;