summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-09-11 01:09:23 +0200
committerFrederic Weisbecker <fweisbec@gmail.com>2009-09-11 01:09:23 +0200
commit8f8ffe2485bcaa890800681451d380779cea06af (patch)
tree1d2ef3a27f1cab9a2b9014f4b75886a96a1ae8db /kernel
parentx86: Remove unused config macros from instruction decoder selftest (diff)
parentMerge branch 'tracing/core' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
downloadlinux-8f8ffe2485bcaa890800681451d380779cea06af.tar.xz
linux-8f8ffe2485bcaa890800681451d380779cea06af.zip
Merge commit 'tracing/core' into tracing/kprobes
Conflicts: kernel/trace/trace_export.c kernel/trace/trace_kprobe.c Merge reason: This topic branch lacks an important build fix in tracing/core: 0dd7b74787eaf7858c6c573353a83c3e2766e674: tracing: Fix double CPP substitution in TRACE_EVENT_FN that prevents from multiple tracepoint headers inclusion crashes. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c21
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/irq/manage.c27
-rw-r--r--kernel/module.c10
-rw-r--r--kernel/perf_counter.c356
-rw-r--r--kernel/sysctl.c7
-rw-r--r--kernel/time/clockevents.c16
-rw-r--r--kernel/time/tick-broadcast.c7
-rw-r--r--kernel/time/timer_list.c2
-rw-r--r--kernel/trace/Kconfig9
-rw-r--r--kernel/trace/blktrace.c24
-rw-r--r--kernel/trace/ftrace.c17
-rw-r--r--kernel/trace/kmemtrace.c4
-rw-r--r--kernel/trace/ring_buffer.c172
-rw-r--r--kernel/trace/trace.c387
-rw-r--r--kernel/trace/trace.h28
-rw-r--r--kernel/trace/trace_boot.c16
-rw-r--r--kernel/trace/trace_events.c6
-rw-r--r--kernel/trace/trace_events_filter.c51
-rw-r--r--kernel/trace/trace_export.c4
-rw-r--r--kernel/trace/trace_functions_graph.c14
-rw-r--r--kernel/trace/trace_irqsoff.c3
-rw-r--r--kernel/trace/trace_kprobe.c16
-rw-r--r--kernel/trace/trace_mmiotrace.c10
-rw-r--r--kernel/trace/trace_power.c22
-rw-r--r--kernel/trace/trace_sched_switch.c18
-rw-r--r--kernel/trace/trace_sched_wakeup.c7
-rw-r--r--kernel/trace/trace_syscalls.c46
-rw-r--r--kernel/tracepoint.c4
-rw-r--r--kernel/wait.c5
31 files changed, 874 insertions, 469 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 021e1138556e..e6c04d462ab2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -426,7 +426,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
- mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0;
mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
@@ -816,11 +815,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
struct signal_struct *sig;
- if (clone_flags & CLONE_THREAD) {
- atomic_inc(&current->signal->count);
- atomic_inc(&current->signal->live);
+ if (clone_flags & CLONE_THREAD)
return 0;
- }
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
tsk->signal = sig;
@@ -878,16 +874,6 @@ void __cleanup_signal(struct signal_struct *sig)
kmem_cache_free(signal_cachep, sig);
}
-static void cleanup_signal(struct task_struct *tsk)
-{
- struct signal_struct *sig = tsk->signal;
-
- atomic_dec(&sig->live);
-
- if (atomic_dec_and_test(&sig->count))
- __cleanup_signal(sig);
-}
-
static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
@@ -1240,6 +1226,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
if (clone_flags & CLONE_THREAD) {
+ atomic_inc(&current->signal->count);
+ atomic_inc(&current->signal->live);
p->group_leader = current->group_leader;
list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
}
@@ -1283,7 +1271,8 @@ bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
- cleanup_signal(p);
+ if (!(clone_flags & CLONE_THREAD))
+ __cleanup_signal(p->signal);
bad_fork_cleanup_sighand:
__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
diff --git a/kernel/futex.c b/kernel/futex.c
index 0672ff88f159..e18cfbdc7190 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* q: the futex_q
* key: the key of the requeue target futex
+ * hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
* target futex if it is uncontended or via a lock steal. Set the futex_q key
* to the requeue target futex so the waiter can detect the wakeup on the right
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition. Must be called with the q->lock_ptr held.
+ * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
+ * to protect access to the pi_state to fixup the owner later. Must be called
+ * with both q->lock_ptr and hb->lock held.
*/
static inline
-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+ struct futex_hash_bucket *hb)
{
drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
+ q->lock_ptr = &hb->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+ q->list.plist.lock = &hb->lock;
+#endif
+
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
if (ret == 1)
- requeue_pi_wake_futex(top_waiter, key2);
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
return ret;
}
@@ -1247,8 +1256,15 @@ retry_private:
if (!match_futex(&this->key, &key1))
continue;
- WARN_ON(!requeue_pi && this->rt_waiter);
- WARN_ON(requeue_pi && !this->rt_waiter);
+ /*
+ * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+ * be paired with each other and no other futex ops.
+ */
+ if ((requeue_pi && !this->rt_waiter) ||
+ (!requeue_pi && this->rt_waiter)) {
+ ret = -EINVAL;
+ break;
+ }
/*
* Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
this->task, 1);
if (ret == 1) {
/* We got the lock. */
- requeue_pi_wake_futex(this, &key2);
+ requeue_pi_wake_futex(this, &key2, hb2);
continue;
} else if (ret) {
/* -EDEADLK */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b9ee29..235716556bf1 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
int cmd = op & FUTEX_CMD_MASK;
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET)) {
+ cmd == FUTEX_WAIT_BITSET ||
+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c679db4687..0ec9ed831737 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -607,7 +607,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
get_task_struct(t);
new->thread = t;
- wake_up_process(t);
}
/*
@@ -690,6 +689,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
(int)(new->flags & IRQF_TRIGGER_MASK));
}
+ new->irq = irq;
*old_ptr = new;
/* Reset broken irq detection when installing new handler */
@@ -707,7 +707,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
spin_unlock_irqrestore(&desc->lock, flags);
- new->irq = irq;
+ /*
+ * Strictly no need to wake it up, but hung_task complains
+ * when no hard interrupt wakes the thread up.
+ */
+ if (new->thread)
+ wake_up_process(new->thread);
+
register_irq_proc(irq, desc);
new->dir = NULL;
register_handler_proc(irq, new);
@@ -761,7 +767,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
- struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +814,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
- irqthread = action->thread;
- action->thread = NULL;
-
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -819,12 +821,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
- if (irqthread) {
- if (!test_bit(IRQTF_DIED, &action->thread_flags))
- kthread_stop(irqthread);
- put_task_struct(irqthread);
- }
-
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +836,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
local_irq_restore(flags);
}
#endif
+
+ if (action->thread) {
+ if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ kthread_stop(action->thread);
+ put_task_struct(action->thread);
+ }
+
return action;
}
diff --git a/kernel/module.c b/kernel/module.c
index b1821438694e..46580edff0cb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -914,16 +914,18 @@ void __symbol_put(const char *symbol)
}
EXPORT_SYMBOL(__symbol_put);
+/* Note this assumes addr is a function, which it currently always is. */
void symbol_put_addr(void *addr)
{
struct module *modaddr;
+ unsigned long a = (unsigned long)dereference_function_descriptor(addr);
- if (core_kernel_text((unsigned long)addr))
+ if (core_kernel_text(a))
return;
/* module_text_address is safe here: we're supposed to have reference
* to module from symbol_get, so it can't go away. */
- modaddr = __module_text_address((unsigned long)addr);
+ modaddr = __module_text_address(a);
BUG_ON(!modaddr);
module_put(modaddr);
}
@@ -1279,6 +1281,10 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
struct module_notes_attrs *notes_attrs;
struct bin_attribute *nattr;
+ /* failed to create section attributes, so can't create notes */
+ if (!mod->sect_attrs)
+ return;
+
/* Count notes sections and allocate structures. */
notes = 0;
for (i = 0; i < nsect; i++)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b0b20a07f394..d7cbc579fc80 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -50,7 +50,7 @@ static atomic_t nr_task_counters __read_mostly;
* 1 - disallow cpu counters to unpriv
* 2 - disallow kernel profiling to unpriv
*/
-int sysctl_perf_counter_paranoid __read_mostly;
+int sysctl_perf_counter_paranoid __read_mostly = 1;
static inline bool perf_paranoid_cpu(void)
{
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_counter_setup(int cpu) { barrier(); }
+void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
int __weak
hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
return;
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ if (counter->pending_disable) {
+ counter->pending_disable = 0;
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
counter->tstamp_stopped = ctx->time;
counter->pmu->disable(counter);
counter->oncpu = -1;
@@ -1498,10 +1503,21 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
*/
static void __perf_counter_read(void *info)
{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_counter *counter = info;
struct perf_counter_context *ctx = counter->ctx;
unsigned long flags;
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived. In that case
+ * counter->count would have been updated to a recent sample
+ * when the counter was scheduled out.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
local_irq_save(flags);
if (ctx->is_active)
update_context_time(ctx);
@@ -1691,7 +1707,32 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+ nr += counter->group_leader->nr_siblings;
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+
+ return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
{
struct perf_counter *child;
u64 total = 0;
@@ -1703,14 +1744,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
return total;
}
+static int perf_counter_read_entry(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ int n = 0, count = 0;
+ u64 values[2];
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ count = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static int perf_counter_read_group(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ int n = 0, size = 0, err = -EFAULT;
+ u64 values[3];
+
+ values[n++] = 1 + leader->nr_siblings;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = leader->total_time_enabled +
+ atomic64_read(&leader->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = leader->total_time_running +
+ atomic64_read(&leader->child_total_time_running);
+ }
+
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, size))
+ return -EFAULT;
+
+ err = perf_counter_read_entry(leader, read_format, buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ err = perf_counter_read_entry(sub, read_format,
+ buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+ }
+
+ return size;
+}
+
+static int perf_counter_read_one(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ if (copy_to_user(buf, values, n * sizeof(u64)))
+ return -EFAULT;
+
+ return n * sizeof(u64);
+}
+
/*
* Read the performance counter - simple non blocking version for now
*/
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
- u64 values[4];
- int n;
+ u64 read_format = counter->attr.read_format;
+ int ret;
/*
* Return end-of-file for a read on a counter that is in
@@ -1720,28 +1843,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
if (counter->state == PERF_COUNTER_STATE_ERROR)
return 0;
+ if (count < perf_counter_read_size(counter))
+ return -ENOSPC;
+
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
- values[0] = perf_counter_read_tree(counter);
- n = 1;
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = counter->total_time_enabled +
- atomic64_read(&counter->child_total_time_enabled);
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = counter->total_time_running +
- atomic64_read(&counter->child_total_time_running);
- if (counter->attr.read_format & PERF_FORMAT_ID)
- values[n++] = primary_counter_id(counter);
+ if (read_format & PERF_FORMAT_GROUP)
+ ret = perf_counter_read_group(counter, read_format, buf);
+ else
+ ret = perf_counter_read_one(counter, read_format, buf);
mutex_unlock(&counter->child_mutex);
- if (count < n * sizeof(u64))
- return -EINVAL;
- count = n * sizeof(u64);
-
- if (copy_to_user(buf, values, count))
- return -EFAULT;
-
- return count;
+ return ret;
}
static ssize_t
@@ -1906,6 +2019,10 @@ int perf_counter_task_disable(void)
return 0;
}
+#ifndef PERF_COUNTER_INDEX_OFFSET
+# define PERF_COUNTER_INDEX_OFFSET 0
+#endif
+
static int perf_counter_index(struct perf_counter *counter)
{
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
@@ -2245,7 +2362,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
if (counter->pending_disable) {
counter->pending_disable = 0;
- perf_counter_disable(counter);
+ __perf_counter_disable(counter);
}
if (counter->pending_wakeup) {
@@ -2630,7 +2747,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
return task_pid_nr_ns(p, counter->ns);
}
-static void perf_counter_output(struct perf_counter *counter, int nmi,
+static void perf_output_read_one(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ u64 read_format = counter->attr.read_format;
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = atomic64_read(&counter->count);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ u64 read_format = counter->attr.read_format;
+ u64 values[5];
+ int n = 0;
+
+ values[n++] = 1 + leader->nr_siblings;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = leader->total_time_enabled;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = leader->total_time_running;
+
+ if (leader != counter)
+ leader->pmu->read(leader);
+
+ values[n++] = atomic64_read(&leader->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(leader);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ n = 0;
+
+ if (sub != counter)
+ sub->pmu->read(sub);
+
+ values[n++] = atomic64_read(&sub->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(sub);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+ }
+}
+
+static void perf_output_read(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ if (counter->attr.read_format & PERF_FORMAT_GROUP)
+ perf_output_read_group(handle, counter);
+ else
+ perf_output_read_one(handle, counter);
+}
+
+void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data)
{
int ret;
@@ -2641,10 +2831,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
struct {
u32 pid, tid;
} tid_entry;
- struct {
- u64 id;
- u64 counter;
- } group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
u64 time;
@@ -2699,10 +2885,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
- if (sample_type & PERF_SAMPLE_GROUP) {
- header.size += sizeof(u64) +
- counter->nr_siblings * sizeof(group_entry);
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ header.size += perf_counter_read_size(counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
callchain = perf_callchain(data->regs);
@@ -2759,26 +2943,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
perf_output_put(&handle, data->period);
- /*
- * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
- */
- if (sample_type & PERF_SAMPLE_GROUP) {
- struct perf_counter *leader, *sub;
- u64 nr = counter->nr_siblings;
-
- perf_output_put(&handle, nr);
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->pmu->read(sub);
-
- group_entry.id = primary_counter_id(sub);
- group_entry.counter = atomic64_read(&sub->count);
-
- perf_output_put(&handle, group_entry);
- }
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ perf_output_read(&handle, counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
if (callchain)
@@ -2817,8 +2983,6 @@ struct perf_read_event {
u32 pid;
u32 tid;
- u64 value;
- u64 format[3];
};
static void
@@ -2830,34 +2994,20 @@ perf_counter_read_event(struct perf_counter *counter,
.header = {
.type = PERF_EVENT_READ,
.misc = 0,
- .size = sizeof(event) - sizeof(event.format),
+ .size = sizeof(event) + perf_counter_read_size(counter),
},
.pid = perf_counter_pid(counter, task),
.tid = perf_counter_tid(counter, task),
- .value = atomic64_read(&counter->count),
};
- int ret, i = 0;
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_enabled;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_running;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_ID) {
- event.header.size += sizeof(u64);
- event.format[i++] = primary_counter_id(counter);
- }
+ int ret;
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
if (ret)
return;
- perf_output_copy(&handle, &event, event.header.size);
+ perf_output_put(&handle, event);
+ perf_output_read(&handle, counter);
+
perf_output_end(&handle);
}
@@ -2893,10 +3043,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
return;
task_event->event.pid = perf_counter_pid(counter, task);
- task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+ task_event->event.ppid = perf_counter_pid(counter, current);
task_event->event.tid = perf_counter_tid(counter, task);
- task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+ task_event->event.ptid = perf_counter_tid(counter, current);
perf_output_put(&handle, task_event->event);
perf_output_end(&handle);
@@ -3443,40 +3593,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
static int perf_swcounter_is_counting(struct perf_counter *counter)
{
- struct perf_counter_context *ctx;
- unsigned long flags;
- int count;
-
+ /*
+ * The counter is active, we're good!
+ */
if (counter->state == PERF_COUNTER_STATE_ACTIVE)
return 1;
+ /*
+ * The counter is off/error, not counting.
+ */
if (counter->state != PERF_COUNTER_STATE_INACTIVE)
return 0;
/*
- * If the counter is inactive, it could be just because
- * its task is scheduled out, or because it's in a group
- * which could not go on the PMU. We want to count in
- * the first case but not the second. If the context is
- * currently active then an inactive software counter must
- * be the second case. If it's not currently active then
- * we need to know whether the counter was active when the
- * context was last active, which we can determine by
- * comparing counter->tstamp_stopped with ctx->time.
- *
- * We are within an RCU read-side critical section,
- * which protects the existence of *ctx.
+ * The counter is inactive, if the context is active
+ * we're part of a group that didn't make it on the 'pmu',
+ * not counting.
*/
- ctx = counter->ctx;
- spin_lock_irqsave(&ctx->lock, flags);
- count = 1;
- /* Re-check state now we have the lock */
- if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
- counter->ctx->is_active ||
- counter->tstamp_stopped < ctx->time)
- count = 0;
- spin_unlock_irqrestore(&ctx->lock, flags);
- return count;
+ if (counter->ctx->is_active)
+ return 0;
+
+ /*
+ * We're inactive and the context is too, this means the
+ * task is scheduled out, we're counting events that happen
+ * to us, like migration events.
+ */
+ return 1;
}
static int perf_swcounter_match(struct perf_counter *counter,
@@ -3924,13 +4066,14 @@ perf_counter_alloc(struct perf_counter_attr *attr,
hwc->sample_period = attr->sample_period;
if (attr->freq && attr->sample_freq)
hwc->sample_period = 1;
+ hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
/*
- * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+ * we currently do not support PERF_FORMAT_GROUP on inherited counters
*/
- if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+ if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto done;
switch (attr->type) {
@@ -4592,6 +4735,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
perf_counter_init_cpu(cpu);
break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ hw_perf_counter_setup_online(cpu);
+ break;
+
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
perf_counter_exit_cpu(cpu);
@@ -4616,6 +4764,8 @@ void __init perf_counter_init(void)
{
perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
+ perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
+ (void *)(long)smp_processor_id());
register_cpu_notifier(&perf_cpu_nb);
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 98e02328c67d..58be76017fd0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -49,6 +49,7 @@
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
+#include <linux/security.h>
#include <linux/slow-work.h>
#include <linux/perf_counter.h>
@@ -1306,10 +1307,10 @@ static struct ctl_table vm_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "mmap_min_addr",
- .data = &mmap_min_addr,
- .maxlen = sizeof(unsigned long),
+ .data = &dac_mmap_min_addr,
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &mmap_min_addr_handler,
},
#ifdef CONFIG_NUMA
{
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index a6dcd67b041d..620b58abdc32 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -137,11 +137,12 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
*/
int clockevents_register_notifier(struct notifier_block *nb)
{
+ unsigned long flags;
int ret;
- spin_lock(&clockevents_lock);
+ spin_lock_irqsave(&clockevents_lock, flags);
ret = raw_notifier_chain_register(&clockevents_chain, nb);
- spin_unlock(&clockevents_lock);
+ spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
@@ -178,16 +179,18 @@ static void clockevents_notify_released(void)
*/
void clockevents_register_device(struct clock_event_device *dev)
{
+ unsigned long flags;
+
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);
- spin_lock(&clockevents_lock);
+ spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
clockevents_notify_released();
- spin_unlock(&clockevents_lock);
+ spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);
@@ -235,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old,
void clockevents_notify(unsigned long reason, void *arg)
{
struct list_head *node, *tmp;
+ unsigned long flags;
- spin_lock(&clockevents_lock);
+ spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
switch (reason) {
@@ -251,7 +255,7 @@ void clockevents_notify(unsigned long reason, void *arg)
default:
break;
}
- spin_unlock(&clockevents_lock);
+ spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
#endif
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 877dbedc3118..c2ec25087a35 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -205,11 +205,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
-static void tick_do_broadcast_on_off(void *why)
+static void tick_do_broadcast_on_off(unsigned long *reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
- unsigned long flags, *reason = why;
+ unsigned long flags;
int cpu, bc_stopped;
spin_lock_irqsave(&tick_broadcast_lock, flags);
@@ -276,8 +276,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
"offline CPU #%d\n", *oncpu);
else
- smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
- &reason, 1);
+ tick_do_broadcast_on_off(&reason);
}
/*
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index a999b92a1277..fddd69d16e03 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -286,7 +286,7 @@ static int __init init_timer_list_procfs(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("timer_list", 0644, NULL, &timer_list_fops);
+ pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
if (!pe)
return -ENOMEM;
return 0;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index fb5fbf75f279..e78dcbde1a81 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -60,9 +60,14 @@ config EVENT_TRACING
bool
config CONTEXT_SWITCH_TRACER
- select MARKERS
bool
+config RING_BUFFER_ALLOW_SWAP
+ bool
+ help
+ Allow the use of ring_buffer_swap_cpu.
+ Adds a very slight overhead to tracing when enabled.
+
# All tracer options should select GENERIC_TRACER. For those options that are
# enabled by all tracers (context switch and event tracer) they select TRACING.
# This allows those options to appear when no other tracer is selected. But the
@@ -147,6 +152,7 @@ config IRQSOFF_TRACER
select TRACE_IRQFLAGS
select GENERIC_TRACER
select TRACER_MAX_TRACE
+ select RING_BUFFER_ALLOW_SWAP
help
This option measures the time spent in irqs-off critical
sections, with microsecond accuracy.
@@ -168,6 +174,7 @@ config PREEMPT_TRACER
depends on PREEMPT
select GENERIC_TRACER
select TRACER_MAX_TRACE
+ select RING_BUFFER_ALLOW_SWAP
help
This option measures the time spent in preemption off critical
sections, with microsecond accuracy.
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1090b0aed9ba..3eb159c277c8 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -65,13 +65,15 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
{
struct blk_io_trace *t;
struct ring_buffer_event *event = NULL;
+ struct ring_buffer *buffer = NULL;
int pc = 0;
int cpu = smp_processor_id();
bool blk_tracer = blk_tracer_enabled;
if (blk_tracer) {
+ buffer = blk_tr->buffer;
pc = preempt_count();
- event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
+ event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
sizeof(*t) + len,
0, pc);
if (!event)
@@ -96,7 +98,7 @@ record_it:
memcpy((void *) t + sizeof(*t), data, len);
if (blk_tracer)
- trace_buffer_unlock_commit(blk_tr, event, 0, pc);
+ trace_buffer_unlock_commit(buffer, event, 0, pc);
}
}
@@ -179,6 +181,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
{
struct task_struct *tsk = current;
struct ring_buffer_event *event = NULL;
+ struct ring_buffer *buffer = NULL;
struct blk_io_trace *t;
unsigned long flags = 0;
unsigned long *sequence;
@@ -204,8 +207,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
if (blk_tracer) {
tracing_record_cmdline(current);
+ buffer = blk_tr->buffer;
pc = preempt_count();
- event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
+ event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
sizeof(*t) + pdu_len,
0, pc);
if (!event)
@@ -252,7 +256,7 @@ record_it:
memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
if (blk_tracer) {
- trace_buffer_unlock_commit(blk_tr, event, 0, pc);
+ trace_buffer_unlock_commit(buffer, event, 0, pc);
return;
}
}
@@ -267,8 +271,8 @@ static void blk_trace_free(struct blk_trace *bt)
{
debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
- debugfs_remove(bt->dir);
relay_close(bt->rchan);
+ debugfs_remove(bt->dir);
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -378,18 +382,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
static int blk_remove_buf_file_callback(struct dentry *dentry)
{
- struct dentry *parent = dentry->d_parent;
debugfs_remove(dentry);
- /*
- * this will fail for all but the last file, but that is ok. what we
- * care about is the top level buts->name directory going away, when
- * the last trace file is gone. Then we don't have to rmdir() that
- * manually on trace stop, so it nicely solves the issue with
- * force killing of running traces.
- */
-
- debugfs_remove(parent);
return 0;
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1993b7186cdb..8c804e24f96f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2222,7 +2222,11 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
read++;
cnt--;
- if (!(iter->flags & ~FTRACE_ITER_CONT)) {
+ /*
+ * If the parser haven't finished with the last write,
+ * continue reading the user input without skipping spaces.
+ */
+ if (!(iter->flags & FTRACE_ITER_CONT)) {
/* skip white space */
while (cnt && isspace(ch)) {
ret = get_user(ch, ubuf++);
@@ -2232,8 +2236,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
cnt--;
}
+ /* only spaces were written */
if (isspace(ch)) {
- file->f_pos += read;
+ *ppos += read;
ret = read;
goto out;
}
@@ -2262,12 +2267,12 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
if (ret)
goto out;
iter->buffer_idx = 0;
- } else
+ } else {
iter->flags |= FTRACE_ITER_CONT;
+ iter->buffer[iter->buffer_idx++] = ch;
+ }
-
- file->f_pos += read;
-
+ *ppos += read;
ret = read;
out:
mutex_unlock(&ftrace_regex_lock);
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index dda53ccf749b..81b1645c8549 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -183,11 +183,9 @@ static void kmemtrace_stop_probes(void)
static int kmem_trace_init(struct trace_array *tr)
{
- int cpu;
kmemtrace_array = tr;
- for_each_cpu(cpu, cpu_possible_mask)
- tracing_reset(tr, cpu);
+ tracing_reset_online_cpus(tr);
kmemtrace_start_probes();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index da2c59d8f486..454e74e718cf 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -218,17 +218,12 @@ enum {
static inline int rb_null_event(struct ring_buffer_event *event)
{
- return event->type_len == RINGBUF_TYPE_PADDING
- && event->time_delta == 0;
-}
-
-static inline int rb_discarded_event(struct ring_buffer_event *event)
-{
- return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
+ return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}
static void rb_event_set_padding(struct ring_buffer_event *event)
{
+ /* padding has a NULL time_delta */
event->type_len = RINGBUF_TYPE_PADDING;
event->time_delta = 0;
}
@@ -472,14 +467,19 @@ struct ring_buffer_iter {
};
/* buffer may be either ring_buffer or ring_buffer_per_cpu */
-#define RB_WARN_ON(buffer, cond) \
- ({ \
- int _____ret = unlikely(cond); \
- if (_____ret) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- } \
- _____ret; \
+#define RB_WARN_ON(b, cond) \
+ ({ \
+ int _____ret = unlikely(cond); \
+ if (_____ret) { \
+ if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
+ struct ring_buffer_per_cpu *__b = \
+ (void *)b; \
+ atomic_inc(&__b->buffer->record_disabled); \
+ } else \
+ atomic_inc(&b->record_disabled); \
+ WARN_ON(1); \
+ } \
+ _____ret; \
})
/* Up this if you want to test the TIME_EXTENTS and normalization */
@@ -1778,9 +1778,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event->type_len = RINGBUF_TYPE_PADDING;
/* time delta must be non zero */
event->time_delta = 1;
- /* Account for this as an entry */
- local_inc(&tail_page->entries);
- local_inc(&cpu_buffer->entries);
/* Set write to end of buffer */
length = (tail + length) - BUF_PAGE_SIZE;
@@ -2076,7 +2073,8 @@ static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
}
static struct ring_buffer_event *
-rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
+rb_reserve_next_event(struct ring_buffer *buffer,
+ struct ring_buffer_per_cpu *cpu_buffer,
unsigned long length)
{
struct ring_buffer_event *event;
@@ -2086,6 +2084,21 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
rb_start_commit(cpu_buffer);
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+ /*
+ * Due to the ability to swap a cpu buffer from a buffer
+ * it is possible it was swapped before we committed.
+ * (committing stops a swap). We check for it here and
+ * if it happened, we have to fail the write.
+ */
+ barrier();
+ if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
+ local_dec(&cpu_buffer->committing);
+ local_dec(&cpu_buffer->commits);
+ return NULL;
+ }
+#endif
+
length = rb_calculate_event_length(length);
again:
/*
@@ -2246,7 +2259,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
if (length > BUF_MAX_DATA_SIZE)
goto out;
- event = rb_reserve_next_event(cpu_buffer, length);
+ event = rb_reserve_next_event(buffer, cpu_buffer, length);
if (!event)
goto out;
@@ -2269,18 +2282,23 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
}
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
-static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+static void
+rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
- local_inc(&cpu_buffer->entries);
-
/*
* The event first in the commit queue updates the
* time stamp.
*/
if (rb_event_is_commit(cpu_buffer, event))
cpu_buffer->write_stamp += event->time_delta;
+}
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
+{
+ local_inc(&cpu_buffer->entries);
+ rb_update_write_stamp(cpu_buffer, event);
rb_end_commit(cpu_buffer);
}
@@ -2327,32 +2345,57 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
event->time_delta = 1;
}
-/**
- * ring_buffer_event_discard - discard any event in the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
+/*
+ * Decrement the entries to the page that an event is on.
+ * The event does not even need to exist, only the pointer
+ * to the page it is on. This may only be called before the commit
+ * takes place.
*/
-void ring_buffer_event_discard(struct ring_buffer_event *event)
+static inline void
+rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_event *event)
{
- rb_event_discard(event);
+ unsigned long addr = (unsigned long)event;
+ struct buffer_page *bpage = cpu_buffer->commit_page;
+ struct buffer_page *start;
+
+ addr &= PAGE_MASK;
+
+ /* Do the likely case first */
+ if (likely(bpage->page == (void *)addr)) {
+ local_dec(&bpage->entries);
+ return;
+ }
+
+ /*
+ * Because the commit page may be on the reader page we
+ * start with the next page and check the end loop there.
+ */
+ rb_inc_page(cpu_buffer, &bpage);
+ start = bpage;
+ do {
+ if (bpage->page == (void *)addr) {
+ local_dec(&bpage->entries);
+ return;
+ }
+ rb_inc_page(cpu_buffer, &bpage);
+ } while (bpage != start);
+
+ /* commit not part of this buffer?? */
+ RB_WARN_ON(cpu_buffer, 1);
}
-EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
/**
* ring_buffer_commit_discard - discard an event that has not been committed
* @buffer: the ring buffer
* @event: non committed event to discard
*
- * This is similar to ring_buffer_event_discard but must only be
- * performed on an event that has not been committed yet. The difference
- * is that this will also try to free the event from the ring buffer
+ * Sometimes an event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * This function only works if it is called before the the item has been
+ * committed. It will try to free the event from the ring buffer
* if another event has not been added behind it.
*
* If another event has been added behind it, it will set the event
@@ -2380,14 +2423,15 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
*/
RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
+ rb_decrement_entry(cpu_buffer, event);
if (rb_try_to_discard(cpu_buffer, event))
goto out;
/*
* The commit is still visible by the reader, so we
- * must increment entries.
+ * must still update the timestamp.
*/
- local_inc(&cpu_buffer->entries);
+ rb_update_write_stamp(cpu_buffer, event);
out:
rb_end_commit(cpu_buffer);
@@ -2448,7 +2492,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
if (length > BUF_MAX_DATA_SIZE)
goto out;
- event = rb_reserve_next_event(cpu_buffer, length);
+ event = rb_reserve_next_event(buffer, cpu_buffer, length);
if (!event)
goto out;
@@ -2899,8 +2943,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
event = rb_reader_event(cpu_buffer);
- if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
- || rb_discarded_event(event))
+ if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
cpu_buffer->read++;
rb_update_read_stamp(cpu_buffer, event);
@@ -3132,10 +3175,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
- if (event && event->type_len == RINGBUF_TYPE_PADDING) {
- cpu_relax();
+ if (event && event->type_len == RINGBUF_TYPE_PADDING)
goto again;
- }
return event;
}
@@ -3160,10 +3201,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
event = rb_iter_peek(iter, ts);
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (event && event->type_len == RINGBUF_TYPE_PADDING) {
- cpu_relax();
+ if (event && event->type_len == RINGBUF_TYPE_PADDING)
goto again;
- }
return event;
}
@@ -3209,10 +3248,8 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
out:
preempt_enable();
- if (event && event->type_len == RINGBUF_TYPE_PADDING) {
- cpu_relax();
+ if (event && event->type_len == RINGBUF_TYPE_PADDING)
goto again;
- }
return event;
}
@@ -3292,21 +3329,19 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
unsigned long flags;
- again:
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ again:
event = rb_iter_peek(iter, ts);
if (!event)
goto out;
+ if (event->type_len == RINGBUF_TYPE_PADDING)
+ goto again;
+
rb_advance_iter(iter);
out:
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (event && event->type_len == RINGBUF_TYPE_PADDING) {
- cpu_relax();
- goto again;
- }
-
return event;
}
EXPORT_SYMBOL_GPL(ring_buffer_read);
@@ -3373,12 +3408,16 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
+ goto out;
+
__raw_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
__raw_spin_unlock(&cpu_buffer->lock);
+ out:
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
@@ -3461,6 +3500,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/**
* ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
* @buffer_a: One buffer to swap with
@@ -3515,20 +3555,28 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
atomic_inc(&cpu_buffer_a->record_disabled);
atomic_inc(&cpu_buffer_b->record_disabled);
+ ret = -EBUSY;
+ if (local_read(&cpu_buffer_a->committing))
+ goto out_dec;
+ if (local_read(&cpu_buffer_b->committing))
+ goto out_dec;
+
buffer_a->buffers[cpu] = cpu_buffer_b;
buffer_b->buffers[cpu] = cpu_buffer_a;
cpu_buffer_b->buffer = buffer_a;
cpu_buffer_a->buffer = buffer_b;
+ ret = 0;
+
+out_dec:
atomic_dec(&cpu_buffer_a->record_disabled);
atomic_dec(&cpu_buffer_b->record_disabled);
-
- ret = 0;
out:
return ret;
}
EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
+#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
/**
* ring_buffer_alloc_read_page - allocate a page to read from buffer
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 63dbc7ff213f..5c75deeefe30 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,9 +43,6 @@
#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
-unsigned long __read_mostly tracing_max_latency;
-unsigned long __read_mostly tracing_thresh;
-
/*
* On boot up, the ring buffer is set to the minimum size, so that
* we do not waste memory on systems that are not using tracing.
@@ -172,10 +169,11 @@ static struct trace_array global_trace;
static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
-int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+int filter_current_check_discard(struct ring_buffer *buffer,
+ struct ftrace_event_call *call, void *rec,
struct ring_buffer_event *event)
{
- return filter_check_discard(call, rec, global_trace.buffer, event);
+ return filter_check_discard(call, rec, buffer, event);
}
EXPORT_SYMBOL_GPL(filter_current_check_discard);
@@ -266,6 +264,9 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
TRACE_ITER_GRAPH_TIME;
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
/**
* trace_wake_up - wake up tasks waiting for trace input
*
@@ -338,45 +339,6 @@ static struct {
int trace_clock_id;
-/*
- * ftrace_max_lock is used to protect the swapping of buffers
- * when taking a max snapshot. The buffers themselves are
- * protected by per_cpu spinlocks. But the action of the swap
- * needs its own lock.
- *
- * This is defined as a raw_spinlock_t in order to help
- * with performance when lockdep debugging is enabled.
- */
-static raw_spinlock_t ftrace_max_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
-
-/*
- * Copy the new maximum trace into the separate maximum-trace
- * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
- */
-static void
-__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
-{
- struct trace_array_cpu *data = tr->data[cpu];
-
- max_tr.cpu = cpu;
- max_tr.time_start = data->preempt_timestamp;
-
- data = max_tr.data[cpu];
- data->saved_latency = tracing_max_latency;
-
- memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
- data->pid = tsk->pid;
- data->uid = task_uid(tsk);
- data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
- data->policy = tsk->policy;
- data->rt_priority = tsk->rt_priority;
-
- /* record this tasks comm */
- tracing_record_cmdline(tsk);
-}
-
ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
{
int len;
@@ -420,6 +382,56 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
return cnt;
}
+/*
+ * ftrace_max_lock is used to protect the swapping of buffers
+ * when taking a max snapshot. The buffers themselves are
+ * protected by per_cpu spinlocks. But the action of the swap
+ * needs its own lock.
+ *
+ * This is defined as a raw_spinlock_t in order to help
+ * with performance when lockdep debugging is enabled.
+ *
+ * It is also used in other places outside the update_max_tr
+ * so it needs to be defined outside of the
+ * CONFIG_TRACER_MAX_TRACE.
+ */
+static raw_spinlock_t ftrace_max_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+unsigned long __read_mostly tracing_max_latency;
+unsigned long __read_mostly tracing_thresh;
+
+/*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
+ */
+static void
+__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ struct trace_array_cpu *data = tr->data[cpu];
+ struct trace_array_cpu *max_data = tr->data[cpu];
+
+ max_tr.cpu = cpu;
+ max_tr.time_start = data->preempt_timestamp;
+
+ max_data = max_tr.data[cpu];
+ max_data->saved_latency = tracing_max_latency;
+ max_data->critical_start = data->critical_start;
+ max_data->critical_end = data->critical_end;
+
+ memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+ max_data->pid = tsk->pid;
+ max_data->uid = task_uid(tsk);
+ max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
+ max_data->policy = tsk->policy;
+ max_data->rt_priority = tsk->rt_priority;
+
+ /* record this tasks comm */
+ tracing_record_cmdline(tsk);
+}
+
/**
* update_max_tr - snapshot all trace buffers from global_trace to max_tr
* @tr: tracer
@@ -434,16 +446,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
struct ring_buffer *buf = tr->buffer;
+ if (trace_stop_count)
+ return;
+
WARN_ON_ONCE(!irqs_disabled());
__raw_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
max_tr.buffer = buf;
- ftrace_disable_cpu();
- ring_buffer_reset(tr->buffer);
- ftrace_enable_cpu();
-
__update_max_tr(tr, tsk, cpu);
__raw_spin_unlock(&ftrace_max_lock);
}
@@ -461,21 +472,35 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
int ret;
+ if (trace_stop_count)
+ return;
+
WARN_ON_ONCE(!irqs_disabled());
__raw_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
- ring_buffer_reset(max_tr.buffer);
ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
+ if (ret == -EBUSY) {
+ /*
+ * We failed to swap the buffer due to a commit taking
+ * place on this CPU. We fail to record, but we reset
+ * the max trace buffer (no one writes directly to it)
+ * and flag that it failed.
+ */
+ trace_array_printk(&max_tr, _THIS_IP_,
+ "Failed to swap buffers due to commit in progress\n");
+ }
+
ftrace_enable_cpu();
- WARN_ON_ONCE(ret && ret != -EAGAIN);
+ WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
__raw_spin_unlock(&ftrace_max_lock);
}
+#endif /* CONFIG_TRACER_MAX_TRACE */
/**
* register_tracer - register a tracer with the ftrace system.
@@ -532,7 +557,6 @@ __acquires(kernel_lock)
if (type->selftest && !tracing_selftest_disabled) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
- int i;
/*
* Run a selftest on this tracer.
@@ -541,8 +565,7 @@ __acquires(kernel_lock)
* internal tracing to verify that everything is in order.
* If we fail, we do not register this tracer.
*/
- for_each_tracing_cpu(i)
- tracing_reset(tr, i);
+ tracing_reset_online_cpus(tr);
current_trace = type;
/* the test is responsible for initializing and enabling */
@@ -555,8 +578,7 @@ __acquires(kernel_lock)
goto out;
}
/* Only reset on passing, to avoid touching corrupted buffers */
- for_each_tracing_cpu(i)
- tracing_reset(tr, i);
+ tracing_reset_online_cpus(tr);
printk(KERN_CONT "PASSED\n");
}
@@ -631,21 +653,42 @@ void unregister_tracer(struct tracer *type)
mutex_unlock(&trace_types_lock);
}
-void tracing_reset(struct trace_array *tr, int cpu)
+static void __tracing_reset(struct trace_array *tr, int cpu)
{
ftrace_disable_cpu();
ring_buffer_reset_cpu(tr->buffer, cpu);
ftrace_enable_cpu();
}
+void tracing_reset(struct trace_array *tr, int cpu)
+{
+ struct ring_buffer *buffer = tr->buffer;
+
+ ring_buffer_record_disable(buffer);
+
+ /* Make sure all commits have finished */
+ synchronize_sched();
+ __tracing_reset(tr, cpu);
+
+ ring_buffer_record_enable(buffer);
+}
+
void tracing_reset_online_cpus(struct trace_array *tr)
{
+ struct ring_buffer *buffer = tr->buffer;
int cpu;
+ ring_buffer_record_disable(buffer);
+
+ /* Make sure all commits have finished */
+ synchronize_sched();
+
tr->time_start = ftrace_now(tr->cpu);
for_each_online_cpu(cpu)
- tracing_reset(tr, cpu);
+ __tracing_reset(tr, cpu);
+
+ ring_buffer_record_enable(buffer);
}
void tracing_reset_current(int cpu)
@@ -676,9 +719,6 @@ static void trace_init_cmdlines(void)
cmdline_idx = 0;
}
-static int trace_stop_count;
-static DEFINE_SPINLOCK(tracing_start_lock);
-
/**
* ftrace_off_permanent - disable all ftrace code permanently
*
@@ -859,14 +899,15 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
-struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
- int type,
- unsigned long len,
- unsigned long flags, int pc)
+struct ring_buffer_event *
+trace_buffer_lock_reserve(struct ring_buffer *buffer,
+ int type,
+ unsigned long len,
+ unsigned long flags, int pc)
{
struct ring_buffer_event *event;
- event = ring_buffer_lock_reserve(tr->buffer, len);
+ event = ring_buffer_lock_reserve(buffer, len);
if (event != NULL) {
struct trace_entry *ent = ring_buffer_event_data(event);
@@ -877,53 +918,59 @@ struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
return event;
}
-static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
- struct ring_buffer_event *event,
- unsigned long flags, int pc,
- int wake)
+static inline void
+__trace_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc,
+ int wake)
{
- ring_buffer_unlock_commit(tr->buffer, event);
+ ring_buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(tr, flags, 6, pc);
- ftrace_trace_userstack(tr, flags, pc);
+ ftrace_trace_stack(buffer, flags, 6, pc);
+ ftrace_trace_userstack(buffer, flags, pc);
if (wake)
trace_wake_up();
}
-void trace_buffer_unlock_commit(struct trace_array *tr,
- struct ring_buffer_event *event,
- unsigned long flags, int pc)
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc)
{
- __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
+ __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
}
struct ring_buffer_event *
-trace_current_buffer_lock_reserve(int type, unsigned long len,
+trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
+ int type, unsigned long len,
unsigned long flags, int pc)
{
- return trace_buffer_lock_reserve(&global_trace,
+ *current_rb = global_trace.buffer;
+ return trace_buffer_lock_reserve(*current_rb,
type, len, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
unsigned long flags, int pc)
{
- __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+ __trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
- unsigned long flags, int pc)
+void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc)
{
- __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+ __trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
}
EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
-void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
{
- ring_buffer_discard_commit(global_trace.buffer, event);
+ ring_buffer_discard_commit(buffer, event);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
@@ -933,6 +980,7 @@ trace_function(struct trace_array *tr,
int pc)
{
struct ftrace_event_call *call = &event_function;
+ struct ring_buffer *buffer = tr->buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
@@ -940,7 +988,7 @@ trace_function(struct trace_array *tr,
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
return;
- event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
+ event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
flags, pc);
if (!event)
return;
@@ -948,8 +996,8 @@ trace_function(struct trace_array *tr,
entry->ip = ip;
entry->parent_ip = parent_ip;
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_check_discard(call, entry, buffer, event))
+ ring_buffer_unlock_commit(buffer, event);
}
void
@@ -962,7 +1010,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
}
#ifdef CONFIG_STACKTRACE
-static void __ftrace_trace_stack(struct trace_array *tr,
+static void __ftrace_trace_stack(struct ring_buffer *buffer,
unsigned long flags,
int skip, int pc)
{
@@ -971,7 +1019,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
struct stack_entry *entry;
struct stack_trace trace;
- event = trace_buffer_lock_reserve(tr, TRACE_STACK,
+ event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
sizeof(*entry), flags, pc);
if (!event)
return;
@@ -984,26 +1032,27 @@ static void __ftrace_trace_stack(struct trace_array *tr,
trace.entries = entry->caller;
save_stack_trace(&trace);
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_check_discard(call, entry, buffer, event))
+ ring_buffer_unlock_commit(buffer, event);
}
-void ftrace_trace_stack(struct trace_array *tr, unsigned long flags, int skip,
- int pc)
+void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
+ int skip, int pc)
{
if (!(trace_flags & TRACE_ITER_STACKTRACE))
return;
- __ftrace_trace_stack(tr, flags, skip, pc);
+ __ftrace_trace_stack(buffer, flags, skip, pc);
}
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc)
{
- __ftrace_trace_stack(tr, flags, skip, pc);
+ __ftrace_trace_stack(tr->buffer, flags, skip, pc);
}
-void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc)
+void
+ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
@@ -1013,7 +1062,7 @@ void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc)
if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
- event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
+ event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
return;
@@ -1027,8 +1076,8 @@ void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc)
trace.entries = entry->caller;
save_stack_trace_user(&trace);
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_check_discard(call, entry, buffer, event))
+ ring_buffer_unlock_commit(buffer, event);
}
#ifdef UNUSED
@@ -1047,9 +1096,10 @@ ftrace_trace_special(void *__tr,
{
struct ring_buffer_event *event;
struct trace_array *tr = __tr;
+ struct ring_buffer *buffer = tr->buffer;
struct special_entry *entry;
- event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
+ event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
sizeof(*entry), 0, pc);
if (!event)
return;
@@ -1057,7 +1107,7 @@ ftrace_trace_special(void *__tr,
entry->arg1 = arg1;
entry->arg2 = arg2;
entry->arg3 = arg3;
- trace_buffer_unlock_commit(tr, event, 0, pc);
+ trace_buffer_unlock_commit(buffer, event, 0, pc);
}
void
@@ -1103,6 +1153,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
struct ftrace_event_call *call = &event_bprint;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
struct bprint_entry *entry;
@@ -1135,7 +1186,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
goto out_unlock;
size = sizeof(*entry) + sizeof(u32) * len;
- event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc);
+ buffer = tr->buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
+ flags, pc);
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
@@ -1143,8 +1196,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_check_discard(call, entry, buffer, event))
+ ring_buffer_unlock_commit(buffer, event);
out_unlock:
__raw_spin_unlock(&trace_buf_lock);
@@ -1159,14 +1212,30 @@ out:
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
-int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+int trace_array_printk(struct trace_array *tr,
+ unsigned long ip, const char *fmt, ...)
+{
+ int ret;
+ va_list ap;
+
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ va_start(ap, fmt);
+ ret = trace_array_vprintk(tr, ip, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+
+int trace_array_vprintk(struct trace_array *tr,
+ unsigned long ip, const char *fmt, va_list args)
{
static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
static char trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_print;
struct ring_buffer_event *event;
- struct trace_array *tr = &global_trace;
+ struct ring_buffer *buffer;
struct trace_array_cpu *data;
int cpu, len = 0, size, pc;
struct print_entry *entry;
@@ -1194,7 +1263,9 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
trace_buf[len] = 0;
size = sizeof(*entry) + len + 1;
- event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
+ buffer = tr->buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+ irq_flags, pc);
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
@@ -1202,8 +1273,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_check_discard(call, entry, buffer, event))
+ ring_buffer_unlock_commit(buffer, event);
out_unlock:
__raw_spin_unlock(&trace_buf_lock);
@@ -1215,6 +1286,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
return len;
}
+
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
+{
+ return trace_array_printk(&global_trace, ip, fmt, args);
+}
EXPORT_SYMBOL_GPL(trace_vprintk);
enum trace_file_type {
@@ -1354,6 +1430,37 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
return ent;
}
+static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
+{
+ struct trace_array *tr = iter->tr;
+ struct ring_buffer_event *event;
+ struct ring_buffer_iter *buf_iter;
+ unsigned long entries = 0;
+ u64 ts;
+
+ tr->data[cpu]->skipped_entries = 0;
+
+ if (!iter->buffer_iter[cpu])
+ return;
+
+ buf_iter = iter->buffer_iter[cpu];
+ ring_buffer_iter_reset(buf_iter);
+
+ /*
+ * We could have the case with the max latency tracers
+ * that a reset never took place on a cpu. This is evident
+ * by the timestamp being before the start of the buffer.
+ */
+ while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
+ if (ts >= iter->tr->time_start)
+ break;
+ entries++;
+ ring_buffer_read(buf_iter, NULL);
+ }
+
+ tr->data[cpu]->skipped_entries = entries;
+}
+
/*
* No necessary locking here. The worst thing which can
* happen is loosing events consumed at the same time
@@ -1392,10 +1499,9 @@ static void *s_start(struct seq_file *m, loff_t *pos)
if (cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu)
- ring_buffer_iter_reset(iter->buffer_iter[cpu]);
+ tracing_iter_reset(iter, cpu);
} else
- ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
-
+ tracing_iter_reset(iter, cpu_file);
ftrace_enable_cpu();
@@ -1444,16 +1550,32 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
struct trace_array *tr = iter->tr;
struct trace_array_cpu *data = tr->data[tr->cpu];
struct tracer *type = current_trace;
- unsigned long total;
- unsigned long entries;
+ unsigned long entries = 0;
+ unsigned long total = 0;
+ unsigned long count;
const char *name = "preemption";
+ int cpu;
if (type)
name = type->name;
- entries = ring_buffer_entries(iter->tr->buffer);
- total = entries +
- ring_buffer_overruns(iter->tr->buffer);
+
+ for_each_tracing_cpu(cpu) {
+ count = ring_buffer_entries_cpu(tr->buffer, cpu);
+ /*
+ * If this buffer has skipped entries, then we hold all
+ * entries for the trace and we need to ignore the
+ * ones before the time stamp.
+ */
+ if (tr->data[cpu]->skipped_entries) {
+ count -= tr->data[cpu]->skipped_entries;
+ /* total is the same as the entries */
+ total += count;
+ } else
+ total += count +
+ ring_buffer_overrun_cpu(tr->buffer, cpu);
+ entries += count;
+ }
seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
name, UTS_RELEASE);
@@ -1495,7 +1617,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
seq_puts(m, "\n# => ended at: ");
seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
trace_print_seq(m, &iter->seq);
- seq_puts(m, "#\n");
+ seq_puts(m, "\n#\n");
}
seq_puts(m, "#\n");
@@ -1514,6 +1636,9 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
if (cpumask_test_cpu(iter->cpu, iter->started))
return;
+ if (iter->tr->data[iter->cpu]->skipped_entries)
+ return;
+
cpumask_set_cpu(iter->cpu, iter->started);
/* Don't print started cpu buffer for the first entry of the trace */
@@ -1776,19 +1901,23 @@ __tracing_open(struct inode *inode, struct file *file)
if (ring_buffer_overruns(iter->tr->buffer))
iter->iter_flags |= TRACE_FILE_ANNOTATE;
+ /* stop the trace while dumping */
+ tracing_stop();
+
if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
ring_buffer_read_start(iter->tr->buffer, cpu);
+ tracing_iter_reset(iter, cpu);
}
} else {
cpu = iter->cpu_file;
iter->buffer_iter[cpu] =
ring_buffer_read_start(iter->tr->buffer, cpu);
+ tracing_iter_reset(iter, cpu);
}
- /* TODO stop tracer */
ret = seq_open(file, &tracer_seq_ops);
if (ret < 0) {
fail_ret = ERR_PTR(ret);
@@ -1798,9 +1927,6 @@ __tracing_open(struct inode *inode, struct file *file)
m = file->private_data;
m->private = iter;
- /* stop the trace while dumping */
- tracing_stop();
-
mutex_unlock(&trace_types_lock);
return iter;
@@ -1811,6 +1937,7 @@ __tracing_open(struct inode *inode, struct file *file)
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
free_cpumask_var(iter->started);
+ tracing_start();
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
@@ -3774,17 +3901,9 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret < 0)
return ret;
- switch (val) {
- case 0:
- trace_flags &= ~(1 << index);
- break;
- case 1:
- trace_flags |= 1 << index;
- break;
-
- default:
+ if (val != 0 && val != 1)
return -EINVAL;
- }
+ set_tracer_flags(1 << index, val);
*ppos += cnt;
@@ -3952,11 +4071,13 @@ static __init int tracer_init_debugfs(void)
trace_create_file("current_tracer", 0644, d_tracer,
&global_trace, &set_tracer_fops);
+#ifdef CONFIG_TRACER_MAX_TRACE
trace_create_file("tracing_max_latency", 0644, d_tracer,
&tracing_max_latency, &tracing_max_lat_fops);
trace_create_file("tracing_thresh", 0644, d_tracer,
&tracing_thresh, &tracing_max_lat_fops);
+#endif
trace_create_file("README", 0444, d_tracer,
NULL, &tracing_readme_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f5362a0529eb..821064914c80 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -258,9 +258,6 @@ struct trace_array_cpu {
atomic_t disabled;
void *buffer_page; /* ring buffer spare */
- /* these fields get copied into max-trace: */
- unsigned long trace_idx;
- unsigned long overrun;
unsigned long saved_latency;
unsigned long critical_start;
unsigned long critical_end;
@@ -268,6 +265,7 @@ struct trace_array_cpu {
unsigned long nice;
unsigned long policy;
unsigned long rt_priority;
+ unsigned long skipped_entries;
cycle_t preempt_timestamp;
pid_t pid;
uid_t uid;
@@ -441,12 +439,13 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
struct ring_buffer_event;
-struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
- int type,
- unsigned long len,
- unsigned long flags,
- int pc);
-void trace_buffer_unlock_commit(struct trace_array *tr,
+struct ring_buffer_event *
+trace_buffer_lock_reserve(struct ring_buffer *buffer,
+ int type,
+ unsigned long len,
+ unsigned long flags,
+ int pc);
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
struct ring_buffer_event *event,
unsigned long flags, int pc);
@@ -497,18 +496,20 @@ void unregister_tracer(struct tracer *type);
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
+#ifdef CONFIG_TRACER_MAX_TRACE
extern unsigned long tracing_max_latency;
extern unsigned long tracing_thresh;
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
+#endif /* CONFIG_TRACER_MAX_TRACE */
#ifdef CONFIG_STACKTRACE
-void ftrace_trace_stack(struct trace_array *tr, unsigned long flags,
+void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
int skip, int pc);
-void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags,
+void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
int pc);
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
@@ -589,6 +590,11 @@ extern int
trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
extern int
trace_vprintk(unsigned long ip, const char *fmt, va_list args);
+extern int
+trace_array_vprintk(struct trace_array *tr,
+ unsigned long ip, const char *fmt, va_list args);
+int trace_array_printk(struct trace_array *tr,
+ unsigned long ip, const char *fmt, ...);
extern unsigned long trace_flags;
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a29ef23ffb47..19bfc75d467e 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -41,14 +41,12 @@ void disable_boot_trace(void)
static int boot_trace_init(struct trace_array *tr)
{
- int cpu;
boot_trace = tr;
if (!tr)
return 0;
- for_each_cpu(cpu, cpu_possible_mask)
- tracing_reset(tr, cpu);
+ tracing_reset_online_cpus(tr);
tracing_sched_switch_assign_trace(tr);
return 0;
@@ -132,6 +130,7 @@ struct tracer boot_tracer __read_mostly =
void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
{
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
struct trace_boot_call *entry;
struct trace_array *tr = boot_trace;
@@ -144,13 +143,14 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
- event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
+ buffer = tr->buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->boot_call = *bt;
- trace_buffer_unlock_commit(tr, event, 0, 0);
+ trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
preempt_enable();
}
@@ -158,6 +158,7 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
{
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
struct trace_boot_ret *entry;
struct trace_array *tr = boot_trace;
@@ -167,13 +168,14 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
- event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
+ buffer = tr->buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->boot_ret = *bt;
- trace_buffer_unlock_commit(tr, event, 0, 0);
+ trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
preempt_enable();
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 197cdaa96c43..ba3492076ab2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1485,6 +1485,7 @@ static void
function_test_events_call(unsigned long ip, unsigned long parent_ip)
{
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
struct ftrace_entry *entry;
unsigned long flags;
long disabled;
@@ -1502,7 +1503,8 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
local_save_flags(flags);
- event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
+ event = trace_current_buffer_lock_reserve(&buffer,
+ TRACE_FN, sizeof(*entry),
flags, pc);
if (!event)
goto out;
@@ -1510,7 +1512,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
entry->ip = ip;
entry->parent_ip = parent_ip;
- trace_nowake_buffer_unlock_commit(event, flags, pc);
+ trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
out:
atomic_dec(&per_cpu(test_event_disable, cpu));
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 9f03082c81d8..93660fbbf629 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -309,7 +309,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
struct event_filter *filter = call->filter;
mutex_lock(&event_mutex);
- if (filter->filter_string)
+ if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_printf(s, "none\n");
@@ -322,7 +322,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
struct event_filter *filter = system->filter;
mutex_lock(&event_mutex);
- if (filter->filter_string)
+ if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
trace_seq_printf(s, "none\n");
@@ -390,6 +390,9 @@ void destroy_preds(struct ftrace_event_call *call)
struct event_filter *filter = call->filter;
int i;
+ if (!filter)
+ return;
+
for (i = 0; i < MAX_FILTER_PRED; i++) {
if (filter->preds[i])
filter_free_pred(filter->preds[i]);
@@ -400,17 +403,19 @@ void destroy_preds(struct ftrace_event_call *call)
call->filter = NULL;
}
-int init_preds(struct ftrace_event_call *call)
+static int init_preds(struct ftrace_event_call *call)
{
struct event_filter *filter;
struct filter_pred *pred;
int i;
+ if (call->filter)
+ return 0;
+
filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!call->filter)
return -ENOMEM;
- call->filter_active = 0;
filter->n_preds = 0;
filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
@@ -432,7 +437,26 @@ oom:
return -ENOMEM;
}
-EXPORT_SYMBOL_GPL(init_preds);
+
+static int init_subsystem_preds(struct event_subsystem *system)
+{
+ struct ftrace_event_call *call;
+ int err;
+
+ list_for_each_entry(call, &ftrace_events, list) {
+ if (!call->define_fields)
+ continue;
+
+ if (strcmp(call->system, system->name) != 0)
+ continue;
+
+ err = init_preds(call);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
enum {
FILTER_DISABLE_ALL,
@@ -449,6 +473,9 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
if (!call->define_fields)
continue;
+ if (strcmp(call->system, system->name) != 0)
+ continue;
+
if (flag == FILTER_INIT_NO_RESET) {
call->filter->no_reset = false;
continue;
@@ -457,10 +484,8 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
continue;
- if (!strcmp(call->system, system->name)) {
- filter_disable_preds(call);
- remove_filter_string(call->filter);
- }
+ filter_disable_preds(call);
+ remove_filter_string(call->filter);
}
}
@@ -1094,6 +1119,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
mutex_lock(&event_mutex);
+ err = init_preds(call);
+ if (err)
+ goto out_unlock;
+
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable_preds(call);
remove_filter_string(call->filter);
@@ -1139,6 +1168,10 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
mutex_lock(&event_mutex);
+ err = init_subsystem_preds(system);
+ if (err)
+ goto out_unlock;
+
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
remove_filter_string(system->filter);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index f75faeccf68e..a79ef6f193c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -120,7 +120,7 @@ ftrace_format_##call(struct ftrace_event_call *unused, \
static int ftrace_raw_init_event(struct ftrace_event_call *event_call)
{
INIT_LIST_HEAD(&event_call->fields);
- init_preds(event_call);
+
return 0;
}
@@ -137,7 +137,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.raw_init = ftrace_raw_init_event, \
.show_format = ftrace_format_##call, \
.define_fields = ftrace_define_fields_##call, \
-};
+}; \
#undef TRACE_EVENT_FORMAT_NOFILTER
#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3f4a251b7d16..b3749a2c3132 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -173,19 +173,20 @@ static int __trace_graph_entry(struct trace_array *tr,
{
struct ftrace_event_call *call = &event_funcgraph_entry;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer = tr->buffer;
struct ftrace_graph_ent_entry *entry;
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
return 0;
- event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_ENT,
+ event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
sizeof(*entry), flags, pc);
if (!event)
return 0;
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
- if (!filter_current_check_discard(call, entry, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_current_check_discard(buffer, call, entry, event))
+ ring_buffer_unlock_commit(buffer, event);
return 1;
}
@@ -236,19 +237,20 @@ static void __trace_graph_return(struct trace_array *tr,
{
struct ftrace_event_call *call = &event_funcgraph_exit;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer = tr->buffer;
struct ftrace_graph_ret_entry *entry;
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
return;
- event = trace_buffer_lock_reserve(tr, TRACE_GRAPH_RET,
+ event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
- if (!filter_current_check_discard(call, entry, event))
- ring_buffer_unlock_commit(tr->buffer, event);
+ if (!filter_current_check_discard(buffer, call, entry, event))
+ ring_buffer_unlock_commit(buffer, event);
}
void trace_graph_return(struct ftrace_graph_ret *trace)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b923d13e2fad..5555b75a0d12 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -178,7 +178,6 @@ out_unlock:
out:
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
- tracing_reset(tr, cpu);
trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}
@@ -208,7 +207,6 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
data->critical_start = parent_ip ? : ip;
- tracing_reset(tr, cpu);
local_save_flags(flags);
@@ -379,6 +377,7 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
+ tracing_reset_online_cpus(tr);
start_irqsoff_tracer(tr);
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f4ec3fc87b2d..19a6de63b44b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -819,6 +819,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
struct trace_probe *tp = container_of(kp, struct trace_probe, kp);
struct kprobe_trace_entry *entry;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
int size, i, pc;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
@@ -830,7 +831,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
- event = trace_current_buffer_lock_reserve(call->id, size,
+ event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
return 0;
@@ -841,8 +842,8 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i], regs);
- if (!filter_current_check_discard(call, entry, event))
- trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
+ if (!filter_current_check_discard(buffer, call, entry, event))
+ trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
return 0;
}
@@ -853,6 +854,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct kretprobe_trace_entry *entry;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
int size, i, pc;
unsigned long irq_flags;
struct ftrace_event_call *call = &tp->call;
@@ -862,7 +864,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
- event = trace_current_buffer_lock_reserve(call->id, size,
+ event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
irq_flags, pc);
if (!event)
return 0;
@@ -874,8 +876,8 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i], regs);
- if (!filter_current_check_discard(call, entry, event))
- trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
+ if (!filter_current_check_discard(buffer, call, entry, event))
+ trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
return 0;
}
@@ -964,7 +966,7 @@ static void probe_event_disable(struct ftrace_event_call *call)
static int probe_event_raw_init(struct ftrace_event_call *event_call)
{
INIT_LIST_HEAD(&event_call->fields);
- init_preds(event_call);
+
return 0;
}
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index d53b45ed0806..c4c9bbda53d3 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -307,11 +307,12 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_rw *rw)
{
+ struct ring_buffer *buffer = tr->buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_rw *entry;
int pc = preempt_count();
- event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
+ event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
sizeof(*entry), 0, pc);
if (!event) {
atomic_inc(&dropped_count);
@@ -319,7 +320,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
}
entry = ring_buffer_event_data(event);
entry->rw = *rw;
- trace_buffer_unlock_commit(tr, event, 0, pc);
+ trace_buffer_unlock_commit(buffer, event, 0, pc);
}
void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -333,11 +334,12 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_map *map)
{
+ struct ring_buffer *buffer = tr->buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_map *entry;
int pc = preempt_count();
- event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP,
+ event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP,
sizeof(*entry), 0, pc);
if (!event) {
atomic_inc(&dropped_count);
@@ -345,7 +347,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
}
entry = ring_buffer_event_data(event);
entry->map = *map;
- trace_buffer_unlock_commit(tr, event, 0, pc);
+ trace_buffer_unlock_commit(buffer, event, 0, pc);
}
void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 8a30d9874cd4..fe1a00f1445a 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -38,6 +38,7 @@ static void probe_power_end(struct power_trace *it)
{
struct ftrace_event_call *call = &event_power;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
struct trace_power *entry;
struct trace_array_cpu *data;
struct trace_array *tr = power_trace;
@@ -45,18 +46,20 @@ static void probe_power_end(struct power_trace *it)
if (!trace_power_enabled)
return;
+ buffer = tr->buffer;
+
preempt_disable();
it->end = ktime_get();
data = tr->data[smp_processor_id()];
- event = trace_buffer_lock_reserve(tr, TRACE_POWER,
+ event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->state_data = *it;
- if (!filter_check_discard(call, entry, tr->buffer, event))
- trace_buffer_unlock_commit(tr, event, 0, 0);
+ if (!filter_check_discard(call, entry, buffer, event))
+ trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
preempt_enable();
}
@@ -66,6 +69,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
{
struct ftrace_event_call *call = &event_power;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
struct trace_power *entry;
struct trace_array_cpu *data;
struct trace_array *tr = power_trace;
@@ -73,6 +77,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
if (!trace_power_enabled)
return;
+ buffer = tr->buffer;
+
memset(it, 0, sizeof(struct power_trace));
it->state = level;
it->type = type;
@@ -81,14 +87,14 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
it->end = it->stamp;
data = tr->data[smp_processor_id()];
- event = trace_buffer_lock_reserve(tr, TRACE_POWER,
+ event = trace_buffer_lock_reserve(buffer, TRACE_POWER,
sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
entry->state_data = *it;
- if (!filter_check_discard(call, entry, tr->buffer, event))
- trace_buffer_unlock_commit(tr, event, 0, 0);
+ if (!filter_check_discard(call, entry, buffer, event))
+ trace_buffer_unlock_commit(buffer, event, 0, 0);
out:
preempt_enable();
}
@@ -144,14 +150,12 @@ static void power_trace_reset(struct trace_array *tr)
static int power_trace_init(struct trace_array *tr)
{
- int cpu;
power_trace = tr;
trace_power_enabled = 1;
tracing_power_register();
- for_each_cpu(cpu, cpu_possible_mask)
- tracing_reset(tr, cpu);
+ tracing_reset_online_cpus(tr);
return 0;
}
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index e1285d7b5488..5fca0f51fde4 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -28,10 +28,11 @@ tracing_sched_switch_trace(struct trace_array *tr,
unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_context_switch;
+ struct ring_buffer *buffer = tr->buffer;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
- event = trace_buffer_lock_reserve(tr, TRACE_CTX,
+ event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
sizeof(*entry), flags, pc);
if (!event)
return;
@@ -44,8 +45,8 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->next_state = next->state;
entry->next_cpu = task_cpu(next);
- if (!filter_check_discard(call, entry, tr->buffer, event))
- trace_buffer_unlock_commit(tr, event, flags, pc);
+ if (!filter_check_discard(call, entry, buffer, event))
+ trace_buffer_unlock_commit(buffer, event, flags, pc);
}
static void
@@ -86,8 +87,9 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
struct ftrace_event_call *call = &event_wakeup;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
+ struct ring_buffer *buffer = tr->buffer;
- event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
+ event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
sizeof(*entry), flags, pc);
if (!event)
return;
@@ -100,10 +102,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
entry->next_state = wakee->state;
entry->next_cpu = task_cpu(wakee);
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
- ftrace_trace_stack(tr, flags, 6, pc);
- ftrace_trace_userstack(tr, flags, pc);
+ if (!filter_check_discard(call, entry, buffer, event))
+ ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(tr->buffer, flags, 6, pc);
+ ftrace_trace_userstack(tr->buffer, flags, pc);
}
static void
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index eacb27225173..ad69f105a7c6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -186,11 +186,6 @@ out:
static void __wakeup_reset(struct trace_array *tr)
{
- int cpu;
-
- for_each_possible_cpu(cpu)
- tracing_reset(tr, cpu);
-
wakeup_cpu = -1;
wakeup_prio = -1;
@@ -204,6 +199,8 @@ static void wakeup_reset(struct trace_array *tr)
{
unsigned long flags;
+ tracing_reset_online_cpus(tr);
+
local_irq_save(flags);
__raw_spin_lock(&wakeup_lock);
__wakeup_reset(tr);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index e7c676e50a7f..dfc55fed2099 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -11,8 +11,8 @@
static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
-static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
-static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
+static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
@@ -223,10 +223,13 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
int size;
int syscall_nr;
syscall_nr = syscall_get_nr(current, regs);
+ if (syscall_nr < 0)
+ return;
if (!test_bit(syscall_nr, enabled_enter_syscalls))
return;
@@ -236,8 +239,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
- event = trace_current_buffer_lock_reserve(sys_data->enter_id, size,
- 0, 0);
+ event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
+ size, 0, 0);
if (!event)
return;
@@ -245,8 +248,9 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
entry->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
- if (!filter_current_check_discard(sys_data->enter_event, entry, event))
- trace_current_buffer_unlock_commit(event, 0, 0);
+ if (!filter_current_check_discard(buffer, sys_data->enter_event,
+ entry, event))
+ trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
@@ -254,9 +258,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
int syscall_nr;
syscall_nr = syscall_get_nr(current, regs);
+ if (syscall_nr < 0)
+ return;
if (!test_bit(syscall_nr, enabled_exit_syscalls))
return;
@@ -264,7 +271,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
if (!sys_data)
return;
- event = trace_current_buffer_lock_reserve(sys_data->exit_id,
+ event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
sizeof(*entry), 0, 0);
if (!event)
return;
@@ -273,8 +280,9 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
entry->nr = syscall_nr;
entry->ret = syscall_get_return_value(current, regs);
- if (!filter_current_check_discard(sys_data->exit_event, entry, event))
- trace_current_buffer_unlock_commit(event, 0, 0);
+ if (!filter_current_check_discard(buffer, sys_data->exit_event,
+ entry, event))
+ trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
int reg_event_syscall_enter(struct ftrace_event_call *call)
@@ -285,7 +293,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
name = (char *)call->data;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_enter)
@@ -308,7 +316,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
name = (char *)call->data;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_enter--;
@@ -326,7 +334,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
name = call->data;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_exit)
@@ -349,7 +357,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
name = call->data;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_exit--;
@@ -369,8 +377,8 @@ struct trace_event event_syscall_exit = {
#ifdef CONFIG_EVENT_PROFILE
-static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX);
-static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX);
+static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;
@@ -416,7 +424,7 @@ int reg_prof_syscall_enter(char *name)
int num;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
@@ -438,7 +446,7 @@ void unreg_prof_syscall_enter(char *name)
int num;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return;
mutex_lock(&syscall_trace_lock);
@@ -477,7 +485,7 @@ int reg_prof_syscall_exit(char *name)
int num;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
@@ -499,7 +507,7 @@ void unreg_prof_syscall_exit(char *name)
int num;
num = syscall_name_to_nr(name);
- if (num < 0 || num >= FTRACE_SYSCALL_MAX)
+ if (num < 0 || num >= NR_syscalls)
return;
mutex_lock(&syscall_trace_lock);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 1a6a453b7efb..9489a0a9b1be 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -597,7 +597,9 @@ void syscall_regfunc(void)
if (!sys_tracepoint_refcount) {
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
- set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+ /* Skip kernel threads. */
+ if (t->mm)
+ set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
}
diff --git a/kernel/wait.c b/kernel/wait.c
index ea7c3b4275cf..c4bd3d825f35 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,13 +10,14 @@
#include <linux/wait.h>
#include <linux/hash.h>
-void init_waitqueue_head(wait_queue_head_t *q)
+void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
+ lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
}
-EXPORT_SYMBOL(init_waitqueue_head);
+EXPORT_SYMBOL(__init_waitqueue_head);
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{