summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/include/asm/ftrace.h29
-rw-r--r--arch/x86/include/asm/thread_info.h24
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/entry_32.S36
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/ftrace.c277
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--drivers/char/sysrq.c18
-rw-r--r--include/asm-generic/vmlinux.lds.h14
-rw-r--r--include/linux/compiler.h66
-rw-r--r--include/linux/ftrace.h75
-rw-r--r--include/linux/ftrace_irq.h13
-rw-r--r--include/linux/hardirq.h15
-rw-r--r--include/linux/marker.h2
-rw-r--r--include/linux/sched.h11
-rw-r--r--include/linux/tracepoint.h4
-rw-r--r--include/trace/boot.h56
-rw-r--r--init/main.c35
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/marker.c80
-rw-r--r--kernel/sysctl.c10
-rw-r--r--kernel/trace/Kconfig59
-rw-r--r--kernel/trace/Makefile7
-rw-r--r--kernel/trace/ftrace.c66
-rw-r--r--kernel/trace/ring_buffer.c298
-rw-r--r--kernel/trace/trace.c449
-rw-r--r--kernel/trace/trace.h153
-rw-r--r--kernel/trace/trace_boot.c163
-rw-r--r--kernel/trace/trace_branch.c320
-rw-r--r--kernel/trace/trace_functions.c15
-rw-r--r--kernel/trace/trace_functions_return.c82
-rw-r--r--kernel/trace/trace_irqsoff.c52
-rw-r--r--kernel/trace/trace_mmiotrace.c22
-rw-r--r--kernel/trace/trace_nop.c16
-rw-r--r--kernel/trace/trace_sched_switch.c103
-rw-r--r--kernel/trace/trace_sched_wakeup.c67
-rw-r--r--kernel/trace/trace_selftest.c105
-rw-r--r--kernel/trace/trace_stack.c8
-rw-r--r--kernel/trace/trace_sysprof.c16
-rw-r--r--kernel/tracepoint.c261
-rw-r--r--scripts/Makefile.build12
-rw-r--r--scripts/bootgraph.pl4
-rwxr-xr-xscripts/recordmcount.pl4
-rw-r--r--scripts/tracing/draw_functrace.py130
50 files changed, 2538 insertions, 679 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c86c07459712..196fd1c62a2f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file
parameter will force ia64_sal_cache_flush to call
ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
+ ftrace=[tracer]
+ [ftrace] will set and start the specified tracer
+ as early as possible in order to facilitate early
+ boot debugging.
+
+ ftrace_dump_on_oops
+ [ftrace] will dump the trace buffers on oops.
+
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..7780cc8b51d1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,8 @@ config X86
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_RET_TRACER if X86_32
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
-config MMIOTRACE_HOOKS
- bool
-
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on DEBUG_KERNEL && PCI
select TRACING
- select MMIOTRACE_HOOKS
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..9b6a1fa19e70 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,33 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
-#endif
-
+#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_RET_TRACER
+#define FTRACE_RET_STACK_SIZE 20
+
+#ifndef __ASSEMBLY__
+
+/*
+ * One saved call record on a thread's stack of hooked
+ * return addresses.
+ * Used in struct thread_info (ret_stack[]).
+ */
+struct ftrace_ret_stack {
+ unsigned long ret; /* original return address that was replaced */
+ unsigned long func; /* self_addr passed to prepare_ftrace_return() */
+ unsigned long long calltime; /* cpu_clock() value taken when the call was hooked */
+};
+
+/*
+ * Primary handler of a function return.
+ * It relies on ftrace_return_to_handler.
+ * Defined in entry_32.S
+ */
+extern void return_to_handler(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_RET_TRACER */
+
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..a71158369fd4 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,7 @@
struct task_struct;
struct exec_domain;
#include <asm/processor.h>
+#include <asm/ftrace.h>
struct thread_info {
struct task_struct *task; /* main task structure */
@@ -38,8 +39,30 @@ struct thread_info {
*/
__u8 supervisor_stack[0];
#endif
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ /* Index of the most recently stored address in ret_stack (-1 when empty) */
+ int curr_ret_stack;
+ /* Stack of return addresses for return function tracing */
+ struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE];
+#endif
};
+#ifdef CONFIG_FUNCTION_RET_TRACER
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = 1, \
+ .addr_limit = KERNEL_DS, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
+ .curr_ret_stack = -1,\
+}
+#else
#define INIT_THREAD_INFO(tsk) \
{ \
.task = &tsk, \
@@ -52,6 +75,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \
}, \
}
+#endif
#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..1d8ed95da846 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
endif
+ifdef CONFIG_FUNCTION_RET_TRACER
+# Don't instrument __switch_to() when the return tracer is built, but keep it traceable by the plain function tracer
+CFLAGS_REMOVE_process_32.o = -pg
+endif
+
#
# vsyscalls (which work on the user stack) should have
# no stack-protector checks:
@@ -65,6 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..f97621149839 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
pushl %eax
pushl %ecx
pushl %edx
@@ -1180,8 +1183,15 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpl $ftrace_stub, ftrace_trace_function
jnz trace
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ cmpl $ftrace_stub, ftrace_function_return
+ jnz trace_return
+#endif
.globl ftrace_stub
ftrace_stub:
ret
@@ -1200,8 +1210,34 @@ trace:
popl %edx
popl %ecx
popl %eax
+ jmp ftrace_stub
+#ifdef CONFIG_FUNCTION_RET_TRACER
+trace_return: /* hook the caller's return address via prepare_ftrace_return() */
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %edx /* word just above the three registers saved here */
+ lea 0x4(%ebp), %eax /* address of the slot at 4(%ebp) */
+ call prepare_ftrace_return /* NOTE(review): presumably receives (parent, self_addr) in %eax/%edx -- confirm calling convention */
+ popl %edx
+ popl %ecx
+ popl %eax
jmp ftrace_stub
+
+.globl return_to_handler
+return_to_handler: /* prepare_ftrace_return() installs this address in place of a traced function's return address */
+ pushl $0 /* placeholder slot, overwritten below */
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ call ftrace_return_to_handler
+ movl %eax, 0xc(%esp) /* put %eax (presumably the original return address returned by the call) into the placeholder slot */
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret /* pops the placeholder slot and jumps to it */
+#endif /* CONFIG_FUNCTION_RET_TRACER */
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..08aa6b10933c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
/* taken from glibc */
subq $0x38, %rsp
@@ -103,6 +105,9 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..fe832738e1e2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,144 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/ftrace.h>
+#include <linux/ftrace.h>
#include <asm/nops.h>
+#include <asm/nmi.h>
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+/* Called on NMI entry (from nmi_enter()): bump the in_nmi counter that prepare_ftrace_return() checks. */
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+}
+/* Called on NMI exit (from nmi_exit()): undo the in_nmi increment made by ftrace_nmi_enter(). */
+void ftrace_nmi_exit(void)
+{
+ atomic_dec(&in_nmi);
+}
+
+/* Push a (ret, func, time) record on the current thread_info's ret_stack: 0 on success, -EBUSY when full. */
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func)
+{
+ int index;
+ struct thread_info *ti = current_thread_info();
+
+ /* The return trace stack is full */
+ if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
+ return -EBUSY;
+
+ index = ++ti->curr_ret_stack; /* pre-increment: an empty stack has curr_ret_stack == -1 */
+ ti->ret_stack[index].ret = ret;
+ ti->ret_stack[index].func = func;
+ ti->ret_stack[index].calltime = time;
+
+ return 0;
+}
+
+/* Pop the top ret_stack record of the current thread into *ret, *time and *func; no empty-stack check is made here. */
+static void pop_return_trace(unsigned long *ret, unsigned long long *time,
+ unsigned long *func)
+{
+ int index;
+
+ struct thread_info *ti = current_thread_info();
+ index = ti->curr_ret_stack; /* index of the most recently pushed entry */
+ *ret = ti->ret_stack[index].ret;
+ *func = ti->ret_stack[index].func;
+ *time = ti->ret_stack[index].calltime;
+ ti->curr_ret_stack--;
+}
+
+/*
+ * Pop the saved call record, stamp its return time and pass it to the
+ * ftrace_function_return handler. @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_retfunc trace;
+ pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_function_return(&trace); /* currently registered return-trace callback */
+
+ return trace.ret; /* return_to_handler (entry_32.S) jumps back to this address */
+}
+
+/*
+ * Replace the return address stored at @parent with return_to_handler and
+ * save the original (with @self_addr and a timestamp) on the thread's ret_stack.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+ int faulted;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ /* Nmi's are currently unsupported */
+ if (atomic_read(&in_nmi))
+ return;
+
+ /*
+ * Swap the return address under exception-table protection. The fixup
+ * must jump back (3b) or execution runs off into .fixup; "old" is
+ * early-clobber since it is written before return_hooker is read.
+ */
+ asm volatile(
+ "1: movl (%[parent_old]), %[old]\n"
+ "2: movl %[return_hooker], (%[parent_replaced])\n"
+ " movl $0, %[faulted]\n"
+ "3:\n"
+ ".section .fixup, \"ax\"\n"
+ "4: movl $1, %[faulted]\n"
+ " jmp 3b\n"
+ ".previous\n"
+ ".section __ex_table, \"a\"\n"
+ " .long 1b, 4b\n"
+ " .long 2b, 4b\n"
+ ".previous\n"
+
+ : [parent_replaced] "=r" (parent), [old] "=&r" (old),
+ [faulted] "=r" (faulted)
+ : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (WARN_ON(faulted)) { /* "old" may be unset here, so do not touch *parent */
+ unregister_ftrace_return();
+ return;
+ }
+
+ if (WARN_ON(!__kernel_text_address(old))) { /* not a kernel text address: undo the hook */
+ unregister_ftrace_return();
+ *parent = old;
+ return;
+ }
+
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+ *parent = old; /* ret_stack full: restore the original return address */
+}
+
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@@ -31,17 +161,11 @@ union ftrace_code_union {
} __attribute__((packed));
};
-
static int ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
-unsigned char *ftrace_nop_replace(void)
-{
- return ftrace_nop;
-}
-
unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@@ -56,6 +180,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUs from executing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ * and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code
+ * 5) clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+
+static atomic_t in_nmi = ATOMIC_INIT(0);
+static int mod_code_status; /* holds return value of text write */
+static int mod_code_write; /* set when NMI should do the write */
+static void *mod_code_ip; /* holds the IP to write to */
+static void *mod_code_newcode; /* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+/* Format "nmi_wait_count nmi_update_count" into buf (bounded by size) and return snprintf()'s result. */
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ int r;
+
+ r = snprintf(buf, size, "%u %u",
+ nmi_wait_count,
+ atomic_read(&nmi_update_count));
+ return r;
+}
+/* Write MCOUNT_INSN_SIZE bytes of mod_code_newcode at mod_code_ip and store the result in mod_code_status. */
+static void ftrace_mod_code(void)
+{
+ /*
+ * Yes, more than one CPU process can be writing to mod_code_status.
+ * (and the code itself)
+ * But if one were to fail, then they all should, and if one were
+ * to succeed, then they all should.
+ */
+ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+ MCOUNT_INSN_SIZE);
+
+}
+/* NMI entry hook: count the NMI in in_nmi and, while mod_code_write is set, repeat the pending code write. */
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+ /* Must have in_nmi seen before reading write flag */
+ smp_mb();
+ if (mod_code_write) {
+ ftrace_mod_code();
+ atomic_inc(&nmi_update_count); /* statistic reported by ftrace_arch_read_dyn_info() */
+ }
+}
+/* NMI exit hook: drop the in_nmi count taken in ftrace_nmi_enter(). */
+void ftrace_nmi_exit(void)
+{
+ /* Finish all executions before clearing in_nmi */
+ smp_wmb();
+ atomic_dec(&in_nmi);
+}
+/* Spin until in_nmi reads zero; bump nmi_wait_count once if any spinning was needed. */
+static void wait_for_nmi(void)
+{
+ int waited = 0;
+
+ while (atomic_read(&in_nmi)) {
+ waited = 1;
+ cpu_relax();
+ }
+
+ if (waited)
+ nmi_wait_count++; /* statistic reported by ftrace_arch_read_dyn_info() */
+}
+/* Publish ip/new_code for NMI handlers, write the new code, then retire the request; returns mod_code_status. */
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+ mod_code_ip = (void *)ip;
+ mod_code_newcode = new_code;
+
+ /* The buffers need to be visible before we let NMIs write them */
+ smp_wmb();
+
+ mod_code_write = 1;
+
+ /* Make sure write bit is visible before we wait on NMIs */
+ smp_mb();
+
+ wait_for_nmi();
+
+ /* Make sure all running NMIs have finished before we write the code */
+ smp_mb();
+
+ ftrace_mod_code();
+
+ /* Make sure the write happens before clearing the bit */
+ smp_wmb();
+
+ mod_code_write = 0;
+
+ /* make sure NMIs see the cleared bit */
+ smp_mb();
+
+ wait_for_nmi();
+
+ return mod_code_status; /* result of the last probe_kernel_write() done by ftrace_mod_code() */
+}
+
+
+
+
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+/* Return the file-local ftrace_nop buffer (MCOUNT_INSN_SIZE bytes). */
+unsigned char *ftrace_nop_replace(void)
+{
+ return ftrace_nop;
+}
+
int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
@@ -81,7 +341,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ if (do_ftrace_mod_code(ip, new_code))
return -EPERM;
sync_core();
@@ -165,3 +425,4 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
+#endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..6f3d3d4cd973 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
* want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..4152d3c3b138 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
* Also alternative() doesn't work.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ce0d9da52a8a..94966edfb44d 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -274,6 +274,22 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
.enable_mask = SYSRQ_ENABLE_DUMP,
};
+#ifdef CONFIG_TRACING
+#include <linux/ftrace.h>
+/* SysRq handler: dump the ftrace buffer via ftrace_dump(); key and tty are unused. */
+static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
+{
+ ftrace_dump();
+}
+static struct sysrq_key_op sysrq_ftrace_dump_op = {
+ .handler = sysrq_ftrace_dump,
+ .help_msg = "dumpZ-ftrace-buffer",
+ .action_msg = "Dump ftrace buffer",
+ .enable_mask = SYSRQ_ENABLE_DUMP,
+};
+#else
+#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0) /* makes &sysrq_ftrace_dump_op a NULL entry in sysrq_key_table without CONFIG_TRACING */
+#endif
static void sysrq_handle_showmem(int key, struct tty_struct *tty)
{
@@ -406,7 +422,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
NULL, /* x */
/* y: May be registered on sparc64 for global register dump */
NULL, /* y */
- NULL /* z */
+ &sysrq_ftrace_dump_op, /* z */
};
/* key2index calculation, -1 on invalid index */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 80744606bad1..a5e4ed9baec8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -45,6 +45,17 @@
#define MCOUNT_REC()
#endif
+#ifdef CONFIG_TRACE_BRANCH_PROFILING
+#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \
+ *(_ftrace_likely) \
+ VMLINUX_SYMBOL(__stop_likely_profile) = .; \
+ VMLINUX_SYMBOL(__start_unlikely_profile) = .; \
+ *(_ftrace_unlikely) \
+ VMLINUX_SYMBOL(__stop_unlikely_profile) = .;
+#else
+#define LIKELY_PROFILE()
+#endif
+
/* .data section */
#define DATA_DATA \
*(.data) \
@@ -62,7 +73,8 @@
VMLINUX_SYMBOL(__stop___markers) = .; \
VMLINUX_SYMBOL(__start___tracepoints) = .; \
*(__tracepoints) \
- VMLINUX_SYMBOL(__stop___tracepoints) = .;
+ VMLINUX_SYMBOL(__stop___tracepoints) = .; \
+ LIKELY_PROFILE()
#define RO_DATA(align) \
. = ALIGN((align)); \
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 98115d9d04da..c7d804a7a4d6 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -59,8 +59,70 @@ extern void __chk_io_ptr(const volatile void __iomem *);
* specific implementations come from the above header files
*/
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
+struct ftrace_branch_data { /* one static record per likely_check()/unlikely_check() call site */
+ const char *func; /* __func__ of the call site */
+ const char *file; /* __FILE__ of the call site */
+ unsigned line; /* __LINE__ of the call site */
+ unsigned long correct; /* NOTE(review): presumably counts outcomes matching the hint -- updater not shown here */
+ unsigned long incorrect; /* NOTE(review): presumably counts outcomes contradicting the hint -- updater not shown here */
+};
+
+/*
+ * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
+ * to disable branch tracing on a per file basis.
+ */
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
+void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
+
+#define likely_notrace(x) __builtin_expect(!!(x), 1)
+#define unlikely_notrace(x) __builtin_expect(!!(x), 0)
+/* Profiled likely(): evaluates x once, reports the outcome to ftrace_likely_update() with expect=1, and yields that outcome. */
+#define likely_check(x) ({ \
+ int ______r; \
+ static struct ftrace_branch_data \
+ __attribute__((__aligned__(4))) \
+ __attribute__((section("_ftrace_likely"))) \
+ ______f = { \
+ .func = __func__, \
+ .file = __FILE__, \
+ .line = __LINE__, \
+ }; \
+ ______f.line = __LINE__; /* repeats the .line initializer above */ \
+ ______r = likely_notrace(x); \
+ ftrace_likely_update(&______f, ______r, 1); \
+ ______r; \
+ })
+#define unlikely_check(x) ({ /* profiled unlikely(): same shape as likely_check(), but expect=0 and section _ftrace_unlikely */ \
+ int ______r; \
+ static struct ftrace_branch_data \
+ __attribute__((__aligned__(4))) \
+ __attribute__((section("_ftrace_unlikely"))) \
+ ______f = { \
+ .func = __func__, \
+ .file = __FILE__, \
+ .line = __LINE__, \
+ }; \
+ ______f.line = __LINE__; /* repeats the .line initializer above */ \
+ ______r = unlikely_notrace(x); \
+ ftrace_likely_update(&______f, ______r, 0); \
+ ______r; \
+ })
+
+/*
+ * Using __builtin_constant_p(x) to ignore cases where the return
+ * value is always the same. This idea is taken from a similar patch
+ * written by Daniel Walker.
+ */
+# ifndef likely
+# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x))
+# endif
+# ifndef unlikely
+# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x))
+# endif
+#else
+# define likely(x) __builtin_expect(!!(x), 1)
+# define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
/* Optimization barrier */
#ifndef barrier
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 703eb53cfa2b..4fbc4a8b86a5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -23,6 +23,34 @@ struct ftrace_ops {
struct ftrace_ops *next;
};
+extern int function_trace_stop;
+
+/**
+ * ftrace_stop - stop function tracer.
+ *
+ * A quick way to stop the function tracer. Note this is an on/off switch,
+ * it is not something that is recursive like preempt_disable.
+ * This does not disable the calling of mcount, it only stops the
+ * calling of functions from mcount.
+ */
+static inline void ftrace_stop(void)
+{
+ function_trace_stop = 1;
+}
+
+/**
+ * ftrace_start - start the function tracer.
+ *
+ * This function is the inverse of ftrace_stop. This does not enable
+ * the function tracing if the function tracer is disabled. This only
+ * clears the function_trace_stop flag so that mcount resumes calling
+ * the functions.
+ */
+static inline void ftrace_start(void)
+{
+ function_trace_stop = 0;
+}
+
/*
* The ftrace_ops must be a static and should also
* be read_mostly. These functions do modify read_mostly variables
@@ -41,10 +69,11 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
# define unregister_ftrace_function(ops) do { } while (0)
# define clear_ftrace_function(ops) do { } while (0)
static inline void ftrace_kill(void) { }
+static inline void ftrace_stop(void) { }
+static inline void ftrace_start(void) { }
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_DYNAMIC_FTRACE
-
enum {
FTRACE_FL_FREE = (1 << 0),
FTRACE_FL_FAILED = (1 << 1),
@@ -74,6 +103,9 @@ extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
+/* May be defined in arch */
+extern int ftrace_arch_read_dyn_info(char *buf, int size);
+
/**
* ftrace_modify_code - modify code segment
* @ip: the address of the code segment
@@ -102,7 +134,6 @@ extern void ftrace_release(void *start, unsigned long size);
extern void ftrace_disable_daemon(void);
extern void ftrace_enable_daemon(void);
-
#else
# define skip_trace(ip) ({ 0; })
# define ftrace_force_update() ({ 0; })
@@ -181,6 +212,11 @@ static inline void __ftrace_enabled_restore(int enabled)
#endif
#ifdef CONFIG_TRACING
+extern int ftrace_dump_on_oops;
+
+extern void tracing_start(void);
+extern void tracing_stop(void);
+
extern void
ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
@@ -211,6 +247,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
static inline int
ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+static inline void tracing_start(void) { }
+static inline void tracing_stop(void) { }
static inline int
ftrace_printk(const char *fmt, ...)
{
@@ -229,25 +267,24 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
#endif
-struct boot_trace {
- pid_t caller;
- char func[KSYM_NAME_LEN];
- int result;
- unsigned long long duration; /* usecs */
- ktime_t calltime;
- ktime_t rettime;
+/*
+ * Structure that defines a return function trace.
+ */
+struct ftrace_retfunc {
+ unsigned long ret; /* Return address */
+ unsigned long func; /* Current function */
+ unsigned long long calltime; /* timestamp taken when the call was hooked */
+ unsigned long long rettime; /* timestamp taken when the function returned */
+};
-#ifdef CONFIG_BOOT_TRACER
-extern void trace_boot(struct boot_trace *it, initcall_t fn);
-extern void start_boot_trace(void);
-extern void stop_boot_trace(void);
-#else
-static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
-static inline void start_boot_trace(void) { }
-static inline void stop_boot_trace(void) { }
-#endif
-
+#ifdef CONFIG_FUNCTION_RET_TRACER
+/* Type of a callback handler of tracing return function */
+typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
+extern void register_ftrace_return(trace_function_return_t func);
+/* The current handler in use */
+extern trace_function_return_t ftrace_function_return;
+extern void unregister_ftrace_return(void);
+#endif
#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
new file mode 100644
index 000000000000..0b4df55d7a74
--- /dev/null
+++ b/include/linux/ftrace_irq.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_FTRACE_IRQ_H
+#define _LINUX_FTRACE_IRQ_H
+
+
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER)
+extern void ftrace_nmi_enter(void);
+extern void ftrace_nmi_exit(void);
+#else
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+
+#endif /* _LINUX_FTRACE_IRQ_H */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..89a56d79e4c6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
#include <linux/preempt.h>
#include <linux/smp_lock.h>
#include <linux/lockdep.h>
+#include <linux/ftrace_irq.h>
#include <asm/hardirq.h>
#include <asm/system.h>
@@ -161,7 +162,17 @@ extern void irq_enter(void);
*/
extern void irq_exit(void);
-#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter() \
+ do { \
+ ftrace_nmi_enter(); \
+ lockdep_off(); \
+ __irq_enter(); \
+ } while (0)
+#define nmi_exit() \
+ do { \
+ __irq_exit(); \
+ lockdep_on(); \
+ ftrace_nmi_exit(); \
+ } while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..4cf45472d9f5 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function;
extern void marker_probe_cb(const struct marker *mdata,
void *call_private, ...);
-extern void marker_probe_cb_noarg(const struct marker *mdata,
- void *call_private, ...);
/*
* Connect a probe to a marker.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 644ffbda17ca..61c8cc36028a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2006,6 +2006,17 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
{
*task_thread_info(p) = *task_thread_info(org);
task_thread_info(p)->task = p;
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ /*
+ * When fork() creates a child process, this function is called.
+ * But the child task must not inherit the return addresses traced
+ * by the return function tracer because it will directly execute
+ * in userspace and will not return to kernel functions its parent
+ * used.
+ */
+ task_thread_info(p)->curr_ret_stack = -1;
+#endif
}
static inline unsigned long *end_of_stack(struct task_struct *p)
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c5bb39c7a770..63064e9403f2 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe);
*/
extern int tracepoint_probe_unregister(const char *name, void *probe);
+extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
+extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
+extern void tracepoint_probe_update_all(void);
+
struct tracepoint_iter {
struct module *module;
struct tracepoint *tracepoint;
diff --git a/include/trace/boot.h b/include/trace/boot.h
new file mode 100644
index 000000000000..6b54537eab02
--- /dev/null
+++ b/include/trace/boot.h
@@ -0,0 +1,56 @@
+#ifndef _LINUX_TRACE_BOOT_H
+#define _LINUX_TRACE_BOOT_H
+
+/*
+ * Structure which defines the trace of an initcall
+ * while it is called.
+ * You don't have to fill the func field since it is
+ * only used internally by the tracer.
+ */
+struct boot_trace_call {
+ pid_t caller;
+ char func[KSYM_NAME_LEN];
+};
+
+/*
+ * Structure which defines the trace of an initcall
+ * while it returns.
+ */
+struct boot_trace_ret {
+ char func[KSYM_NAME_LEN];
+ int result;
+ unsigned long long duration; /* delta.tv64 >> 10 as filled by do_one_initcall(), which prints it as usecs */
+};
+
+#ifdef CONFIG_BOOT_TRACER
+/* Append the traces on the ring-buffer */
+extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
+extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
+
+/* Tells the tracer that smp_pre_initcall is finished.
+ * So we can start the tracing
+ */
+extern void start_boot_trace(void);
+
+/* Resume the tracing of other necessary events
+ * such as sched switches
+ */
+extern void enable_boot_trace(void);
+
+/* Suspend this tracing. Actually, only sched_switch tracing has
+ * to be suspended; initcall tracing doesn't need it.
+ */
+extern void disable_boot_trace(void);
+#else
+static inline
+void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
+
+static inline
+void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
+
+static inline void start_boot_trace(void) { }
+static inline void enable_boot_trace(void) { }
+static inline void disable_boot_trace(void) { }
+#endif /* CONFIG_BOOT_TRACER */
+
+#endif /* _LINUX_TRACE_BOOT_H */
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..e810196bf2f2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -63,6 +63,7 @@
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/ftrace.h>
+#include <trace/boot.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -703,31 +704,35 @@ core_param(initcall_debug, initcall_debug, bool, 0644);
int do_one_initcall(initcall_t fn)
{
int count = preempt_count();
- ktime_t delta;
+ ktime_t calltime, delta, rettime;
char msgbuf[64];
- struct boot_trace it;
+ struct boot_trace_call call;
+ struct boot_trace_ret ret;
if (initcall_debug) {
- it.caller = task_pid_nr(current);
- printk("calling %pF @ %i\n", fn, it.caller);
- it.calltime = ktime_get();
+ call.caller = task_pid_nr(current);
+ printk("calling %pF @ %i\n", fn, call.caller);
+ calltime = ktime_get();
+ trace_boot_call(&call, fn);
+ enable_boot_trace();
}
- it.result = fn();
+ ret.result = fn();
if (initcall_debug) {
- it.rettime = ktime_get();
- delta = ktime_sub(it.rettime, it.calltime);
- it.duration = (unsigned long long) delta.tv64 >> 10;
+ disable_boot_trace();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ ret.duration = (unsigned long long) delta.tv64 >> 10;
+ trace_boot_ret(&ret, fn);
printk("initcall %pF returned %d after %Ld usecs\n", fn,
- it.result, it.duration);
- trace_boot(&it, fn);
+ ret.result, ret.duration);
}
msgbuf[0] = 0;
- if (it.result && it.result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", it.result);
+ if (ret.result && ret.result != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret.result);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -741,7 +746,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return it.result;
+ return ret.result;
}
@@ -882,7 +887,7 @@ static int __init kernel_init(void * unused)
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*/
- stop_boot_trace();
+
init_post();
return 0;
}
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d84..af3be57acbbb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,10 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
+ifdef CONFIG_FUNCTION_RET_TRACER
+CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
+CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
+endif
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..2898b647d415 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
*/
#define MARKER_HASH_BITS 6
#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
/*
* Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
void *oldptr;
int rcu_pending;
unsigned char ptype:1;
+ unsigned char format_allocated:1;
char name[0]; /* Contains name'\0'format'\0' */
};
-static struct hlist_head marker_table[MARKER_TABLE_SIZE];
-
/**
* __mark_empty_function - Empty probe callback
* @probe_private: probe private data
@@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
*
* Should be connected to markers "MARK_NOARGS".
*/
-void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
+static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
@@ -197,7 +197,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
}
rcu_read_unlock_sched();
}
-EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
static void free_old_closure(struct rcu_head *head)
{
@@ -416,6 +415,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
e->single.probe_private = NULL;
e->multi = NULL;
e->ptype = 0;
+ e->format_allocated = 0;
e->refcount = 0;
e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
@@ -447,6 +447,8 @@ static int remove_marker(const char *name)
if (e->single.func != __mark_empty_function)
return -EBUSY;
hlist_del(&e->hlist);
+ if (e->format_allocated)
+ kfree(e->format);
/* Make sure the call_rcu has been executed */
if (e->rcu_pending)
rcu_barrier_sched();
@@ -457,57 +459,34 @@ static int remove_marker(const char *name)
/*
* Set the mark_entry format to the format found in the element.
*/
-static int marker_set_format(struct marker_entry **entry, const char *format)
+static int marker_set_format(struct marker_entry *entry, const char *format)
{
- struct marker_entry *e;
- size_t name_len = strlen((*entry)->name) + 1;
- size_t format_len = strlen(format) + 1;
-
-
- e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
- GFP_KERNEL);
- if (!e)
+ entry->format = kstrdup(format, GFP_KERNEL);
+ if (!entry->format)
return -ENOMEM;
- memcpy(&e->name[0], (*entry)->name, name_len);
- e->format = &e->name[name_len];
- memcpy(e->format, format, format_len);
- if (strcmp(e->format, MARK_NOARGS) == 0)
- e->call = marker_probe_cb_noarg;
- else
- e->call = marker_probe_cb;
- e->single = (*entry)->single;
- e->multi = (*entry)->multi;
- e->ptype = (*entry)->ptype;
- e->refcount = (*entry)->refcount;
- e->rcu_pending = 0;
- hlist_add_before(&e->hlist, &(*entry)->hlist);
- hlist_del(&(*entry)->hlist);
- /* Make sure the call_rcu has been executed */
- if ((*entry)->rcu_pending)
- rcu_barrier_sched();
- kfree(*entry);
- *entry = e;
+ entry->format_allocated = 1;
+
trace_mark(core_marker_format, "name %s format %s",
- e->name, e->format);
+ entry->name, entry->format);
return 0;
}
/*
* Sets the probe callback corresponding to one marker.
*/
-static int set_marker(struct marker_entry **entry, struct marker *elem,
+static int set_marker(struct marker_entry *entry, struct marker *elem,
int active)
{
int ret;
- WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+ WARN_ON(strcmp(entry->name, elem->name) != 0);
- if ((*entry)->format) {
- if (strcmp((*entry)->format, elem->format) != 0) {
+ if (entry->format) {
+ if (strcmp(entry->format, elem->format) != 0) {
printk(KERN_NOTICE
"Format mismatch for probe %s "
"(%s), marker (%s)\n",
- (*entry)->name,
- (*entry)->format,
+ entry->name,
+ entry->format,
elem->format);
return -EPERM;
}
@@ -523,34 +502,33 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
* pass from a "safe" callback (with argument) to an "unsafe"
* callback (does not set arguments).
*/
- elem->call = (*entry)->call;
+ elem->call = entry->call;
/*
* Sanity check :
* We only update the single probe private data when the ptr is
* set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
*/
WARN_ON(elem->single.func != __mark_empty_function
- && elem->single.probe_private
- != (*entry)->single.probe_private &&
- !elem->ptype);
- elem->single.probe_private = (*entry)->single.probe_private;
+ && elem->single.probe_private != entry->single.probe_private
+ && !elem->ptype);
+ elem->single.probe_private = entry->single.probe_private;
/*
* Make sure the private data is valid when we update the
* single probe ptr.
*/
smp_wmb();
- elem->single.func = (*entry)->single.func;
+ elem->single.func = entry->single.func;
/*
* We also make sure that the new probe callbacks array is consistent
* before setting a pointer to it.
*/
- rcu_assign_pointer(elem->multi, (*entry)->multi);
+ rcu_assign_pointer(elem->multi, entry->multi);
/*
* Update the function or multi probe array pointer before setting the
* ptype.
*/
smp_wmb();
- elem->ptype = (*entry)->ptype;
+ elem->ptype = entry->ptype;
elem->state = active;
return 0;
@@ -594,8 +572,7 @@ void marker_update_probe_range(struct marker *begin,
for (iter = begin; iter < end; iter++) {
mark_entry = get_marker(iter->name);
if (mark_entry) {
- set_marker(&mark_entry, iter,
- !!mark_entry->refcount);
+ set_marker(mark_entry, iter, !!mark_entry->refcount);
/*
* ignore error, continue
*/
@@ -657,7 +634,7 @@ int marker_probe_register(const char *name, const char *format,
ret = PTR_ERR(entry);
} else if (format) {
if (!entry->format)
- ret = marker_set_format(&entry, format);
+ ret = marker_set_format(entry, format);
else if (strcmp(entry->format, format))
ret = -EPERM;
}
@@ -848,8 +825,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
if (!e->ptype) {
if (num == 0 && e->single.func == probe)
return e->single.probe_private;
- else
- break;
} else {
struct marker_probe_closure *closure;
int match = 0;
@@ -861,6 +836,7 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
return closure[i].probe_private;
}
}
+ break;
}
}
return ERR_PTR(-ENOENT);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d048fa2d902..65d4a9ba79e4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
.proc_handler = &ftrace_enable_sysctl,
},
#endif
+#ifdef CONFIG_TRACING
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ftrace_dump_on_oops",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
#ifdef CONFIG_MODULES
{
.ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..9c89526b6b7c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,16 @@ config NOP_TRACER
config HAVE_FUNCTION_TRACER
bool
+config HAVE_FUNCTION_RET_TRACER
+ bool
+
+config HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ bool
+ help
+ This gets selected when the arch tests the function_trace_stop
+ variable at the mcount call site. Otherwise, this variable
+ is tested by the called function.
+
config HAVE_DYNAMIC_FTRACE
bool
@@ -47,6 +57,17 @@ config FUNCTION_TRACER
(the bootup default), then the overhead of the instructions is very
small and not measurable even in micro-benchmarks.
+config FUNCTION_RET_TRACER
+ bool "Kernel Function return Tracer"
+ depends on !DYNAMIC_FTRACE
+ depends on HAVE_FUNCTION_RET_TRACER
+ depends on FUNCTION_TRACER
+ help
+ Enable the kernel to trace a function at its return.
+ Its first purpose is to trace the duration of functions.
+ This is done by setting the current return address on the thread
+ info structure of the current task.
+
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
@@ -138,6 +159,44 @@ config BOOT_TRACER
selected, because the self-tests are an initcall as well and that
would invalidate the boot trace. )
+config TRACE_BRANCH_PROFILING
+ bool "Trace likely/unlikely profiler"
+ depends on DEBUG_KERNEL
+ select TRACING
+ help
+ This tracer profiles all the likely and unlikely macros
+ in the kernel. It will display the results in:
+
+ /debugfs/tracing/profile_likely
+ /debugfs/tracing/profile_unlikely
+
+ Note: this will add a significant overhead, only turn this
+ on if you need to profile the system's use of these macros.
+
+ Say N if unsure.
+
+config TRACING_BRANCHES
+ bool
+ help
+ Selected by tracers that will trace the likely and unlikely
+ conditions. This prevents the tracers themselves from being
+ profiled. Profiling the tracing infrastructure can only happen
+ when the likelys and unlikelys are not being traced.
+
+config BRANCH_TRACER
+ bool "Trace likely/unlikely instances"
+ depends on TRACE_BRANCH_PROFILING
+ select TRACING_BRANCHES
+ help
+ This traces the events of likely and unlikely condition
+ calls in the kernel. The difference between this and the
+ "Trace likely/unlikely profiler" is that this is not a
+ histogram of the callers, but actually places the calling
+ events into a running trace buffer to see when and where the
+ events happened, as well as their results.
+
+ Say N if unsure.
+
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1a49e9..1a8c9259dc69 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
+# If unlikely tracing is enabled, do not trace these files
+ifdef CONFIG_TRACING_BRANCHES
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+endif
+
obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
@@ -24,5 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
+obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 14fa52297b28..beb21a51e1ef 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,9 @@
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
+/* Quick disabling of function tracer. */
+int function_trace_stop;
+
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@@ -63,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
@@ -88,7 +92,22 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
void clear_ftrace_function(void)
{
ftrace_trace_function = ftrace_stub;
+ __ftrace_trace_function = ftrace_stub;
+}
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+/*
+ * For those archs that do not test function_trace_stop in their
+ * mcount call site, we need to do it from C.
+ */
+static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
+{
+ if (function_trace_stop)
+ return;
+
+ __ftrace_trace_function(ip, parent_ip);
}
+#endif
static int __register_ftrace_function(struct ftrace_ops *ops)
{
@@ -110,10 +129,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
* For one func, simply call it directly.
* For more than one func, call the chain.
*/
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
if (ops->next == &ftrace_list_end)
ftrace_trace_function = ops->func;
else
ftrace_trace_function = ftrace_list_func;
+#else
+ if (ops->next == &ftrace_list_end)
+ __ftrace_trace_function = ops->func;
+ else
+ __ftrace_trace_function = ftrace_list_func;
+ ftrace_trace_function = ftrace_test_stop_func;
+#endif
}
spin_unlock(&ftrace_lock);
@@ -522,7 +549,7 @@ static void ftrace_run_update_code(int command)
}
static ftrace_func_t saved_ftrace_func;
-static int ftrace_start;
+static int ftrace_start_up;
static DEFINE_MUTEX(ftrace_start_lock);
static void ftrace_startup(void)
@@ -533,8 +560,8 @@ static void ftrace_startup(void)
return;
mutex_lock(&ftrace_start_lock);
- ftrace_start++;
- if (ftrace_start == 1)
+ ftrace_start_up++;
+ if (ftrace_start_up == 1)
command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -558,8 +585,8 @@ static void ftrace_shutdown(void)
return;
mutex_lock(&ftrace_start_lock);
- ftrace_start--;
- if (!ftrace_start)
+ ftrace_start_up--;
+ if (!ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -585,8 +612,8 @@ static void ftrace_startup_sysctl(void)
mutex_lock(&ftrace_start_lock);
/* Force update next time */
saved_ftrace_func = NULL;
- /* ftrace_start is true if we want ftrace running */
- if (ftrace_start)
+ /* ftrace_start_up is true if we want ftrace running */
+ if (ftrace_start_up)
command |= FTRACE_ENABLE_CALLS;
ftrace_run_update_code(command);
@@ -601,8 +628,8 @@ static void ftrace_shutdown_sysctl(void)
return;
mutex_lock(&ftrace_start_lock);
- /* ftrace_start is true if ftrace is running */
- if (ftrace_start)
+ /* ftrace_start_up is true if ftrace is running */
+ if (ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
ftrace_run_update_code(command);
@@ -734,6 +761,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
((iter->flags & FTRACE_ITER_FAILURES) &&
!(rec->flags & FTRACE_FL_FAILED)) ||
+ ((iter->flags & FTRACE_ITER_FILTER) &&
+ !(rec->flags & FTRACE_FL_FILTER)) ||
+
((iter->flags & FTRACE_ITER_NOTRACE) &&
!(rec->flags & FTRACE_FL_NOTRACE))) {
rec = NULL;
@@ -1182,7 +1212,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftrace_start_lock);
- if (iter->filtered && ftrace_start && ftrace_enabled)
+ if (iter->filtered && ftrace_start_up && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
@@ -1450,3 +1480,19 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
return ret;
}
+#ifdef CONFIG_FUNCTION_RET_TRACER
+trace_function_return_t ftrace_function_return =
+ (trace_function_return_t)ftrace_stub;
+void register_ftrace_return(trace_function_return_t func)
+{
+ ftrace_function_return = func;
+}
+
+void unregister_ftrace_return(void)
+{
+ ftrace_function_return = (trace_function_return_t)ftrace_stub;
+}
+#endif
+
+
+
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 231db209fa82..2d6c2cf0c3bc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -45,6 +45,8 @@ void tracing_off(void)
ring_buffers_off = 1;
}
+#include "trace.h"
+
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
@@ -187,7 +189,8 @@ static inline int test_time_stamp(u64 delta)
struct ring_buffer_per_cpu {
int cpu;
struct ring_buffer *buffer;
- spinlock_t lock;
+ spinlock_t reader_lock; /* serialize readers */
+ raw_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head pages;
struct buffer_page *head_page; /* read from head */
@@ -221,32 +224,16 @@ struct ring_buffer_iter {
u64 read_stamp;
};
+/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(buffer, cond) \
- do { \
- if (unlikely(cond)) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- } \
- } while (0)
-
-#define RB_WARN_ON_RET(buffer, cond) \
- do { \
- if (unlikely(cond)) { \
- atomic_inc(&buffer->record_disabled); \
- WARN_ON(1); \
- return -1; \
- } \
- } while (0)
-
-#define RB_WARN_ON_ONCE(buffer, cond) \
- do { \
- static int once; \
- if (unlikely(cond) && !once) { \
- once++; \
+ ({ \
+ int _____ret = unlikely(cond); \
+ if (_____ret) { \
atomic_inc(&buffer->record_disabled); \
WARN_ON(1); \
} \
- } while (0)
+ _____ret; \
+ })
/**
* check_pages - integrity check of buffer pages
@@ -260,14 +247,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
struct list_head *head = &cpu_buffer->pages;
struct buffer_page *page, *tmp;
- RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
- RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
+ if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
+ return -1;
list_for_each_entry_safe(page, tmp, head, list) {
- RB_WARN_ON_RET(cpu_buffer,
- page->list.next->prev != &page->list);
- RB_WARN_ON_RET(cpu_buffer,
- page->list.prev->next != &page->list);
+ if (RB_WARN_ON(cpu_buffer,
+ page->list.next->prev != &page->list))
+ return -1;
+ if (RB_WARN_ON(cpu_buffer,
+ page->list.prev->next != &page->list))
+ return -1;
}
return 0;
@@ -324,7 +315,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
cpu_buffer->cpu = cpu;
cpu_buffer->buffer = buffer;
- spin_lock_init(&cpu_buffer->lock);
+ spin_lock_init(&cpu_buffer->reader_lock);
+ cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
INIT_LIST_HEAD(&cpu_buffer->pages);
page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
@@ -473,13 +465,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
synchronize_sched();
for (i = 0; i < nr_pages; i++) {
- BUG_ON(list_empty(&cpu_buffer->pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
p = cpu_buffer->pages.next;
page = list_entry(p, struct buffer_page, list);
list_del_init(&page->list);
free_buffer_page(page);
}
- BUG_ON(list_empty(&cpu_buffer->pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
+ return;
rb_reset_cpu(cpu_buffer);
@@ -501,7 +495,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
synchronize_sched();
for (i = 0; i < nr_pages; i++) {
- BUG_ON(list_empty(pages));
+ if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
+ return;
p = pages->next;
page = list_entry(p, struct buffer_page, list);
list_del_init(&page->list);
@@ -556,7 +551,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
if (size < buffer_size) {
/* easy case, just free pages */
- BUG_ON(nr_pages >= buffer->pages);
+ if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
rm_pages = buffer->pages - nr_pages;
@@ -575,7 +573,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
* add these pages to the cpu_buffers. Otherwise we just free
* them all and return -ENOMEM;
*/
- BUG_ON(nr_pages <= buffer->pages);
+ if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
+
new_pages = nr_pages - buffer->pages;
for_each_buffer_cpu(buffer, cpu) {
@@ -598,7 +600,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
rb_insert_pages(cpu_buffer, &pages, new_pages);
}
- BUG_ON(!list_empty(&pages));
+ if (RB_WARN_ON(buffer, !list_empty(&pages))) {
+ mutex_unlock(&buffer->mutex);
+ return -1;
+ }
out:
buffer->pages = nr_pages;
@@ -686,7 +691,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
head += rb_event_length(event)) {
event = __rb_page_index(cpu_buffer->head_page, head);
- BUG_ON(rb_null_event(event));
+ if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+ return;
/* Only count data entries */
if (event->type != RINGBUF_TYPE_DATA)
continue;
@@ -739,8 +745,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
addr &= PAGE_MASK;
while (cpu_buffer->commit_page->page != (void *)addr) {
- RB_WARN_ON(cpu_buffer,
- cpu_buffer->commit_page == cpu_buffer->tail_page);
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->commit_page == cpu_buffer->tail_page))
+ return;
cpu_buffer->commit_page->commit =
cpu_buffer->commit_page->write;
rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
@@ -887,7 +894,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (write > BUF_PAGE_SIZE) {
struct buffer_page *next_page = tail_page;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
rb_inc_page(cpu_buffer, &next_page);
@@ -895,7 +903,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
reader_page = cpu_buffer->reader_page;
/* we grabbed the lock before incrementing */
- RB_WARN_ON(cpu_buffer, next_page == reader_page);
+ if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+ goto out_unlock;
/*
* If for some reason, we had an interrupt storm that made
@@ -963,7 +972,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
rb_set_commit_to_write(cpu_buffer);
}
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
/* fail and let the caller try again */
return ERR_PTR(-EAGAIN);
@@ -971,7 +981,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* We reserved something on the buffer */
- BUG_ON(write > BUF_PAGE_SIZE);
+ if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
+ return NULL;
event = __rb_page_index(tail_page, tail);
rb_update_event(event, type, length);
@@ -986,7 +997,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event;
out_unlock:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return NULL;
}
@@ -1069,10 +1081,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
* storm or we have something buggy.
* Bail!
*/
- if (unlikely(++nr_loops > 1000)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
return NULL;
- }
ts = ring_buffer_time_stamp(cpu_buffer->cpu);
@@ -1175,8 +1185,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
return NULL;
/* If we are tracing schedule, we don't want to recurse */
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
@@ -1207,10 +1216,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
return event;
out:
- if (resched)
- preempt_enable_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return NULL;
}
@@ -1252,12 +1258,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
/*
* Only the last preempt count needs to restore preemption.
*/
- if (preempt_count() == 1) {
- if (per_cpu(rb_need_resched, cpu))
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
- } else
+ if (preempt_count() == 1)
+ ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+ else
preempt_enable_no_resched_notrace();
return 0;
@@ -1293,8 +1296,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
if (atomic_read(&buffer->record_disabled))
return -EBUSY;
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
@@ -1320,10 +1322,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
ret = 0;
out:
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return ret;
}
@@ -1482,14 +1481,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
return overruns;
}
-/**
- * ring_buffer_iter_reset - reset an iterator
- * @iter: The iterator to reset
- *
- * Resets the iterator, so that it will start from the beginning
- * again.
- */
-void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+static void rb_iter_reset(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -1508,6 +1500,23 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
}
/**
+ * ring_buffer_iter_reset - reset an iterator
+ * @iter: The iterator to reset
+ *
+ * Resets the iterator, so that it will start from the beginning
+ * again.
+ */
+void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ rb_iter_reset(iter);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
+
+/**
* ring_buffer_iter_empty - check if an iterator has no more to read
* @iter: The iterator to check
*/
@@ -1590,7 +1599,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
unsigned long flags;
int nr_loops = 0;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
again:
/*
@@ -1599,8 +1609,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
* a case where we will loop three times. There should be no
* reason to loop four times (that I know of).
*/
- if (unlikely(++nr_loops > 3)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
reader = NULL;
goto out;
}
@@ -1612,8 +1621,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto out;
/* Never should we have an index greater than the size */
- RB_WARN_ON(cpu_buffer,
- cpu_buffer->reader_page->read > rb_page_size(reader));
+ if (RB_WARN_ON(cpu_buffer,
+ cpu_buffer->reader_page->read > rb_page_size(reader)))
+ goto out;
/* check if we caught up to the tail */
reader = NULL;
@@ -1652,7 +1662,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
out:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return reader;
}
@@ -1666,7 +1677,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
reader = rb_get_reader_page(cpu_buffer);
/* This function should not be called when buffer is empty */
- BUG_ON(!reader);
+ if (RB_WARN_ON(cpu_buffer, !reader))
+ return;
event = rb_reader_event(cpu_buffer);
@@ -1693,7 +1705,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
* Check if we are at the end of the buffer.
*/
if (iter->head >= rb_page_size(iter->head_page)) {
- BUG_ON(iter->head_page == cpu_buffer->commit_page);
+ if (RB_WARN_ON(buffer,
+ iter->head_page == cpu_buffer->commit_page))
+ return;
rb_inc_iter(iter);
return;
}
@@ -1706,8 +1720,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
* This should not be called to advance the header if we are
* at the tail of the buffer.
*/
- BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
- (iter->head + length > rb_commit_index(cpu_buffer)));
+ if (RB_WARN_ON(cpu_buffer,
+ (iter->head_page == cpu_buffer->commit_page) &&
+ (iter->head + length > rb_commit_index(cpu_buffer))))
+ return;
rb_update_iter_read_stamp(iter, event);
@@ -1719,17 +1735,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
rb_advance_iter(iter);
}
-/**
- * ring_buffer_peek - peek at the next event to be read
- * @buffer: The ring buffer to read
- * @cpu: The cpu to peak at
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not consume the data.
- */
-struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+static struct ring_buffer_event *
+rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
@@ -1750,10 +1757,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
- if (unlikely(++nr_loops > 10)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
return NULL;
- }
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
@@ -1791,16 +1796,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
return NULL;
}
-/**
- * ring_buffer_iter_peek - peek at the next event to be read
- * @iter: The ring buffer iterator
- * @ts: The timestamp counter of this event.
- *
- * This will return the event that will be read next, but does
- * not increment the iterator.
- */
-struct ring_buffer_event *
-ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+static struct ring_buffer_event *
+rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
@@ -1822,10 +1819,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
* can have. Nesting 10 deep of interrupts is clearly
* an anomaly.
*/
- if (unlikely(++nr_loops > 10)) {
- RB_WARN_ON(cpu_buffer, 1);
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
return NULL;
- }
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
@@ -1862,6 +1857,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
}
/**
+ * ring_buffer_peek - peek at the next event to be read
+ * @buffer: The ring buffer to read
+ * @cpu: The cpu to peek at
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not consume the data.
+ */
+struct ring_buffer_event *
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_buffer_peek(buffer, cpu, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
+ * ring_buffer_iter_peek - peek at the next event to be read
+ * @iter: The ring buffer iterator
+ * @ts: The timestamp counter of this event.
+ *
+ * This will return the event that will be read next, but does
+ * not increment the iterator.
+ */
+struct ring_buffer_event *
+ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ struct ring_buffer_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return event;
+}
+
+/**
* ring_buffer_consume - return an event and consume it
* @buffer: The ring buffer to get the next event from
*
@@ -1872,19 +1912,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_event *
ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
{
- struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
struct ring_buffer_event *event;
+ unsigned long flags;
if (!cpu_isset(cpu, buffer->cpumask))
return NULL;
- event = ring_buffer_peek(buffer, cpu, ts);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ event = rb_buffer_peek(buffer, cpu, ts);
if (!event)
- return NULL;
+ goto out;
- cpu_buffer = buffer->buffers[cpu];
rb_advance_reader(cpu_buffer);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
return event;
}
@@ -1921,9 +1966,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
atomic_inc(&cpu_buffer->record_disabled);
synchronize_sched();
- spin_lock_irqsave(&cpu_buffer->lock, flags);
- ring_buffer_iter_reset(iter);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ __raw_spin_lock(&cpu_buffer->lock);
+ rb_iter_reset(iter);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return iter;
}
@@ -1955,12 +2002,17 @@ struct ring_buffer_event *
ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer_event *event;
+ struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
+ unsigned long flags;
- event = ring_buffer_iter_peek(iter, ts);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ event = rb_iter_peek(iter, ts);
if (!event)
- return NULL;
+ goto out;
rb_advance_iter(iter);
+ out:
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return event;
}
@@ -2009,11 +2061,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (!cpu_isset(cpu, buffer->cpumask))
return;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ __raw_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 697eda36b86a..4bf070bb5272 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,6 +43,15 @@
unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
unsigned long __read_mostly tracing_thresh;
+
+/*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+int tracing_disabled = 1;
+
static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
@@ -62,7 +71,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
-static int tracing_disabled = 1;
+/*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputting them to a
+ * serial console.
+ *
+ * It is off by default, but you can enable it by either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+int ftrace_dump_on_oops;
+
+static int tracing_set_tracer(char *buf);
+
+/*
+ * Boot-time handler for "ftrace=<tracer>" on the kernel command line.
+ *
+ * __setup() matches by prefix and hands the handler whatever follows
+ * the registered string, so the '=' has to be part of that string:
+ * otherwise @str would begin with '=' and the bare "ftrace" prefix
+ * would also match "ftrace_dump_on_oops".
+ *
+ * The result of tracing_set_tracer() is not checked; 1 is returned
+ * unconditionally.
+ */
+static int __init set_ftrace(char *str)
+{
+	tracing_set_tracer(str);
+	return 1;
+}
+__setup("ftrace=", set_ftrace);
+
+/*
+ * Boot-time handler registered for "ftrace_dump_on_oops": sets the
+ * ftrace_dump_on_oops flag. @str is not looked at, and 1 is always
+ * returned.
+ */
+static int __init set_ftrace_dump_on_oops(char *str)
+{
+ ftrace_dump_on_oops = 1;
+ return 1;
+}
+__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
long
ns2usecs(cycle_t nsec)
@@ -112,6 +150,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
+/**
+ * tracing_is_enabled - return tracer_enabled status
+ *
+ * This function is used by other tracers to know the status
+ * of the tracer_enabled flag. Tracers may use this function
+ * to know if they should enable their features when starting
+ * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
+ *
+ * Returns the current value of tracer_enabled.
+ */
+int tracing_is_enabled(void)
+{
+ return tracer_enabled;
+}
+
/* function tracing enabled */
int ftrace_function_enabled;
@@ -154,7 +205,7 @@ static DEFINE_MUTEX(trace_types_lock);
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds iter_ctrl options */
-unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK;
/**
* trace_wake_up - wake up tasks waiting for trace input
@@ -193,13 +244,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
return nsecs / 1000;
}
-/*
- * TRACE_ITER_SYM_MASK masks the options in trace_flags that
- * control the output of kernel symbols.
- */
-#define TRACE_ITER_SYM_MASK \
- (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
-
/* These must match the bit postions in trace_iterator_flags */
static const char *trace_options[] = {
"print-parent",
@@ -213,6 +257,10 @@ static const char *trace_options[] = {
"stacktrace",
"sched-tree",
"ftrace_printk",
+ "ftrace_preempt",
+#ifdef CONFIG_BRANCH_TRACER
+ "branch",
+#endif
NULL
};
@@ -485,7 +533,6 @@ int register_tracer(struct tracer *type)
if (type->selftest) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
- int saved_ctrl = tr->ctrl;
int i;
/*
* Run a selftest on this tracer.
@@ -498,13 +545,11 @@ int register_tracer(struct tracer *type)
tracing_reset(tr, i);
}
current_trace = type;
- tr->ctrl = 0;
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
/* the test is responsible for resetting too */
current_trace = saved_tracer;
- tr->ctrl = saved_ctrl;
if (ret) {
printk(KERN_CONT "FAILED!\n");
goto out;
@@ -581,6 +626,76 @@ static void trace_init_cmdlines(void)
cmdline_idx = 0;
}
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
+/**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ *
+ * Calls nest: each tracing_start() undoes one tracing_stop(), and
+ * recording is only re-enabled once the stop count is back at zero.
+ * Does nothing while tracing_disabled is set.
+ */
+void tracing_start(void)
+{
+	struct ring_buffer *buffer;
+	unsigned long flags;
+
+	if (tracing_disabled)
+		return;
+
+	spin_lock_irqsave(&tracing_start_lock, flags);
+	if (--trace_stop_count) {
+		/*
+		 * Either an outer tracing_stop() is still pending, or the
+		 * count went negative because of an unbalanced
+		 * tracing_start(). The underflow test has to live inside
+		 * this branch: past it the count is known to be zero and
+		 * can never be < 0.
+		 */
+		if (trace_stop_count < 0) {
+			/* Someone screwed up their debugging */
+			WARN_ON_ONCE(1);
+			trace_stop_count = 0;
+		}
+		goto out;
+	}
+
+	/* Count reached zero: turn recording back on for both arrays */
+	buffer = global_trace.buffer;
+	if (buffer)
+		ring_buffer_record_enable(buffer);
+
+	buffer = max_tr.buffer;
+	if (buffer)
+		ring_buffer_record_enable(buffer);
+
+	ftrace_start();
+ out:
+	spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Lightweight way to stop tracing; pair every call with a later
+ * tracing_start(). ftrace_stop() is called every time, but recording
+ * into the global_trace and max_tr buffers is only disabled by the
+ * first stop, when the stop count goes from zero to one.
+ */
+void tracing_stop(void)
+{
+	struct ring_buffer *rb;
+	unsigned long irq_state;
+
+	ftrace_stop();
+	spin_lock_irqsave(&tracing_start_lock, irq_state);
+
+	/* Nested stops only bump the counter */
+	if (trace_stop_count++ == 0) {
+		rb = global_trace.buffer;
+		if (rb)
+			ring_buffer_record_disable(rb);
+
+		rb = max_tr.buffer;
+		if (rb)
+			ring_buffer_record_disable(rb);
+	}
+
+	spin_unlock_irqrestore(&tracing_start_lock, irq_state);
+}
+
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
@@ -691,6 +806,35 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
}
+#ifdef CONFIG_FUNCTION_RET_TRACER
+/*
+ * Write one TRACE_FN_RET entry describing @trace into the ring buffer.
+ *
+ * @tr, @data: not used by the body; the entry always goes into
+ *             global_trace.buffer.
+ *             NOTE(review): confirm callers only ever pass &global_trace.
+ * @trace:     source of the func/ret addresses and call/return times
+ * @flags, @pc: passed on to tracing_generic_entry_update()
+ *
+ * Nothing is recorded when this CPU's ftrace_cpu_disabled counter is
+ * non-zero or when no ring buffer space could be reserved.
+ */
+static void __trace_function_return(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct ftrace_retfunc *trace,
+ unsigned long flags,
+ int pc)
+{
+ struct ring_buffer_event *event;
+ struct ftrace_ret_entry *entry;
+ unsigned long irq_flags;
+
+ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+ return;
+
+ event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ /* Fill in the reserved slot, then commit it */
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_FN_RET;
+ entry->ip = trace->func;
+ entry->parent_ip = trace->ret;
+ entry->rettime = trace->rettime;
+ entry->calltime = trace->calltime;
+ ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+}
+#endif
+
void
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -841,26 +985,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
+ unsigned long flags;
int cpu;
int pc;
- if (tracing_disabled || !tr->ctrl)
+ if (tracing_disabled)
return;
pc = preempt_count();
- preempt_disable_notrace();
+ local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
- if (likely(!atomic_read(&data->disabled)))
+ if (likely(atomic_inc_return(&data->disabled) == 1))
ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
- preempt_enable_notrace();
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_TRACER
static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
@@ -873,8 +1019,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -884,12 +1029,63 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
trace_function(tr, data, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
+}
+
+/*
+ * Function tracer callback that runs with interrupts disabled.
+ *
+ * Records an (ip, parent_ip) entry in global_trace through
+ * trace_function(), but only when ftrace_function_enabled is set and
+ * the per-cpu data->disabled counter goes from 0 to 1, i.e. nothing
+ * else on this CPU currently holds it raised.
+ */
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ raw_local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ /* data->disabled doubles as the recursion guard for this CPU */
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ raw_local_irq_restore(flags);
 }
+#ifdef CONFIG_FUNCTION_RET_TRACER
+/*
+ * Record a function-return event for @trace in global_trace.
+ *
+ * Runs with interrupts disabled (raw variant) and only calls
+ * __trace_function_return() when the per-cpu data->disabled counter
+ * goes from 0 to 1, so a nested invocation on the same CPU records
+ * nothing.
+ */
+void trace_function_return(struct ftrace_retfunc *trace)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ raw_local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ __trace_function_return(tr, data, trace, flags, pc);
+ }
+ atomic_dec(&data->disabled);
+ raw_local_irq_restore(flags);
+}
+#endif /* CONFIG_FUNCTION_RET_TRACER */
+
static struct ftrace_ops trace_ops __read_mostly =
{
.func = function_trace_call,
@@ -898,9 +1094,14 @@ static struct ftrace_ops trace_ops __read_mostly =
void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
register_ftrace_function(&trace_ops);
- if (tracer_enabled)
- ftrace_function_enabled = 1;
+ ftrace_function_enabled = 1;
}
void tracing_stop_function_trace(void)
@@ -1047,10 +1248,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
atomic_inc(&trace_record_cmdline_disabled);
- /* let the tracer grab locks here if needed */
- if (current_trace->start)
- current_trace->start(iter);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
@@ -1077,14 +1274,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
static void s_stop(struct seq_file *m, void *p)
{
- struct trace_iterator *iter = m->private;
-
atomic_dec(&trace_record_cmdline_disabled);
-
- /* let the tracer release locks here if needed */
- if (current_trace && current_trace == iter->trace && iter->trace->stop)
- iter->trace->stop(iter);
-
mutex_unlock(&trace_types_lock);
}
@@ -1143,7 +1333,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
# define IP_FMT "%016lx"
#endif
-static int
+int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
int ret;
@@ -1338,6 +1528,17 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
trace_seq_putc(s, '\n');
}
+/*
+ * Emit a one-time banner the first time an entry from iter->cpu is
+ * printed; iter->started remembers which CPUs were already announced.
+ */
+static void test_cpu_buff_start(struct trace_iterator *iter)
+{
+	if (!cpu_isset(iter->cpu, iter->started)) {
+		cpu_set(iter->cpu, iter->started);
+		trace_seq_printf(&iter->seq,
+				 "##### CPU %u buffer started ####\n",
+				 iter->cpu);
+	}
+}
+
static enum print_line_t
print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
{
@@ -1357,6 +1558,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
next_entry = find_next_entry(iter, NULL, &next_ts);
if (!next_entry)
next_ts = iter->ts;
@@ -1448,6 +1651,18 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
default:
trace_seq_printf(s, "Unknown type %d\n", entry->type);
}
@@ -1472,6 +1687,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
if (entry->type == TRACE_CONT)
return TRACE_TYPE_HANDLED;
+ test_cpu_buff_start(iter);
+
comm = trace_find_cmdline(iter->ent->pid);
t = ns2usecs(iter->ts);
@@ -1581,6 +1798,22 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
trace_seq_print_cont(s, iter);
break;
}
+ case TRACE_FN_RET: {
+ return print_return_function(iter);
+ break;
+ }
+ case TRACE_BRANCH: {
+ struct trace_branch *field;
+
+ trace_assign_type(field, entry);
+
+ trace_seq_printf(s, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line);
+ break;
+ }
}
return TRACE_TYPE_HANDLED;
}
@@ -1917,10 +2150,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
m->private = iter;
/* stop the trace while dumping */
- if (iter->tr->ctrl) {
- tracer_enabled = 0;
- ftrace_function_enabled = 0;
- }
+ tracing_stop();
if (iter->trace && iter->trace->open)
iter->trace->open(iter);
@@ -1965,14 +2195,7 @@ int tracing_release(struct inode *inode, struct file *file)
iter->trace->close(iter);
/* reenable tracing if it was previously enabled */
- if (iter->tr->ctrl) {
- tracer_enabled = 1;
- /*
- * It is safe to enable function tracing even if it
- * isn't used
- */
- ftrace_function_enabled = 1;
- }
+ tracing_start();
mutex_unlock(&trace_types_lock);
seq_release(inode, file);
@@ -2310,11 +2533,10 @@ static ssize_t
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_array *tr = filp->private_data;
char buf[64];
int r;
- r = sprintf(buf, "%ld\n", tr->ctrl);
+ r = sprintf(buf, "%u\n", tracer_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -2342,16 +2564,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
val = !!val;
mutex_lock(&trace_types_lock);
- if (tr->ctrl ^ val) {
- if (val)
+ if (tracer_enabled ^ val) {
+ if (val) {
tracer_enabled = 1;
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ tracing_start();
+ } else {
tracer_enabled = 0;
-
- tr->ctrl = val;
-
- if (current_trace && current_trace->ctrl_update)
- current_trace->ctrl_update(tr);
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
}
mutex_unlock(&trace_types_lock);
@@ -2377,29 +2601,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static ssize_t
-tracing_set_trace_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int tracing_set_tracer(char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
- char buf[max_tracer_type_len+1];
- int i;
- size_t ret;
-
- ret = cnt;
-
- if (cnt > max_tracer_type_len)
- cnt = max_tracer_type_len;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ int ret = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t; t = t->next) {
@@ -2413,6 +2619,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
if (t == current_trace)
goto out;
+ trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
@@ -2420,9 +2627,37 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
if (t->init)
t->init(tr);
+ trace_branch_enable(tr);
out:
mutex_unlock(&trace_types_lock);
+ return ret;
+}
+
+static ssize_t
+tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ ret = tracing_set_tracer(buf);
+ if (!ret)
+ ret = cnt;
+
if (ret > 0)
filp->f_pos += ret;
@@ -2491,6 +2726,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
return -ENOMEM;
mutex_lock(&trace_types_lock);
+
+ /* trace pipe does not show start of buffer */
+ cpus_setall(iter->started);
+
iter->tr = &global_trace;
iter->trace = current_trace;
filp->private_data = iter;
@@ -2677,7 +2916,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
unsigned long val;
char buf[64];
int ret, cpu;
- struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
return -EINVAL;
@@ -2697,12 +2935,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
mutex_lock(&trace_types_lock);
- if (tr->ctrl) {
- cnt = -EBUSY;
- pr_info("ftrace: please disable tracing"
- " before modifying buffer size\n");
- goto out;
- }
+ tracing_stop();
/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
@@ -2750,6 +2983,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
atomic_dec(&max_tr.data[cpu]->disabled);
}
+ tracing_start();
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
@@ -2772,9 +3006,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
{
char *buf;
char *end;
- struct trace_array *tr = &global_trace;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled)
return -EINVAL;
if (cnt > TRACE_BUF_SIZE)
@@ -2840,22 +3073,38 @@ static struct file_operations tracing_mark_fops = {
#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Weak default for architectures with nothing extra to report in
+ * "dyn_ftrace_total_info": writes nothing into @buf and returns 0.
+ * tracing_read_dyn_info() adds the return value to its output length,
+ * so an override is expected to return the number of bytes it placed
+ * in @buf (at most @size).
+ */
+int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ return 0;
+}
+
static ssize_t
-tracing_read_long(struct file *filp, char __user *ubuf,
+tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
-static struct file_operations tracing_read_long_fops = {
+static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
@@ -2964,7 +3213,7 @@ static __init int tracer_init_debugfs(void)
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
@@ -2987,7 +3236,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
unsigned long flags, irq_flags;
int cpu, len = 0, size, pc;
- if (!tr->ctrl || tracing_disabled)
+ if (tracing_disabled)
return 0;
pc = preempt_count();
@@ -3045,7 +3294,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
return NOTIFY_OK;
}
@@ -3061,7 +3311,8 @@ static int trace_die_handler(struct notifier_block *self,
{
switch (val) {
case DIE_OOPS:
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
break;
default:
break;
@@ -3102,7 +3353,6 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_reset(s);
}
-
void ftrace_dump(void)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3220,7 +3470,6 @@ __init static int tracer_alloc_buffers(void)
#endif
/* All seems OK, enable tracing */
- global_trace.ctrl = tracer_enabled;
tracing_disabled = 0;
atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..9e015f5bea1d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
#include <linux/ring_buffer.h>
#include <linux/mmiotrace.h>
#include <linux/ftrace.h>
+#include <trace/boot.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -21,7 +22,10 @@ enum trace_type {
TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
- TRACE_BOOT,
+ TRACE_BRANCH,
+ TRACE_BOOT_CALL,
+ TRACE_BOOT_RET,
+ TRACE_FN_RET,
__TRACE_LAST_TYPE
};
@@ -48,6 +52,15 @@ struct ftrace_entry {
unsigned long ip;
unsigned long parent_ip;
};
+
+/*
+ * Function return entry (TRACE_FN_RET), filled in by
+ * __trace_function_return() from a struct ftrace_retfunc.
+ */
+struct ftrace_ret_entry {
+ struct trace_entry ent;
+ unsigned long ip; /* copied from ftrace_retfunc->func */
+ unsigned long parent_ip; /* copied from ftrace_retfunc->ret */
+ unsigned long long calltime; /* copied from ftrace_retfunc->calltime */
+ unsigned long long rettime; /* copied from ftrace_retfunc->rettime */
+};
extern struct tracer boot_tracer;
/*
@@ -112,9 +125,24 @@ struct trace_mmiotrace_map {
struct mmiotrace_map map;
};
-struct trace_boot {
+struct trace_boot_call {
+ struct trace_entry ent;
+ struct boot_trace_call boot_call;
+};
+
+struct trace_boot_ret {
struct trace_entry ent;
- struct boot_trace initcall;
+ struct boot_trace_ret boot_ret;
+};
+
+/* Capacity of the func/file name copies below, excluding the final NUL */
+#define TRACE_FUNC_SIZE 30
+#define TRACE_FILE_SIZE 20
+/*
+ * Branch tracer entry (TRACE_BRANCH). The trace output code prints it
+ * as "[ ok ] func:file:line" or "[ MISS ] func:file:line" depending
+ * on @correct.
+ */
+struct trace_branch {
+ struct trace_entry ent;
+ unsigned line;
+ char func[TRACE_FUNC_SIZE+1];
+ char file[TRACE_FILE_SIZE+1];
+ char correct; /* non-zero is printed as " ok ", zero as " MISS " */
};
/*
@@ -172,7 +200,6 @@ struct trace_iterator;
struct trace_array {
struct ring_buffer *buffer;
unsigned long entries;
- long ctrl;
int cpu;
cycle_t time_start;
struct task_struct *waiter;
@@ -218,7 +245,10 @@ extern void __ftrace_bad_type(void);
TRACE_MMIO_RW); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
TRACE_MMIO_MAP); \
- IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
+ IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
+ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
+ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
+ IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
__ftrace_bad_type(); \
} while (0)
@@ -236,15 +266,14 @@ struct tracer {
const char *name;
void (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
+ void (*start)(struct trace_array *tr);
+ void (*stop)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
- void (*start)(struct trace_iterator *iter);
- void (*stop)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
- void (*ctrl_update)(struct trace_array *tr);
#ifdef CONFIG_FTRACE_STARTUP_TEST
int (*selftest)(struct tracer *trace,
struct trace_array *tr);
@@ -279,8 +308,11 @@ struct trace_iterator {
unsigned long iter_flags;
loff_t pos;
long idx;
+
+ cpumask_t started;
};
+int tracing_is_enabled(void);
void trace_wake_up(void);
void tracing_reset(struct trace_array *tr, int cpu);
int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -320,9 +352,14 @@ void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
+void
+trace_function_return(struct ftrace_retfunc *trace);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
+void tracing_sched_switch_assign_trace(struct trace_array *tr);
+void tracing_stop_sched_switch_record(void);
+void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
@@ -383,12 +420,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_branch(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
extern void trace_seq_print_cont(struct trace_seq *s,
struct trace_iterator *iter);
+
+extern int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
+ unsigned long sym_flags);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
size_t cnt);
extern long ns2usecs(cycle_t nsec);
@@ -396,6 +439,17 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
extern unsigned long trace_flags;
+/*
+ * Standard output formatting function used for function return traces.
+ * Without CONFIG_FUNCTION_RET_TRACER it is replaced by a stub that
+ * reports every entry as TRACE_TYPE_UNHANDLED.
+ */
+#ifdef CONFIG_FUNCTION_RET_TRACER
+extern enum print_line_t print_return_function(struct trace_iterator *iter);
+#else
+static inline enum print_line_t
+print_return_function(struct trace_iterator *iter)
+{
+ return TRACE_TYPE_UNHANDLED;
+}
+#endif
+
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
@@ -415,8 +469,91 @@ enum trace_iterator_flags {
TRACE_ITER_STACKTRACE = 0x100,
TRACE_ITER_SCHED_TREE = 0x200,
TRACE_ITER_PRINTK = 0x400,
+ TRACE_ITER_PREEMPTONLY = 0x800,
+#ifdef CONFIG_BRANCH_TRACER
+ TRACE_ITER_BRANCH = 0x1000,
+#endif
};
+/*
+ * TRACE_ITER_SYM_MASK masks the options in trace_flags that
+ * control the output of kernel symbols.
+ */
+#define TRACE_ITER_SYM_MASK \
+ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
+
extern struct tracer nop_trace;
+/**
+ * ftrace_preempt_disable - disable preemption scheduler safe
+ *
+ * When tracing can happen inside the scheduler, there exist
+ * cases where the tracing happens before the need_resched
+ * flag is checked. If the tracer then calls preempt_enable
+ * (after a disable), a schedule might take place, causing an
+ * infinite recursion.
+ *
+ * To prevent this, the need_resched flag is read before
+ * preemption is disabled. When preemption is to be enabled again,
+ * that saved value decides: if it was set, preempt_enable_no_resched
+ * is called, otherwise preempt_enable.
+ *
+ * The rationale is that if need_resched is set and we have not
+ * rescheduled yet, we are either in an atomic location (where we
+ * do not need to check for scheduling) or inside the scheduler
+ * and do not want to resched.
+ *
+ * Returns the need_resched() value sampled before preemption was
+ * disabled; hand it to ftrace_preempt_enable().
+ */
+static inline int ftrace_preempt_disable(void)
+{
+	int resched = need_resched();
+
+	preempt_disable_notrace();
+	return resched;
+}
+
+/**
+ * ftrace_preempt_enable - enable preemption scheduler safe
+ * @resched: the return value from ftrace_preempt_disable
+ *
+ * Scheduler-safe way to re-enable preemption without missing a
+ * preemption check. ftrace_preempt_disable() saved whether a
+ * reschedule was already pending. If it was, we were either in an
+ * atomic section or inside the scheduler (otherwise we would already
+ * have scheduled), so preempt_enable_no_resched is used instead of
+ * the normal preempt_enable.
+ */
+static inline void ftrace_preempt_enable(int resched)
+{
+	if (!resched) {
+		preempt_enable_notrace();
+		return;
+	}
+
+	preempt_enable_no_resched_notrace();
+}
+
+/*
+ * Helpers for switching branch tracing on and off. Without
+ * CONFIG_BRANCH_TRACER both collapse to no-ops and
+ * trace_branch_enable() returns 0.
+ */
+#ifdef CONFIG_BRANCH_TRACER
+extern int enable_branch_tracing(struct trace_array *tr);
+extern void disable_branch_tracing(void);
+/*
+ * Enable branch tracing on @tr, but only if the TRACE_ITER_BRANCH
+ * option is set in trace_flags. Returns the result of
+ * enable_branch_tracing(), or 0 when the option is off.
+ */
+static inline int trace_branch_enable(struct trace_array *tr)
+{
+ if (trace_flags & TRACE_ITER_BRANCH)
+ return enable_branch_tracing(tr);
+ return 0;
+}
+/* Unconditionally disable branch tracing, whatever trace_flags says */
+static inline void trace_branch_disable(void)
+{
+ /* due to races, always disable */
+ disable_branch_tracing();
+}
+#else
+static inline int trace_branch_enable(struct trace_array *tr)
+{
+ return 0;
+}
+static inline void trace_branch_disable(void)
+{
+}
+#endif /* CONFIG_BRANCH_TRACER */
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..cb333b7fd113 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,23 +13,38 @@
#include "trace.h"
static struct trace_array *boot_trace;
-static int trace_boot_enabled;
+static bool pre_initcalls_finished;
-
-/* Should be started after do_pre_smp_initcalls() in init/main.c */
+/* Tells the boot tracer that the pre_smp_initcalls are finished,
+ * so the tracer is ready to be used.
+ * This does not enable tracing of sched events, however;
+ * call enable_boot_trace() to do that.
+ */
void start_boot_trace(void)
{
- trace_boot_enabled = 1;
+ pre_initcalls_finished = true;
}
-void stop_boot_trace(void)
+void enable_boot_trace(void)
{
- trace_boot_enabled = 0;
+ if (pre_initcalls_finished)
+ tracing_start_sched_switch_record();
}
-void reset_boot_trace(struct trace_array *tr)
+void disable_boot_trace(void)
{
- stop_boot_trace();
+ if (pre_initcalls_finished)
+ tracing_stop_sched_switch_record();
+}
+
+/*
+ * ->reset callback of boot_tracer: restarts tr->time_start from
+ * ftrace_now() and calls tracing_reset() for every online CPU.
+ */
+static void reset_boot_trace(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
 }
static void boot_trace_init(struct trace_array *tr)
@@ -37,49 +52,77 @@ static void boot_trace_init(struct trace_array *tr)
int cpu;
boot_trace = tr;
- trace_boot_enabled = 0;
-
for_each_cpu_mask(cpu, cpu_possible_map)
tracing_reset(tr, cpu);
+
+ tracing_sched_switch_assign_trace(tr);
}
-static void boot_trace_ctrl_update(struct trace_array *tr)
+static enum print_line_t
+initcall_call_print_line(struct trace_iterator *iter)
{
- if (tr->ctrl)
- start_boot_trace();
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_boot_call *field;
+ struct boot_trace_call *call;
+ u64 ts;
+ unsigned long nsec_rem;
+ int ret;
+
+ trace_assign_type(field, entry);
+ call = &field->boot_call;
+ ts = iter->ts;
+ nsec_rem = do_div(ts, 1000000000);
+
+ ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
+ (unsigned long)ts, nsec_rem, call->func, call->caller);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
else
- stop_boot_trace();
+ return TRACE_TYPE_HANDLED;
}
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+static enum print_line_t
+initcall_ret_print_line(struct trace_iterator *iter)
{
- int ret;
struct trace_entry *entry = iter->ent;
- struct trace_boot *field = (struct trace_boot *)entry;
- struct boot_trace *it = &field->initcall;
struct trace_seq *s = &iter->seq;
- struct timespec calltime = ktime_to_timespec(it->calltime);
- struct timespec rettime = ktime_to_timespec(it->rettime);
-
- if (entry->type == TRACE_BOOT) {
- ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
- calltime.tv_sec,
- calltime.tv_nsec,
- it->func, it->caller);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
- "returned %d after %lld msecs\n",
- rettime.tv_sec,
- rettime.tv_nsec,
- it->func, it->result, it->duration);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ struct trace_boot_ret *field;
+ struct boot_trace_ret *init_ret;
+ u64 ts;
+ unsigned long nsec_rem;
+ int ret;
+
+ trace_assign_type(field, entry);
+ init_ret = &field->boot_ret;
+ ts = iter->ts;
+ nsec_rem = do_div(ts, 1000000000);
+
+ ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
+ "returned %d after %llu msecs\n",
+ (unsigned long) ts,
+ nsec_rem,
+ init_ret->func, init_ret->result, init_ret->duration);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ else
return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+
+ switch (entry->type) {
+ case TRACE_BOOT_CALL:
+ return initcall_call_print_line(iter);
+ case TRACE_BOOT_RET:
+ return initcall_ret_print_line(iter);
+ default:
+ return TRACE_TYPE_UNHANDLED;
}
- return TRACE_TYPE_UNHANDLED;
}
struct tracer boot_tracer __read_mostly =
@@ -87,27 +130,53 @@ struct tracer boot_tracer __read_mostly =
.name = "initcall",
.init = boot_trace_init,
.reset = reset_boot_trace,
- .ctrl_update = boot_trace_ctrl_update,
.print_line = initcall_print_line,
};
-void trace_boot(struct boot_trace *it, initcall_t fn)
+void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
{
struct ring_buffer_event *event;
- struct trace_boot *entry;
- struct trace_array_cpu *data;
+ struct trace_boot_call *entry;
unsigned long irq_flags;
struct trace_array *tr = boot_trace;
- if (!trace_boot_enabled)
+ if (!pre_initcalls_finished)
return;
/* Get its name now since this function could
* disappear because it is in the .init section.
*/
- sprint_symbol(it->func, (unsigned long)fn);
+ sprint_symbol(bt->func, (unsigned long)fn);
+ preempt_disable();
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+ entry->ent.type = TRACE_BOOT_CALL;
+ entry->boot_call = *bt;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+
+ out:
+ preempt_enable();
+}
+
+void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
+{
+ struct ring_buffer_event *event;
+ struct trace_boot_ret *entry;
+ unsigned long irq_flags;
+ struct trace_array *tr = boot_trace;
+
+ if (!pre_initcalls_finished)
+ return;
+
+ sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
- data = tr->data[smp_processor_id()];
event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
&irq_flags);
@@ -115,8 +184,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
goto out;
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, 0);
- entry->ent.type = TRACE_BOOT;
- entry->initcall = *it;
+ entry->ent.type = TRACE_BOOT_RET;
+ entry->boot_ret = *bt;
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
trace_wake_up();
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..85265553918f
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,320 @@
+/*
+ * unlikely profiler
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/hash.h>
+#include <linux/fs.h>
+#include <asm/local.h>
+#include "trace.h"
+
+#ifdef CONFIG_BRANCH_TRACER
+
+static int branch_tracing_enabled __read_mostly;
+static DEFINE_MUTEX(branch_tracing_mutex);
+static struct trace_array *branch_tracer;
+
+/*
+ * Write one branch-annotation event into the branch tracer's ring buffer.
+ *
+ * @f:      descriptor of the annotated branch (func, file, line are read)
+ * @val:    value the condition evaluated to
+ * @expect: value the annotation predicted
+ *
+ * Returns silently when no trace array has been assigned. The body runs
+ * with local interrupts disabled; the per-cpu "disabled" counter is
+ * incremented on entry and an event is only written when that increment
+ * yields 1, i.e. nobody else on this cpu currently holds the counter.
+ */
+static void
+probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+{
+ struct trace_array *tr = branch_tracer;
+ struct ring_buffer_event *event;
+ struct trace_branch *entry;
+ unsigned long flags, irq_flags;
+ int cpu, pc;
+ const char *p;
+
+ /*
+ * I would love to save just the ftrace_branch_data pointer, but
+ * this code can also be used by modules. Ugly things can happen
+ * if the module is unloaded, and then we go and read the
+ * pointer. This is slower, but much safer.
+ */
+
+ if (unlikely(!tr))
+ return;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
+ goto out;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ goto out;
+
+ pc = preempt_count();
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_BRANCH;
+
+ /*
+ * Strip off the path, only save the file: start at the terminating
+ * NUL and walk back to the last '/'. With no '/' in the name the
+ * loop stops one before f->file and the p++ lands on its start.
+ */
+ p = f->file + strlen(f->file);
+ while (p >= f->file && *p != '/')
+ p--;
+ p++;
+
+ strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
+ strncpy(entry->file, p, TRACE_FILE_SIZE);
+ /*
+ * strncpy() does not terminate on truncation, so do it by hand.
+ * NOTE(review): this writes index TRACE_*_SIZE, which assumes the
+ * arrays are declared with SIZE + 1 elements - confirm in trace.h.
+ */
+ entry->func[TRACE_FUNC_SIZE] = 0;
+ entry->file[TRACE_FILE_SIZE] = 0;
+ entry->line = f->line;
+ entry->correct = val == expect;
+
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ out:
+ /* Reached on every path after the increment above */
+ atomic_dec(&tr->data[cpu]->disabled);
+ local_irq_restore(flags);
+}
+
+/*
+ * Gate in front of probe_likely_condition(): the probe is only called
+ * while branch_tracing_enabled is non-zero.
+ */
+static inline
+void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+{
+ if (!branch_tracing_enabled)
+ return;
+
+ probe_likely_condition(f, val, expect);
+}
+
+/*
+ * Make @tr the trace array that branch events are written to and bump
+ * the enable count, both under branch_tracing_mutex.
+ *
+ * Returns 0; there is no failure path in this function.
+ */
+int enable_branch_tracing(struct trace_array *tr)
+{
+ int ret = 0;
+
+ mutex_lock(&branch_tracing_mutex);
+ branch_tracer = tr;
+ /*
+ * Must be seen before enabling. The reader is a condition
+ * where we do not need a matching rmb()
+ */
+ smp_wmb();
+ branch_tracing_enabled++;
+ mutex_unlock(&branch_tracing_mutex);
+
+ return ret;
+}
+
+/*
+ * Drop one enable reference under branch_tracing_mutex. Does nothing if
+ * the count is already zero. branch_tracer itself is left untouched.
+ */
+void disable_branch_tracing(void)
+{
+ mutex_lock(&branch_tracing_mutex);
+
+ if (!branch_tracing_enabled)
+ goto out_unlock;
+
+ branch_tracing_enabled--;
+
+ out_unlock:
+ mutex_unlock(&branch_tracing_mutex);
+}
+
+/* Thin wrapper: enable branch tracing on @tr, ignoring the return value. */
+static void start_branch_trace(struct trace_array *tr)
+{
+ enable_branch_tracing(tr);
+}
+
+/* Thin wrapper: drop one branch tracing reference. @tr is not used. */
+static void stop_branch_trace(struct trace_array *tr)
+{
+ disable_branch_tracing();
+}
+
+/*
+ * .init hook of the "branch" tracer: call tracing_reset() on @tr for
+ * every online cpu, then start branch tracing on @tr.
+ */
+static void branch_trace_init(struct trace_array *tr)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ start_branch_trace(tr);
+}
+
+/* .reset hook of the "branch" tracer: stop branch tracing. */
+static void branch_trace_reset(struct trace_array *tr)
+{
+ stop_branch_trace(tr);
+}
+
+/*
+ * Tracer descriptor registered under the name "branch". The selftest
+ * hook is only wired up when CONFIG_FTRACE_SELFTEST is set.
+ */
+struct tracer branch_trace __read_mostly =
+{
+ .name = "branch",
+ .init = branch_trace_init,
+ .reset = branch_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_branch,
+#endif
+};
+
+/*
+ * Register the "branch" tracer; hooked in through device_initcall().
+ * Returns whatever register_tracer() returns.
+ */
+__init static int init_branch_trace(void)
+{
+ return register_tracer(&branch_trace);
+}
+
+device_initcall(init_branch_trace);
+#else
+/*
+ * Empty stand-in used when CONFIG_BRANCH_TRACER is not set, so that
+ * ftrace_likely_update() can call it unconditionally.
+ */
+static inline
+void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+{
+}
+#endif /* CONFIG_BRANCH_TRACER */
+
+/*
+ * Account one evaluation of an annotated branch.
+ *
+ * @f:      descriptor of the branch; its correct/incorrect counters are
+ *          updated in place
+ * @val:    value the condition evaluated to
+ * @expect: value the annotation predicted
+ *
+ * The event is first handed to trace_likely_condition(), then either
+ * f->correct or f->incorrect is bumped. The increments are plain
+ * (non-atomic) read-modify-write operations, see the FIXME below.
+ */
+void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
+{
+ /*
+ * I would love to have a trace point here instead, but the
+ * trace point code is so inundated with unlikely and likely
+ * conditions that the recursive nightmare that exists is too
+ * much to try to get working. At least for now.
+ */
+ trace_likely_condition(f, val, expect);
+
+ /* FIXME: Make this atomic! */
+ if (val == expect)
+ f->correct++;
+ else
+ f->incorrect++;
+}
+EXPORT_SYMBOL(ftrace_likely_update);
+
+/*
+ * Bounds of one table of branch records. Stored as the seq_file private
+ * data; t_next() walks from @start in struct ftrace_branch_data steps
+ * and stops once it reaches @stop.
+ */
+struct ftrace_pointer {
+ void *start;
+ void *stop;
+};
+
+/*
+ * seq_file .next hook. (void *)1 is the token for the header line; after
+ * it comes the first record, then each following record until the end
+ * of the table, where NULL is returned. *pos is always incremented.
+ *
+ * NOTE(review): f->start is handed out without being compared against
+ * f->stop, so an empty table would still yield one element - confirm
+ * the sections can never be empty.
+ */
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct ftrace_pointer *f = m->private;
+ struct ftrace_branch_data *p = v;
+
+ (*pos)++;
+
+ if (v == (void *)1)
+ return f->start;
+
+ ++p;
+
+ if ((void *)p >= (void *)f->stop)
+ return NULL;
+
+ return p;
+}
+
+/*
+ * seq_file .start hook. Begins at the header token and steps t_next()
+ * until a local counter reaches *pos or the table ends. *pos itself is
+ * only read. Returns the element at that position, or NULL.
+ */
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ void *t = (void *)1;
+ loff_t l = 0;
+
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+/* seq_file .stop hook: intentionally empty, t_start() acquires nothing. */
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+/*
+ * seq_file .show hook. Prints the column header for the (void *)1 token,
+ * otherwise one line for the branch record @v: correct count, incorrect
+ * count, percentage of incorrect evaluations, function, file name
+ * without its directory part, and line number. Always returns 0.
+ */
+static int t_show(struct seq_file *m, void *v)
+{
+ struct ftrace_branch_data *p = v;
+ const char *f;
+ unsigned long percent;
+
+ if (v == (void *)1) {
+ seq_printf(m, " correct incorrect %% "
+ " Function "
+ " File Line\n"
+ " ------- --------- - "
+ " -------- "
+ " ---- ----\n");
+ return 0;
+ }
+
+ /* Only print the file, not the path */
+ f = p->file + strlen(p->file);
+ while (f >= p->file && *f != '/')
+ f--;
+ f++;
+
+ /*
+ * percent = incorrect * 100 / (correct + incorrect). When correct
+ * is zero the result is pinned to 100 (some misses) or 0 (never hit).
+ */
+ if (p->correct) {
+ percent = p->incorrect * 100;
+ percent /= p->correct + p->incorrect;
+ } else
+ percent = p->incorrect ? 100 : 0;
+
+ seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent);
+ seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
+ return 0;
+}
+
+/* Iterator callbacks handed to seq_open() by tracing_likely_open(). */
+static struct seq_operations tracing_likely_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+/*
+ * .open handler of the profile files. Opens a seq_file on
+ * tracing_likely_seq_ops and, on success, copies inode->i_private into
+ * the seq_file's private pointer (read back as a struct ftrace_pointer
+ * by t_next()). Returns the result of seq_open().
+ */
+static int tracing_likely_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = seq_open(file, &tracing_likely_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = (void *)inode->i_private;
+ }
+
+ return ret;
+}
+
+/*
+ * File operations shared by both profile files.
+ * NOTE(review): no .release handler is set - confirm that whatever
+ * seq_open() allocates is released elsewhere.
+ */
+static struct file_operations tracing_likely_fops = {
+ .open = tracing_likely_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+/*
+ * Start/stop markers of the likely and unlikely profile tables; defined
+ * outside this file (presumably by the linker script - confirm).
+ */
+extern unsigned long __start_likely_profile[];
+extern unsigned long __stop_likely_profile[];
+extern unsigned long __start_unlikely_profile[];
+extern unsigned long __stop_unlikely_profile[];
+
+/* Bounds passed as file data for the "profile_likely" file */
+static struct ftrace_pointer ftrace_likely_pos = {
+ .start = __start_likely_profile,
+ .stop = __stop_likely_profile,
+};
+
+/* Bounds passed as file data for the "profile_unlikely" file */
+static struct ftrace_pointer ftrace_unlikely_pos = {
+ .start = __start_unlikely_profile,
+ .stop = __stop_unlikely_profile,
+};
+
+/*
+ * Create the read-only (0444) files "profile_likely" and
+ * "profile_unlikely" under the dentry returned by tracing_init_dentry().
+ * A failed creation is only reported with pr_warning(); the function
+ * always returns 0. Hooked in through device_initcall().
+ */
+static __init int ftrace_branch_init(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("profile_likely", 0444, d_tracer,
+ &ftrace_likely_pos,
+ &tracing_likely_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'profile_likely' entry\n");
+
+ entry = debugfs_create_file("profile_unlikely", 0444, d_tracer,
+ &ftrace_unlikely_pos,
+ &tracing_likely_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs"
+ " 'profile_unlikely' entry\n");
+
+ return 0;
+}
+
+device_initcall(ftrace_branch_init);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..8693b7a0a5b2 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -44,22 +44,17 @@ static void stop_function_trace(struct trace_array *tr)
static void function_trace_init(struct trace_array *tr)
{
- if (tr->ctrl)
- start_function_trace(tr);
+ start_function_trace(tr);
}
static void function_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_function_trace(tr);
+ stop_function_trace(tr);
}
-static void function_trace_ctrl_update(struct trace_array *tr)
+static void function_trace_start(struct trace_array *tr)
{
- if (tr->ctrl)
- start_function_trace(tr);
- else
- stop_function_trace(tr);
+ function_reset(tr);
}
static struct tracer function_trace __read_mostly =
@@ -67,7 +62,7 @@ static struct tracer function_trace __read_mostly =
.name = "function",
.init = function_trace_init,
.reset = function_trace_reset,
- .ctrl_update = function_trace_ctrl_update,
+ .start = function_trace_start,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
#endif
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
new file mode 100644
index 000000000000..7680b21537dd
--- /dev/null
+++ b/kernel/trace/trace_functions_return.c
@@ -0,0 +1,82 @@
+/*
+ *
+ * Function return tracer.
+ * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Mostly borrowed from function tracer which
+ * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+
+/*
+ * Install trace_function_return as the return handler through
+ * register_ftrace_return(). @tr is not used.
+ */
+static void start_return_trace(struct trace_array *tr)
+{
+ register_ftrace_return(&trace_function_return);
+}
+
+/* Remove the return handler again. @tr is not used. */
+static void stop_return_trace(struct trace_array *tr)
+{
+ unregister_ftrace_return();
+}
+
+/*
+ * .init hook of the "return" tracer: call tracing_reset() on @tr for
+ * every online cpu, then start return tracing.
+ */
+static void return_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+
+ start_return_trace(tr);
+}
+
+/* .reset hook of the "return" tracer: stop return tracing. */
+static void return_trace_reset(struct trace_array *tr)
+{
+ stop_return_trace(tr);
+}
+
+
+/*
+ * .print_line hook of the "return" tracer.
+ *
+ * For a TRACE_FN_RET entry this emits, in order: parent_ip formatted
+ * with %pF, " -> ", the symbol for ip, and the elapsed time
+ * (rettime - calltime) in the form " (N ns)".
+ *
+ * Returns TRACE_TYPE_HANDLED when all three writes succeed,
+ * TRACE_TYPE_PARTIAL_LINE as soon as one of them returns 0, and
+ * TRACE_TYPE_UNHANDLED for any other entry type.
+ */
+enum print_line_t
+print_return_function(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct ftrace_ret_entry *field;
+ int ret;
+
+ if (entry->type == TRACE_FN_RET) {
+ trace_assign_type(field, entry);
+ ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = seq_print_ip_sym(s, field->ip,
+ trace_flags & TRACE_ITER_SYM_MASK);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, " (%llu ns)\n",
+ field->rettime - field->calltime);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ else
+ return TRACE_TYPE_HANDLED;
+ }
+ return TRACE_TYPE_UNHANDLED;
+}
+
+/* Tracer descriptor registered under the name "return". */
+static struct tracer return_trace __read_mostly =
+{
+ .name = "return",
+ .init = return_trace_init,
+ .reset = return_trace_reset,
+ .print_line = print_return_function
+};
+
+/*
+ * Register the "return" tracer; hooked in through device_initcall().
+ * Returns whatever register_tracer() returns.
+ */
+static __init int init_return_trace(void)
+{
+ return register_tracer(&return_trace);
+}
+
+device_initcall(init_return_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..d919d4eaa7cc 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
}
#endif /* CONFIG_PREEMPT_TRACER */
+/*
+ * save_tracer_enabled is used to save the state of the tracer_enabled
+ * variable when we disable it when we open a trace output file.
+ */
+static int save_tracer_enabled;
+
static void start_irqsoff_tracer(struct trace_array *tr)
{
register_ftrace_function(&trace_ops);
- tracer_enabled = 1;
+ if (tracing_is_enabled()) {
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+ } else {
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+ }
}
static void stop_irqsoff_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
}
@@ -370,36 +383,36 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
-
- if (tr->ctrl)
- start_irqsoff_tracer(tr);
+ start_irqsoff_tracer(tr);
}
static void irqsoff_tracer_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_irqsoff_tracer(tr);
+ stop_irqsoff_tracer(tr);
}
-static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
+static void irqsoff_tracer_start(struct trace_array *tr)
{
- if (tr->ctrl)
- start_irqsoff_tracer(tr);
- else
- stop_irqsoff_tracer(tr);
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+}
+
+static void irqsoff_tracer_stop(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
}
static void irqsoff_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
- if (iter->tr->ctrl)
- stop_irqsoff_tracer(iter->tr);
+ tracer_enabled = 0;
}
static void irqsoff_tracer_close(struct trace_iterator *iter)
{
- if (iter->tr->ctrl)
- start_irqsoff_tracer(iter->tr);
+ /* restart tracing */
+ tracer_enabled = save_tracer_enabled;
}
#ifdef CONFIG_IRQSOFF_TRACER
@@ -414,9 +427,10 @@ static struct tracer irqsoff_tracer __read_mostly =
.name = "irqsoff",
.init = irqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
- .ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
@@ -440,9 +454,10 @@ static struct tracer preemptoff_tracer __read_mostly =
.name = "preemptoff",
.init = preemptoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
- .ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
@@ -468,9 +483,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
.name = "preemptirqsoff",
.init = preemptirqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
- .ctrl_update = irqsoff_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f28484618ff0..51bcf370215e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -34,30 +34,24 @@ static void mmio_trace_init(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
mmio_trace_array = tr;
- if (tr->ctrl) {
- mmio_reset_data(tr);
- enable_mmiotrace();
- }
+
+ mmio_reset_data(tr);
+ enable_mmiotrace();
}
static void mmio_trace_reset(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
- if (tr->ctrl)
- disable_mmiotrace();
+
+ disable_mmiotrace();
mmio_reset_data(tr);
mmio_trace_array = NULL;
}
-static void mmio_trace_ctrl_update(struct trace_array *tr)
+static void mmio_trace_start(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
- if (tr->ctrl) {
- mmio_reset_data(tr);
- enable_mmiotrace();
- } else {
- disable_mmiotrace();
- }
+ mmio_reset_data(tr);
}
static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -298,10 +292,10 @@ static struct tracer mmio_tracer __read_mostly =
.name = "mmiotrace",
.init = mmio_trace_init,
.reset = mmio_trace_reset,
+ .start = mmio_trace_start,
.pipe_open = mmio_pipe_open,
.close = mmio_close,
.read = mmio_read,
- .ctrl_update = mmio_trace_ctrl_update,
.print_line = mmio_print_line,
};
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..2ef1d227e7d8 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -32,23 +32,12 @@ static void nop_trace_init(struct trace_array *tr)
for_each_online_cpu(cpu)
tracing_reset(tr, cpu);
- if (tr->ctrl)
- start_nop_trace(tr);
+ start_nop_trace(tr);
}
static void nop_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_nop_trace(tr);
-}
-
-static void nop_trace_ctrl_update(struct trace_array *tr)
-{
- /* When starting a new trace, reset the buffers */
- if (tr->ctrl)
- start_nop_trace(tr);
- else
- stop_nop_trace(tr);
+ stop_nop_trace(tr);
}
struct tracer nop_trace __read_mostly =
@@ -56,7 +45,6 @@ struct tracer nop_trace __read_mostly =
.name = "nop",
.init = nop_trace_init,
.reset = nop_trace_reset,
- .ctrl_update = nop_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_nop,
#endif
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..be35bdfe2e38 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
-static atomic_t sched_ref;
+static int sched_ref;
+static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
int cpu;
int pc;
- if (!atomic_read(&sched_ref))
+ if (!sched_ref)
return;
tracing_record_cmdline(prev);
@@ -123,20 +124,18 @@ static void tracing_sched_unregister(void)
static void tracing_start_sched_switch(void)
{
- long ref;
-
- ref = atomic_inc_return(&sched_ref);
- if (ref == 1)
+ mutex_lock(&sched_register_mutex);
+ if (!(sched_ref++))
tracing_sched_register();
+ mutex_unlock(&sched_register_mutex);
}
static void tracing_stop_sched_switch(void)
{
- long ref;
-
- ref = atomic_dec_and_test(&sched_ref);
- if (ref)
+ mutex_lock(&sched_register_mutex);
+ if (!(--sched_ref))
tracing_sched_unregister();
+ mutex_unlock(&sched_register_mutex);
}
void tracing_start_cmdline_record(void)
@@ -149,40 +148,85 @@ void tracing_stop_cmdline_record(void)
tracing_stop_sched_switch();
}
+/**
+ * tracing_start_sched_switch_record - start tracing context switches
+ *
+ * Turns on context switch tracing for a tracer.
+ */
+void tracing_start_sched_switch_record(void)
+{
+ if (unlikely(!ctx_trace)) {
+ WARN_ON(1);
+ return;
+ }
+
+ tracing_start_sched_switch();
+
+ mutex_lock(&sched_register_mutex);
+ tracer_enabled++;
+ mutex_unlock(&sched_register_mutex);
+}
+
+/**
+ * tracing_stop_sched_switch_record - stop tracing context switches
+ *
+ * Turns off context switch tracing for a tracer.
+ */
+void tracing_stop_sched_switch_record(void)
+{
+ mutex_lock(&sched_register_mutex);
+ tracer_enabled--;
+ WARN_ON(tracer_enabled < 0);
+ mutex_unlock(&sched_register_mutex);
+
+ tracing_stop_sched_switch();
+}
+
+/**
+ * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
+ * @tr: trace array pointer to assign
+ *
+ * Some tracers might want to record the context switches in their
+ * trace. This function lets those tracers assign the trace array
+ * to use.
+ */
+void tracing_sched_switch_assign_trace(struct trace_array *tr)
+{
+ ctx_trace = tr;
+}
+
static void start_sched_trace(struct trace_array *tr)
{
sched_switch_reset(tr);
- tracing_start_cmdline_record();
- tracer_enabled = 1;
+ tracing_start_sched_switch_record();
}
static void stop_sched_trace(struct trace_array *tr)
{
- tracer_enabled = 0;
- tracing_stop_cmdline_record();
+ tracing_stop_sched_switch_record();
}
static void sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
-
- if (tr->ctrl)
- start_sched_trace(tr);
+ start_sched_trace(tr);
}
static void sched_switch_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
+ if (sched_ref)
stop_sched_trace(tr);
}
-static void sched_switch_trace_ctrl_update(struct trace_array *tr)
+static void sched_switch_trace_start(struct trace_array *tr)
{
- /* When starting a new trace, reset the buffers */
- if (tr->ctrl)
- start_sched_trace(tr);
- else
- stop_sched_trace(tr);
+ sched_switch_reset(tr);
+ tracing_start_sched_switch();
+}
+
+static void sched_switch_trace_stop(struct trace_array *tr)
+{
+ tracing_stop_sched_switch();
}
static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +234,8 @@ static struct tracer sched_switch_trace __read_mostly =
.name = "sched_switch",
.init = sched_switch_trace_init,
.reset = sched_switch_trace_reset,
- .ctrl_update = sched_switch_trace_ctrl_update,
+ .start = sched_switch_trace_start,
+ .stop = sched_switch_trace_stop,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sched_switch,
#endif
@@ -198,14 +243,6 @@ static struct tracer sched_switch_trace __read_mostly =
__init static int init_sched_switch_trace(void)
{
- int ret = 0;
-
- if (atomic_read(&sched_ref))
- ret = tracing_sched_register();
- if (ret) {
- pr_info("error registering scheduler trace\n");
- return ret;
- }
return register_tracer(&sched_switch_trace);
}
device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..983f2b1478c9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&data->disabled);
- /*
- * To prevent recursion from the scheduler, if the
- * resched flag was set before we entered, then
- * don't reschedule.
- */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
static struct ftrace_ops trace_ops __read_mostly =
@@ -271,6 +262,12 @@ out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
+/*
+ * save_tracer_enabled is used to save the state of the tracer_enabled
+ * variable when we disable it when we open a trace output file.
+ */
+static int save_tracer_enabled;
+
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
register_ftrace_function(&trace_ops);
- tracer_enabled = 1;
+ if (tracing_is_enabled()) {
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+ } else {
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+ }
return;
fail_deprobe_wake_new:
@@ -321,6 +324,7 @@ fail_deprobe:
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
unregister_trace_sched_switch(probe_wakeup_sched_switch);
unregister_trace_sched_wakeup_new(probe_wakeup);
@@ -330,40 +334,42 @@ static void stop_wakeup_tracer(struct trace_array *tr)
static void wakeup_tracer_init(struct trace_array *tr)
{
wakeup_trace = tr;
-
- if (tr->ctrl)
- start_wakeup_tracer(tr);
+ start_wakeup_tracer(tr);
}
static void wakeup_tracer_reset(struct trace_array *tr)
{
- if (tr->ctrl) {
- stop_wakeup_tracer(tr);
- /* make sure we put back any tasks we are tracing */
- wakeup_reset(tr);
- }
+ stop_wakeup_tracer(tr);
+ /* make sure we put back any tasks we are tracing */
+ wakeup_reset(tr);
}
-static void wakeup_tracer_ctrl_update(struct trace_array *tr)
+static void wakeup_tracer_start(struct trace_array *tr)
{
- if (tr->ctrl)
- start_wakeup_tracer(tr);
- else
- stop_wakeup_tracer(tr);
+ wakeup_reset(tr);
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+}
+
+static void wakeup_tracer_stop(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
}
static void wakeup_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
- if (iter->tr->ctrl)
- stop_wakeup_tracer(iter->tr);
+ tracer_enabled = 0;
}
static void wakeup_tracer_close(struct trace_iterator *iter)
{
/* forget about any processes we were recording */
- if (iter->tr->ctrl)
- start_wakeup_tracer(iter->tr);
+ if (save_tracer_enabled) {
+ wakeup_reset(iter->tr);
+ tracer_enabled = 1;
+ }
}
static struct tracer wakeup_tracer __read_mostly =
@@ -371,9 +377,10 @@ static struct tracer wakeup_tracer __read_mostly =
.name = "wakeup",
.init = wakeup_tracer_init,
.reset = wakeup_tracer_reset,
+ .start = wakeup_tracer_start,
+ .stop = wakeup_tracer_stop,
.open = wakeup_tracer_open,
.close = wakeup_tracer_close,
- .ctrl_update = wakeup_tracer_ctrl_update,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..24e6e075e6d6 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_STACK:
case TRACE_PRINT:
case TRACE_SPECIAL:
+ case TRACE_BRANCH:
return 1;
}
return 0;
@@ -110,7 +111,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ftrace_set_filter(func_name, strlen(func_name), 1);
/* enable tracing */
- tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
@@ -134,13 +134,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
/* we should only have one item */
if (!ret && count != 1) {
@@ -148,6 +148,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ret = -1;
goto out;
}
+
out:
ftrace_enabled = save_ftrace_enabled;
tracer_enabled = save_tracer_enabled;
@@ -180,18 +181,17 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
ftrace_enabled = 1;
tracer_enabled = 1;
- tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
ftrace_enabled = 0;
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -223,7 +223,6 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
@@ -232,13 +231,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
udelay(100);
local_irq_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -259,8 +258,20 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
unsigned long count;
int ret;
+ /*
+ * Now that the big kernel lock is no longer preemptible,
+ * and this is called with the BKL held, it will always
+ * fail. If preemption is already disabled, simply
+ * pass the test. When the BKL is removed, or becomes
+ * preemptible again, we will once again test this,
+ * so keep it in.
+ */
+ if (preempt_count()) {
+ printk(KERN_CONT "can not test ... force ");
+ return 0;
+ }
+
/* start the tracing */
- tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
@@ -269,13 +280,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
udelay(100);
preempt_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
ret = trace_test_buffer(&max_tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -296,8 +307,20 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
unsigned long count;
int ret;
+ /*
+ * Now that the big kernel lock is no longer preemptible,
+ * and this is called with the BKL held, it will always
+ * fail. If preemption is already disabled, simply
+ * pass the test. When the BKL is removed, or becomes
+ * preemptible again, we will once again test this,
+ * so keep it in.
+ */
+ if (preempt_count()) {
+ printk(KERN_CONT "can not test ... force ");
+ return 0;
+ }
+
/* start the tracing */
- tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
@@ -312,27 +335,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
local_irq_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
- if (ret)
+ if (ret) {
+ tracing_start();
goto out;
+ }
ret = trace_test_buffer(&max_tr, &count);
- if (ret)
+ if (ret) {
+ tracing_start();
goto out;
+ }
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
+ tracing_start();
goto out;
}
/* do the test by disabling interrupts first this time */
tracing_max_latency = 0;
- tr->ctrl = 1;
- trace->ctrl_update(tr);
+ tracing_start();
preempt_disable();
local_irq_disable();
udelay(100);
@@ -341,8 +367,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
local_irq_enable();
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (ret)
@@ -358,6 +383,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
out:
trace->reset(tr);
+ tracing_start();
tracing_max_latency = save_max;
return ret;
@@ -423,7 +449,6 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
wait_for_completion(&isrt);
/* start the tracing */
- tr->ctrl = 1;
trace->init(tr);
/* reset the max latency */
tracing_max_latency = 0;
@@ -448,8 +473,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
if (!ret)
@@ -457,6 +481,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
trace->reset(tr);
+ tracing_start();
tracing_max_latency = save_max;
@@ -480,16 +505,15 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
int ret;
/* start the tracing */
- tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
@@ -508,17 +532,38 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- tr->ctrl = 1;
trace->init(tr);
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
- tr->ctrl = 0;
- trace->ctrl_update(tr);
+ tracing_stop();
/* check the trace buffer */
ret = trace_test_buffer(tr, &count);
trace->reset(tr);
+ tracing_start();
return ret;
}
#endif /* CONFIG_SYSPROF_TRACER */
+
+#ifdef CONFIG_BRANCH_TRACER
+int
+trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+ return ret;
+}
+#endif /* CONFIG_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index be682b62fe58..d39e8b7de6a2 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
if (unlikely(!ftrace_enabled || stack_trace_disabled))
return;
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
/* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
out:
per_cpu(trace_active, cpu)--;
/* prevent recursion in schedule */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..05f753422aea 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -265,23 +265,12 @@ static void stack_trace_init(struct trace_array *tr)
{
sysprof_trace = tr;
- if (tr->ctrl)
- start_stack_trace(tr);
+ start_stack_trace(tr);
}
static void stack_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
- stop_stack_trace(tr);
-}
-
-static void stack_trace_ctrl_update(struct trace_array *tr)
-{
- /* When starting a new trace, reset the buffers */
- if (tr->ctrl)
- start_stack_trace(tr);
- else
- stop_stack_trace(tr);
+ stop_stack_trace(tr);
}
static struct tracer stack_trace __read_mostly =
@@ -289,7 +278,6 @@ static struct tracer stack_trace __read_mostly =
.name = "sysprof",
.init = stack_trace_init,
.reset = stack_trace_reset,
- .ctrl_update = stack_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sysprof,
#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..e96590f17de1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
*/
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
/*
* Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
struct hlist_node hlist;
void **funcs;
int refcount; /* Number of times armed. 0 if disarmed. */
- struct rcu_head rcu;
- void *oldptr;
- unsigned char rcu_pending:1;
char name[0];
};
-static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+struct tp_probes {
+ union {
+ struct rcu_head rcu;
+ struct list_head list;
+ } u;
+ void *probes[0];
+};
-static void free_old_closure(struct rcu_head *head)
+static inline void *allocate_probes(int count)
{
- struct tracepoint_entry *entry = container_of(head,
- struct tracepoint_entry, rcu);
- kfree(entry->oldptr);
- /* Make sure we free the data before setting the pending flag to 0 */
- smp_wmb();
- entry->rcu_pending = 0;
+ struct tp_probes *p = kmalloc(count * sizeof(void *)
+ + sizeof(struct tp_probes), GFP_KERNEL);
+ return p == NULL ? NULL : p->probes;
}
-static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+static void rcu_free_old_probes(struct rcu_head *head)
{
- if (!old)
- return;
- entry->oldptr = old;
- entry->rcu_pending = 1;
- /* write rcu_pending before calling the RCU callback */
- smp_wmb();
- call_rcu_sched(&entry->rcu, free_old_closure);
+ kfree(container_of(head, struct tp_probes, u.rcu));
+}
+
+static inline void release_probes(void *old)
+{
+ if (old) {
+ struct tp_probes *tp_probes = container_of(old,
+ struct tp_probes, probes[0]);
+ call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
+ }
}
static void debug_print_probes(struct tracepoint_entry *entry)
{
int i;
- if (!tracepoint_debug)
+ if (!tracepoint_debug || !entry->funcs)
return;
for (i = 0; entry->funcs[i]; i++)
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
return ERR_PTR(-EEXIST);
}
/* + 2 : one for new probe, one for NULL func */
- new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+ new = allocate_probes(nr_probes + 2);
if (new == NULL)
return ERR_PTR(-ENOMEM);
if (old)
memcpy(new, old, nr_probes * sizeof(void *));
new[nr_probes] = probe;
+ new[nr_probes + 1] = NULL;
entry->refcount = nr_probes + 1;
entry->funcs = new;
debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
old = entry->funcs;
if (!old)
- return NULL;
+ return ERR_PTR(-ENOENT);
debug_print_probes(entry);
/* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
int j = 0;
/* N -> M, (N > 1, M > 0) */
/* + 1 for NULL */
- new = kzalloc((nr_probes - nr_del + 1)
- * sizeof(void *), GFP_KERNEL);
+ new = allocate_probes(nr_probes - nr_del + 1);
if (new == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0; old[i]; i++)
if ((probe && old[i] != probe))
new[j++] = old[i];
+ new[nr_probes - nr_del] = NULL;
entry->refcount = nr_probes - nr_del;
entry->funcs = new;
}
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
memcpy(&e->name[0], name, name_len);
e->funcs = NULL;
e->refcount = 0;
- e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
return e;
}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
* Remove the tracepoint from the tracepoint hash table. Must be called with
* mutex_lock held.
*/
-static int remove_tracepoint(const char *name)
+static inline void remove_tracepoint(struct tracepoint_entry *e)
{
- struct hlist_head *head;
- struct hlist_node *node;
- struct tracepoint_entry *e;
- int found = 0;
- size_t len = strlen(name) + 1;
- u32 hash = jhash(name, len-1, 0);
-
- head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
- hlist_for_each_entry(e, node, head, hlist) {
- if (!strcmp(name, e->name)) {
- found = 1;
- break;
- }
- }
- if (!found)
- return -ENOENT;
- if (e->refcount)
- return -EBUSY;
hlist_del(&e->hlist);
- /* Make sure the call_rcu_sched has been executed */
- if (e->rcu_pending)
- rcu_barrier_sched();
kfree(e);
- return 0;
}
/*
@@ -320,6 +302,23 @@ static void tracepoint_update_probes(void)
module_update_tracepoints();
}
+static void *tracepoint_add_probe(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+
+ entry = get_tracepoint(name);
+ if (!entry) {
+ entry = add_tracepoint(name);
+ if (IS_ERR(entry))
+ return entry;
+ }
+ old = tracepoint_entry_add_probe(entry, probe);
+ if (IS_ERR(old) && !entry->refcount)
+ remove_tracepoint(entry);
+ return old;
+}
+
/**
* tracepoint_probe_register - Connect a probe to a tracepoint
* @name: tracepoint name
@@ -330,44 +329,36 @@ static void tracepoint_update_probes(void)
*/
int tracepoint_probe_register(const char *name, void *probe)
{
- struct tracepoint_entry *entry;
- int ret = 0;
void *old;
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry) {
- entry = add_tracepoint(name);
- if (IS_ERR(entry)) {
- ret = PTR_ERR(entry);
- goto end;
- }
- }
- /*
- * If we detect that a call_rcu_sched is pending for this tracepoint,
- * make sure it's executed now.
- */
- if (entry->rcu_pending)
- rcu_barrier_sched();
- old = tracepoint_entry_add_probe(entry, probe);
- if (IS_ERR(old)) {
- ret = PTR_ERR(old);
- goto end;
- }
+ old = tracepoint_add_probe(name, probe);
mutex_unlock(&tracepoints_mutex);
+ if (IS_ERR(old))
+ return PTR_ERR(old);
+
tracepoint_update_probes(); /* may update entry */
- mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- WARN_ON(!entry);
- if (entry->rcu_pending)
- rcu_barrier_sched();
- tracepoint_entry_free_old(entry, old);
-end:
- mutex_unlock(&tracepoints_mutex);
- return ret;
+ release_probes(old);
+ return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+static void *tracepoint_remove_probe(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+
+ entry = get_tracepoint(name);
+ if (!entry)
+ return ERR_PTR(-ENOENT);
+ old = tracepoint_entry_remove_probe(entry, probe);
+ if (IS_ERR(old))
+ return old;
+ if (!entry->refcount)
+ remove_tracepoint(entry);
+ return old;
+}
+
/**
* tracepoint_probe_unregister - Disconnect a probe from a tracepoint
* @name: tracepoint name
@@ -380,38 +371,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
*/
int tracepoint_probe_unregister(const char *name, void *probe)
{
- struct tracepoint_entry *entry;
void *old;
- int ret = -ENOENT;
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry)
- goto end;
- if (entry->rcu_pending)
- rcu_barrier_sched();
- old = tracepoint_entry_remove_probe(entry, probe);
- if (!old) {
- printk(KERN_WARNING "Warning: Trying to unregister a probe"
- "that doesn't exist\n");
- goto end;
- }
+ old = tracepoint_remove_probe(name, probe);
mutex_unlock(&tracepoints_mutex);
+ if (IS_ERR(old))
+ return PTR_ERR(old);
+
tracepoint_update_probes(); /* may update entry */
+ release_probes(old);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+static LIST_HEAD(old_probes);
+static int need_update;
+
+static void tracepoint_add_old_probes(void *old)
+{
+ need_update = 1;
+ if (old) {
+ struct tp_probes *tp_probes = container_of(old,
+ struct tp_probes, probes[0]);
+ list_add(&tp_probes->u.list, &old_probes);
+ }
+}
+
+/**
+ * tracepoint_probe_register_noupdate - register a probe but not connect
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_register_noupdate(const char *name, void *probe)
+{
+ void *old;
+
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry)
- goto end;
- if (entry->rcu_pending)
- rcu_barrier_sched();
- tracepoint_entry_free_old(entry, old);
- remove_tracepoint(name); /* Ignore busy error message */
- ret = 0;
-end:
+ old = tracepoint_add_probe(name, probe);
+ if (IS_ERR(old)) {
+ mutex_unlock(&tracepoints_mutex);
+ return PTR_ERR(old);
+ }
+ tracepoint_add_old_probes(old);
mutex_unlock(&tracepoints_mutex);
- return ret;
+ return 0;
}
-EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
+
+/**
+ * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
+{
+ void *old;
+
+ mutex_lock(&tracepoints_mutex);
+ old = tracepoint_remove_probe(name, probe);
+ if (IS_ERR(old)) {
+ mutex_unlock(&tracepoints_mutex);
+ return PTR_ERR(old);
+ }
+ tracepoint_add_old_probes(old);
+ mutex_unlock(&tracepoints_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
+
+/**
+ * tracepoint_probe_update_all - update tracepoints
+ */
+void tracepoint_probe_update_all(void)
+{
+ LIST_HEAD(release_probes);
+ struct tp_probes *pos, *next;
+
+ mutex_lock(&tracepoints_mutex);
+ if (!need_update) {
+ mutex_unlock(&tracepoints_mutex);
+ return;
+ }
+ if (!list_empty(&old_probes))
+ list_replace_init(&old_probes, &release_probes);
+ need_update = 0;
+ mutex_unlock(&tracepoints_mutex);
+
+ tracepoint_update_probes();
+ list_for_each_entry_safe(pos, next, &release_probes, u.list) {
+ list_del(&pos->u.list);
+ call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
+ }
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
/**
* tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 468fbc9016c7..7a176773af85 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,16 +198,10 @@ cmd_modversions = \
fi;
endif
-ifdef CONFIG_64BIT
-arch_bits = 64
-else
-arch_bits = 32
-endif
-
ifdef CONFIG_FTRACE_MCOUNT_RECORD
-cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \
- "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \
- "$(NM)" "$(RM)" "$(MV)" "$(@)";
+cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+ "$(if $(CONFIG_64BIT),64,32)" \
+ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
endif
define rule_cc_o_c
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index d2c61efc216f..a8635a10fef2 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -109,8 +109,8 @@ my $stylecounter = 0;
my %rows;
my $rowscount = 1;
my @initcalls = sort { $start{$a} <=> $start{$b} } keys(%start);
-my $key;
-foreach $key (@initcalls) {
+
+foreach my $key (@initcalls) {
my $duration = $end{$key} - $start{$key};
if ($duration >= $threshold) {
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 6b9fe3eb8360..eeac71c87c66 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -134,6 +134,7 @@ my $section_regex; # Find the start of a section
my $function_regex; # Find the name of a function
# (return offset and func name)
my $mcount_regex; # Find the call site to mcount (return offset)
+my $alignment; # The .align value to use for $mcount_section
if ($arch eq "x86") {
if ($bits == 64) {
@@ -148,6 +149,7 @@ if ($arch eq "x86_64") {
$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
$type = ".quad";
+ $alignment = 8;
# force flags for this arch
$ld .= " -m elf_x86_64";
@@ -160,6 +162,7 @@ if ($arch eq "x86_64") {
$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
$type = ".long";
+ $alignment = 4;
# force flags for this arch
$ld .= " -m elf_i386";
@@ -288,6 +291,7 @@ sub update_funcs
open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
$opened = 1;
print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
+ print FILE "\t.align $alignment\n";
}
printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
}
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
new file mode 100644
index 000000000000..902f9a992620
--- /dev/null
+++ b/scripts/tracing/draw_functrace.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+
+"""
+Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com>
+Licensed under the terms of the GNU GPL License version 2
+
+This script parses a trace provided by the function tracer in
+kernel/trace/trace_functions.c
+The resulting trace is processed into a tree to produce a more
+human-readable view of the call stack by drawing a textual but hierarchical
+tree of calls. Only the functions' names and the call times are provided.
+
+Usage:
+ Be sure that you have CONFIG_FUNCTION_TRACER
+ # mkdir /debug
+ # mount -t debugfs debugfs /debug
+ # echo function > /debug/tracing/current_tracer
+ $ cat /debug/tracing/trace_pipe > ~/raw_trace_func
+ Wait a little while, but not too long: the script is a bit slow.
+ Break the pipe (Ctrl + Z)
+ $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
+ Then you have your drawn trace in draw_functrace
+"""
+
+
+import sys, re
+
+class CallTree:
+ """ This class provides a tree representation of the functions
+ call stack. If a function has no parent in the kernel (interrupt,
+ syscall, kernel thread...) then it is attached to a virtual parent
+ called ROOT.
+ """
+ ROOT = None
+
+ def __init__(self, func, time = None, parent = None):
+ self._func = func
+ self._time = time
+ if parent is None:
+ self._parent = CallTree.ROOT
+ else:
+ self._parent = parent
+ self._children = []
+
+ def calls(self, func, calltime):
+ """ If a function calls another one, call this method to insert it
+ into the tree at the appropriate place.
+ @return: A reference to the newly created child node.
+ """
+ child = CallTree(func, calltime, self)
+ self._children.append(child)
+ return child
+
+ def getParent(self, func):
+ """ Retrieve the closest node, starting from the current node and
+ walking up through its parents, whose name is given by func. If no
+ such node is found, create it as a new child of ROOT.
+ @return: A reference to the matching (or newly created) node.
+ """
+ tree = self
+ while tree != CallTree.ROOT and tree._func != func:
+ tree = tree._parent
+ if tree == CallTree.ROOT:
+ child = CallTree.ROOT.calls(func, None)
+ return child
+ return tree
+
+ def __repr__(self):
+ return self.__toString("", True)
+
+ def __toString(self, branch, lastChild):
+ if self._time is not None:
+ s = "%s----%s (%s)\n" % (branch, self._func, self._time)
+ else:
+ s = "%s----%s\n" % (branch, self._func)
+
+ i = 0
+ if lastChild:
+ branch = branch[:-1] + " "
+ while i < len(self._children):
+ if i != len(self._children) - 1:
+ s += "%s" % self._children[i].__toString(branch +\
+ " |", False)
+ else:
+ s += "%s" % self._children[i].__toString(branch +\
+ " |", True)
+ i += 1
+ return s
+
+class BrokenLineException(Exception):
+ """If the last line is not complete because of the pipe breakage,
+ we want to stop the processing and ignore this line.
+ """
+ pass
+
+class CommentLineException(Exception):
+ """ If the line is a comment (as in the beginning of the trace file),
+ just ignore it.
+ """
+ pass
+
+
+def parseLine(line):
+ line = line.strip()
+ if line.startswith("#"):
+ raise CommentLineException
+ m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
+ if m is None:
+ raise BrokenLineException
+ return (m.group(1), m.group(2), m.group(3))
+
+
+def main():
+ CallTree.ROOT = CallTree("Root (Nowhere)", None, None)
+ tree = CallTree.ROOT
+
+ for line in sys.stdin:
+ try:
+ calltime, callee, caller = parseLine(line)
+ except BrokenLineException:
+ break
+ except CommentLineException:
+ continue
+ tree = tree.getParent(caller)
+ tree = tree.calls(callee, calltime)
+
+ print CallTree.ROOT
+
+if __name__ == "__main__":
+ main()