summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-10 11:43:00 +0200
committerIngo Molnar <mingo@elte.hu>2008-07-10 11:43:00 +0200
commitbac0c9103b31c3dd83ad9d731dd9834e2ba75e4f (patch)
tree702dd6a7ce06d224d594c2293af546b11ac9f51b
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rol... (diff)
parentftrace: prevent ftrace modifications while being kprobe'd, v2 (diff)
downloadlinux-bac0c9103b31c3dd83ad9d731dd9834e2ba75e4f.tar.xz
linux-bac0c9103b31c3dd83ad9d731dd9834e2ba75e4f.zip
Merge branch 'tracing/ftrace' into auto-ftrace-next
-rw-r--r--Makefile4
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/boot/compressed/Makefile6
-rw-r--r--arch/arm/kernel/Makefile5
-rw-r--r--arch/arm/kernel/armksyms.c5
-rw-r--r--arch/arm/kernel/entry-common.S51
-rw-r--r--arch/arm/kernel/ftrace.c116
-rw-r--r--arch/arm/kernel/kprobes.c2
-rw-r--r--arch/powerpc/Kconfig4
-rw-r--r--arch/powerpc/kernel/Makefile14
-rw-r--r--arch/powerpc/kernel/entry_32.S127
-rw-r--r--arch/powerpc/kernel/entry_64.S65
-rw-r--r--arch/powerpc/kernel/ftrace.c154
-rw-r--r--arch/powerpc/kernel/io.c3
-rw-r--r--arch/powerpc/kernel/irq.c6
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/setup_32.c6
-rw-r--r--arch/powerpc/platforms/powermac/Makefile5
-rw-r--r--arch/sparc64/Kconfig2
-rw-r--r--arch/sparc64/Kconfig.debug2
-rw-r--r--arch/sparc64/kernel/Makefile1
-rw-r--r--arch/sparc64/kernel/ftrace.c94
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c4
-rw-r--r--arch/sparc64/lib/mcount.S58
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug8
-rw-r--r--arch/x86/kernel/Makefile8
-rw-r--r--arch/x86/kernel/alternative.c22
-rw-r--r--arch/x86/kernel/entry_32.S72
-rw-r--r--arch/x86/kernel/entry_64.S106
-rw-r--r--arch/x86/kernel/ftrace.c141
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c9
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/process_64.c3
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c11
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/thunk_32.S47
-rw-r--r--arch/x86/lib/thunk_64.S19
-rw-r--r--arch/x86/mm/fault.c56
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c10
-rw-r--r--arch/x86/vdso/vclock_gettime.c15
-rw-r--r--arch/x86/vdso/vgetcpu.c3
-rw-r--r--include/asm-arm/ftrace.h14
-rw-r--r--include/asm-arm/kprobes.h1
-rw-r--r--include/asm-powerpc/ftrace.h14
-rw-r--r--include/asm-powerpc/hw_irq.h10
-rw-r--r--include/asm-sparc64/ftrace.h14
-rw-r--r--include/asm-x86/alternative.h2
-rw-r--r--include/asm-x86/ftrace.h14
-rw-r--r--include/asm-x86/irqflags.h24
-rw-r--r--include/asm-x86/kdebug.h9
-rw-r--r--include/asm-x86/vsyscall.h3
-rw-r--r--include/linux/ftrace.h143
-rw-r--r--include/linux/irqflags.h13
-rw-r--r--include/linux/kprobes.h4
-rw-r--r--include/linux/linkage.h2
-rw-r--r--include/linux/marker.h40
-rw-r--r--include/linux/preempt.h34
-rw-r--r--include/linux/sched.h16
-rw-r--r--include/linux/writeback.h2
-rw-r--r--kernel/Makefile14
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/lockdep.c33
-rw-r--r--kernel/marker.c30
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/sched.c57
-rw-r--r--kernel/semaphore.c1
-rw-r--r--kernel/spinlock.c2
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/trace/Kconfig127
-rw-r--r--kernel/trace/Makefile22
-rw-r--r--kernel/trace/ftrace.c1710
-rw-r--r--kernel/trace/trace.c3100
-rw-r--r--kernel/trace/trace.h313
-rw-r--r--kernel/trace/trace_functions.c78
-rw-r--r--kernel/trace/trace_irqsoff.c486
-rw-r--r--kernel/trace/trace_sched_switch.c286
-rw-r--r--kernel/trace/trace_sched_wakeup.c447
-rw-r--r--kernel/trace/trace_selftest.c540
-rw-r--r--kernel/trace/trace_selftest_dynamic.c7
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile9
-rw-r--r--lib/smp_processor_id.c6
-rw-r--r--mm/page-writeback.c10
-rw-r--r--scripts/Makefile.lib3
89 files changed, 8834 insertions, 115 deletions
diff --git a/Makefile b/Makefile
index 6315424a00b9..8e519953d2db 100644
--- a/Makefile
+++ b/Makefile
@@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g
KBUILD_AFLAGS += -gdwarf-2
endif
+ifdef CONFIG_FTRACE
+KBUILD_CFLAGS += -pg
+endif
+
# We trigger additional mismatches with less inlining
ifdef CONFIG_DEBUG_SECTION_MISMATCH
KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b786e68914d4..3845e5c8a34f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -14,6 +14,8 @@ config ARM
select HAVE_OPROFILE
select HAVE_KPROBES if (!XIP_KERNEL)
select HAVE_KRETPROBES if (HAVE_KPROBES)
+ select HAVE_FTRACE if (!XIP_KERNEL)
+ select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index de9d9ee50958..95baac4939e0 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -69,6 +69,12 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \
head.o misc.o $(OBJS)
+
+ifeq ($(CONFIG_FTRACE),y)
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+endif
+
EXTRA_CFLAGS := -fpic -fno-builtin
EXTRA_AFLAGS :=
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ad455ff5aebe..eb9092ca8008 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -4,6 +4,10 @@
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
# Object file lists.
obj-y := compat.o entry-armv.o entry-common.o irq.o \
@@ -18,6 +22,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
obj-$(CONFIG_ATAGS_PROC) += atags.o
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 688b7b1ee416..cc7b246e9652 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -18,6 +18,7 @@
#include <asm/io.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <asm/ftrace.h>
/*
* libgcc functions - functions that are used internally by the
@@ -181,3 +182,7 @@ EXPORT_SYMBOL(_find_next_bit_be);
#endif
EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_FTRACE
+EXPORT_SYMBOL(mcount);
+#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 597ed00a08d8..84694e88b428 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -9,6 +9,7 @@
*/
#include <asm/unistd.h>
+#include <asm/ftrace.h>
#include <asm/arch/entry-macro.S>
#include "entry-header.S"
@@ -99,6 +100,56 @@ ENTRY(ret_from_fork)
#undef CALL
#define CALL(x) .long x
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(mcount)
+ stmdb sp!, {r0-r3, lr}
+ mov r0, lr
+ sub r0, r0, #MCOUNT_INSN_SIZE
+
+ .globl mcount_call
+mcount_call:
+ bl ftrace_stub
+ ldmia sp!, {r0-r3, pc}
+
+ENTRY(ftrace_caller)
+ stmdb sp!, {r0-r3, lr}
+ ldr r1, [fp, #-4]
+ mov r0, lr
+ sub r0, r0, #MCOUNT_INSN_SIZE
+
+ .globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ ldmia sp!, {r0-r3, pc}
+
+#else
+
+ENTRY(mcount)
+ stmdb sp!, {r0-r3, lr}
+ ldr r0, =ftrace_trace_function
+ ldr r2, [r0]
+ adr r0, ftrace_stub
+ cmp r0, r2
+ bne trace
+ ldmia sp!, {r0-r3, pc}
+
+trace:
+ ldr r1, [fp, #-4]
+ mov r0, lr
+ sub r0, r0, #MCOUNT_INSN_SIZE
+ mov lr, pc
+ mov pc, r2
+ ldmia sp!, {r0-r3, pc}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ .globl ftrace_stub
+ftrace_stub:
+ mov pc, lr
+
+#endif /* CONFIG_FTRACE */
+
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
new file mode 100644
index 000000000000..76d50e6091bc
--- /dev/null
+++ b/arch/arm/kernel/ftrace.c
@@ -0,0 +1,116 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Abhishek Sagar <sagar.abhishek@gmail.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+
+#define PC_OFFSET 8
+#define BL_OPCODE 0xeb000000
+#define BL_OFFSET_MASK 0x00ffffff
+
+static unsigned long bl_insn;
+static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */
+
+unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)&NOP;
+}
+
+/* construct a branch (BL) instruction to addr */
+unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+ long offset;
+
+ offset = (long)addr - (long)(pc + PC_OFFSET);
+ if (unlikely(offset < -33554432 || offset > 33554428)) {
+ /* Can't generate branches that far (from ARM ARM). Ftrace
+ * doesn't generate branches outside of kernel text.
+ */
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+ offset = (offset >> 2) & BL_OFFSET_MASK;
+ bl_insn = BL_OPCODE | offset;
+ return (unsigned char *)&bl_insn;
+}
+
+int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned long err = 0, replaced = 0, old, new;
+
+ old = *(unsigned long *)old_code;
+ new = *(unsigned long *)new_code;
+
+ __asm__ __volatile__ (
+ "1: ldr %1, [%2] \n"
+ " cmp %1, %4 \n"
+ "2: streq %3, [%2] \n"
+ " cmpne %1, %3 \n"
+ " movne %0, #2 \n"
+ "3:\n"
+
+ ".section .fixup, \"ax\"\n"
+ "4: mov %0, #1 \n"
+ " b 3b \n"
+ ".previous\n"
+
+ ".section __ex_table, \"a\"\n"
+ " .long 1b, 4b \n"
+ " .long 2b, 4b \n"
+ ".previous\n"
+
+ : "=r"(err), "=r"(replaced)
+ : "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced)
+ : "memory");
+
+ if (!err && (replaced == old))
+ flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+ return err;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ int ret;
+ unsigned long pc, old;
+ unsigned char *new;
+
+ pc = (unsigned long)&ftrace_call;
+ memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(pc, (unsigned long)func);
+ ret = ftrace_modify_code(pc, (unsigned char *)&old, new);
+ return ret;
+}
+
+int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long pc, old;
+ unsigned long *addr = data;
+ unsigned char *new;
+
+ pc = (unsigned long)&mcount_call;
+ memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(pc, *addr);
+ *addr = ftrace_modify_code(pc, (unsigned char *)&old, new);
+ return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+ ftrace_mcount_set(data);
+ return 0;
+}
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5593dd207216..5ee39e10c8d1 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
* for kretprobe handlers which should normally be interested in r0 only
* anyway.
*/
-static void __attribute__((naked)) __kprobes kretprobe_trampoline(void)
+void __naked __kprobes kretprobe_trampoline(void)
{
__asm__ __volatile__ (
"stmdb sp!, {r0 - r11} \n\t"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e2659407..a5e9912e2d37 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -105,11 +105,13 @@ config ARCH_NO_VIRT_TO_BUS
config PPC
bool
default y
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE
select HAVE_IDE
- select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_LMB
+ select HAVE_OPROFILE
config EARLY_PRINTK
bool
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2346d271fbfd..f3f5e2641432 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+ifdef CONFIG_FTRACE
+# Do not trace early boot code
+CFLAGS_REMOVE_cputable.o = -pg
+CFLAGS_REMOVE_prom_init.o = -pg
+
+ifdef CONFIG_DYNAMIC_FTRACE
+# dynamic ftrace setup.
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
+endif
+
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
@@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c8614d9875c..7231a708af0d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -30,6 +30,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include <asm/ftrace.h>
#undef SHOW_SYSCALLS
#undef SHOW_SYSCALLS_TASK
@@ -1035,3 +1036,129 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
+
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3
+ stw r7, 28(r1)
+ mfcr r5
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ stw r3, 44(r1)
+ stw r5, 8(r1)
+ subi r3, r3, MCOUNT_INSN_SIZE
+ .globl mcount_call
+mcount_call:
+ bl ftrace_stub
+ nop
+ lwz r6, 8(r1)
+ lwz r0, 44(r1)
+ lwz r3, 12(r1)
+ mtctr r0
+ lwz r4, 16(r1)
+ mtcr r6
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r0, 52(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ mtlr r0
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1, r1, 48
+ bctr
+
+_GLOBAL(ftrace_caller)
+ /* Based off of objdump optput from glibc */
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3
+ lwz r4, 52(r1)
+ mfcr r5
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ stw r3, 44(r1)
+ stw r5, 8(r1)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+ lwz r6, 8(r1)
+ lwz r0, 44(r1)
+ lwz r3, 12(r1)
+ mtctr r0
+ lwz r4, 16(r1)
+ mtcr r6
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r0, 52(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ mtlr r0
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1, r1, 48
+ bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3
+ lwz r4, 52(r1)
+ mfcr r5
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ stw r3, 44(r1)
+ stw r5, 8(r1)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5, ftrace_trace_function)
+ lwz r5,0(r5)
+
+ mtctr r5
+ bctrl
+
+ nop
+
+ lwz r6, 8(r1)
+ lwz r0, 44(r1)
+ lwz r3, 12(r1)
+ mtctr r0
+ lwz r4, 16(r1)
+ mtcr r6
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r0, 52(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ mtlr r0
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1, r1, 48
+ bctr
+#endif
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#endif /* CONFIG_MCOUNT */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c0db5b769e55..2f511a969d2c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -31,6 +31,7 @@
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
+#include <asm/ftrace.h>
/*
* System calls.
@@ -870,3 +871,67 @@ _GLOBAL(enter_prom)
ld r0,16(r1)
mtlr r0
blr
+
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ subi r3, r3, MCOUNT_INSN_SIZE
+ .globl mcount_call
+mcount_call:
+ bl ftrace_stub
+ nop
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+
+_GLOBAL(ftrace_caller)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+#else
+_GLOBAL(mcount)
+ blr
+
+_GLOBAL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5,ftrace_trace_function)
+ ld r5,0(r5)
+ ld r5,0(r5)
+ mtctr r5
+ bctrl
+
+ nop
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+
+#endif
+#endif
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 000000000000..3855ceb937b0
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,154 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+
+
+static unsigned int ftrace_nop = 0x60000000;
+
+#ifdef CONFIG_PPC32
+# define GET_ADDR(addr) addr
+#else
+/* PowerPC64's functions are data that points to the functions */
+# define GET_ADDR(addr) *(unsigned long *)addr
+#endif
+
+
+static unsigned int notrace ftrace_calc_offset(long ip, long addr)
+{
+ return (int)(addr - ip);
+}
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)&ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static unsigned int op;
+
+ /*
+ * It would be nice to just use create_function_call, but that will
+ * update the code itself. Here we need to just return the
+ * instruction that is going to be modified, without modifying the
+ * code.
+ */
+ addr = GET_ADDR(addr);
+
+ /* Set to "bl addr" */
+ op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc);
+
+ /*
+ * No locking needed, this must be called via kstop_machine
+ * which in essence is like running on a uniprocessor machine.
+ */
+ return (unsigned char *)&op;
+}
+
+#ifdef CONFIG_PPC64
+# define _ASM_ALIGN " .align 3 "
+# define _ASM_PTR " .llong "
+#else
+# define _ASM_ALIGN " .align 2 "
+# define _ASM_PTR " .long "
+#endif
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned replaced;
+ unsigned old = *(unsigned *)old_code;
+ unsigned new = *(unsigned *)new_code;
+ int faulted = 0;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine.
+ */
+ asm volatile (
+ "1: lwz %1, 0(%2)\n"
+ " cmpw %1, %5\n"
+ " bne 2f\n"
+ " stwu %3, 0(%2)\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: li %0, 1\n"
+ " b 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ _ASM_ALIGN "\n"
+ _ASM_PTR "1b, 3b\n"
+ ".previous"
+ : "=r"(faulted), "=r"(replaced)
+ : "r"(ip), "r"(new),
+ "0"(faulted), "r"(old)
+ : "memory");
+
+ if (replaced != old && replaced != new)
+ faulted = 2;
+
+ if (!faulted)
+ flush_icache_range(ip, ip + 8);
+
+ return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long ip = (long)(&mcount_call);
+ unsigned long *addr = data;
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ /*
+ * Replace the mcount stub with a pointer to the
+ * ip recorder function.
+ */
+ memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, *addr);
+ *addr = ftrace_modify_code(ip, old, new);
+
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ /* This is running in kstop_machine */
+
+ ftrace_mcount_set(data);
+
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index e31aca9208eb..1882bf419fa6 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns);
#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
-void _memset_io(volatile void __iomem *addr, int c, unsigned long n)
+notrace void
+_memset_io(volatile void __iomem *addr, int c, unsigned long n)
{
void *p = (void __force *)addr;
u32 lc = c;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bcc249d90c4d..dcc946e67099 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc);
int distribute_irqs = 1;
-static inline unsigned long get_hard_enabled(void)
+static inline notrace unsigned long get_hard_enabled(void)
{
unsigned long enabled;
@@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void)
return enabled;
}
-static inline void set_soft_enabled(unsigned long enable)
+static inline notrace void set_soft_enabled(unsigned long enable)
{
__asm__ __volatile__("stb %0,%1(13)"
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
-void raw_local_irq_restore(unsigned long en)
+notrace void raw_local_irq_restore(unsigned long en)
{
/*
* get_paca()->soft_enabled = en;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d3ac631cbd26..a8d02506468a 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -42,6 +42,7 @@
#include <asm/div64.h>
#include <asm/signal.h>
#include <asm/dcr.h>
+#include <asm/ftrace.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@@ -67,6 +68,10 @@ EXPORT_SYMBOL(single_step_exception);
EXPORT_SYMBOL(sys_sigreturn);
#endif
+#ifdef CONFIG_FTRACE
+EXPORT_SYMBOL(_mcount);
+#endif
+
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5112a4aa801d..19e8fcb9cea8 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -81,7 +81,7 @@ int ucache_bsize;
* from the address that it was linked at, so we must use RELOC/PTRRELOC
* to access static data (including strings). -- paulus
*/
-unsigned long __init early_init(unsigned long dt_ptr)
+notrace unsigned long __init early_init(unsigned long dt_ptr)
{
unsigned long offset = reloc_offset();
struct cpu_spec *spec;
@@ -111,7 +111,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
* This is called very early on the boot process, after a minimal
* MMU environment has been set up but before MMU_init is called.
*/
-void __init machine_init(unsigned long dt_ptr, unsigned long phys)
+notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
{
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@@ -133,7 +133,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
#ifdef CONFIG_BOOKE_WDT
/* Checks wdt=x and wdt_period=xx command-line option */
-int __init early_parse_wdt(char *p)
+notrace int __init early_parse_wdt(char *p)
{
if (p && strncmp(p, "0", 1) != 0)
booke_wdt_enabled = 1;
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 4d72c8f72159..89774177b209 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,5 +1,10 @@
CFLAGS_bootx_init.o += -fPIC
+ifdef CONFIG_FTRACE
+# Do not trace early boot code
+CFLAGS_REMOVE_bootx_init.o = -pg
+endif
+
obj-y += pic.o setup.o time.o feature.o pci.o \
sleep.o low_i2c.o cache.o pfunc_core.o \
pfunc_base.o
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index eb36f3b746b8..fca9246470b1 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -11,6 +11,8 @@ config SPARC
config SPARC64
bool
default y
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE
select HAVE_IDE
select HAVE_LMB
select HAVE_ARCH_KGDB
diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
index 6a4d28a4076d..d6d32d178fc8 100644
--- a/arch/sparc64/Kconfig.debug
+++ b/arch/sparc64/Kconfig.debug
@@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC
config MCOUNT
bool
- depends on STACK_DEBUG
+ depends on STACK_DEBUG || FTRACE
default y
config FRAME_POINTER
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index ec4f5ebb1ca6..418b5782096e 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \
power.o sbus.o sparc64_ksyms.o chmc.o \
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PCI) += ebus.o pci_common.o \
pci_psycho.o pci_sabre.o pci_schizo.o \
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
new file mode 100644
index 000000000000..4298d0aee713
--- /dev/null
+++ b/arch/sparc64/kernel/ftrace.c
@@ -0,0 +1,94 @@
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/ftrace.h>
+
+static const u32 ftrace_nop = 0x01000000;
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)&ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static u32 call;
+ s32 off;
+
+ off = ((s32)addr - (s32)ip);
+ call = 0x40000000 | ((u32)off >> 2);
+
+ return (unsigned char *) &call;
+}
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ u32 old = *(u32 *)old_code;
+ u32 new = *(u32 *)new_code;
+ u32 replaced;
+ int faulted;
+
+ __asm__ __volatile__(
+ "1: cas [%[ip]], %[old], %[new]\n"
+ " flush %[ip]\n"
+ " mov 0, %[faulted]\n"
+ "2:\n"
+ " .section .fixup,#alloc,#execinstr\n"
+ " .align 4\n"
+ "3: sethi %%hi(2b), %[faulted]\n"
+ " jmpl %[faulted] + %%lo(2b), %%g0\n"
+ " mov 1, %[faulted]\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .word 1b, 3b\n"
+ " .previous\n"
+ : "=r" (replaced), [faulted] "=r" (faulted)
+ : [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
+ : "memory");
+
+ if (replaced != old && replaced != new)
+ faulted = 2;
+
+ return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ return ftrace_modify_code(ip, old, new);
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long ip = (long)(&mcount_call);
+ unsigned long *addr = data;
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ /*
+ * Replace the mcount stub with a pointer to the
+ * ip recorder function.
+ */
+ memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, *addr);
+ *addr = ftrace_modify_code(ip, old, new);
+
+ return 0;
+}
+
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ ftrace_mcount_set(data);
+ return 0;
+}
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 8ac0b99f2c55..49d3ea50c247 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -53,6 +53,7 @@
#include <asm/ns87303.h>
#include <asm/timer.h>
#include <asm/cpudata.h>
+#include <asm/ftrace.h>
struct poll {
int fd;
@@ -111,8 +112,7 @@ EXPORT_SYMBOL(__write_trylock);
EXPORT_SYMBOL(smp_call_function);
#endif /* CONFIG_SMP */
-#if defined(CONFIG_MCOUNT)
-extern void _mcount(void);
+#ifdef CONFIG_MCOUNT
EXPORT_SYMBOL(_mcount);
#endif
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 9e4534b485c7..7735a7a60533 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -28,10 +28,13 @@ ovstack:
.skip OVSTACKSIZE
#endif
.text
- .align 32
- .globl mcount, _mcount
-mcount:
+ .align 32
+ .globl _mcount
+ .type _mcount,#function
+ .globl mcount
+ .type mcount,#function
_mcount:
+mcount:
#ifdef CONFIG_STACK_DEBUG
/*
* Check whether %sp is dangerously low.
@@ -55,6 +58,53 @@ _mcount:
or %g3, %lo(panicstring), %o0
call prom_halt
nop
+1:
+#endif
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ mov %o7, %o0
+ .globl mcount_call
+mcount_call:
+ call ftrace_stub
+ mov %o0, %o7
+#else
+ sethi %hi(ftrace_trace_function), %g1
+ sethi %hi(ftrace_stub), %g2
+ ldx [%g1 + %lo(ftrace_trace_function)], %g1
+ or %g2, %lo(ftrace_stub), %g2
+ cmp %g1, %g2
+ be,pn %icc, 1f
+ mov %i7, %o1
+ jmpl %g1, %g0
+ mov %o7, %o0
+ /* not reached */
+1:
#endif
-1: retl
+#endif
+ retl
nop
+ .size _mcount,.-_mcount
+ .size mcount,.-mcount
+
+#ifdef CONFIG_FTRACE
+ .globl ftrace_stub
+ .type ftrace_stub,#function
+ftrace_stub:
+ retl
+ nop
+ .size ftrace_stub,.-ftrace_stub
+#ifdef CONFIG_DYNAMIC_FTRACE
+ .globl ftrace_caller
+ .type ftrace_caller,#function
+ftrace_caller:
+ mov %i7, %o1
+ mov %o7, %o0
+ .globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+ mov %o0, %o7
+ retl
+ nop
+ .size ftrace_caller,.-ftrace_caller
+#endif
+#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf07b6f50fa1..c3a4c03c0800 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..f395fd537c5c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -172,6 +172,14 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+config PAGE_FAULT_HANDLERS
+ bool "Custom page fault handlers"
+ depends on DEBUG_KERNEL
+ help
+ Allow the use of custom page fault handlers. A kernel module may
+ register a function that is called on every page fault. Custom
+ handlers are used by some debugging and reverse engineering tools.
+
#
# IO delay types:
#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 77807d4769c9..5ff67208d4ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
+ifdef CONFIG_FTRACE
+# Do not profile debug utilities
+CFLAGS_REMOVE_tsc_64.o = -pg
+CFLAGS_REMOVE_tsc_32.o = -pg
+CFLAGS_REMOVE_rtc.o = -pg
+endif
+
#
# vsyscalls (which work on the user stack) should have
# no stack-protector checks:
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
{
return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
{ -1, NULL }
};
-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
{
const unsigned char *const *noptable = intel_nops;
int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
+static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1; /* protected by smp_alt */
void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
__func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(smp->locks, smp->locks_end,
smp->text, smp->text_end);
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
if (smp_alt_once || noreplace_smp)
return;
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
return;
}
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
/*
* Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
mod->text, mod->text_end);
}
smp_mode = smp;
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c778e4fa55a2..95e6bbe3665e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
+#include <asm/ftrace.h>
#include "irq_vectors.h"
/*
@@ -1110,6 +1111,77 @@ ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN */
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl mcount_call
+mcount_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+.globl ftrace_stub
+ftrace_stub:
+ ret
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+ cmpl $ftrace_stub, ftrace_trace_function
+ jnz trace
+.globl ftrace_stub
+ftrace_stub:
+ ret
+
+ /* taken from glibc */
+trace:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+ call *ftrace_trace_function
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..b0f7308f78a6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
+#include <asm/ftrace.h>
.code64
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(mcount)
+
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl mcount_call
+mcount_call:
+ call ftrace_stub
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ retq
+END(mcount)
+
+ENTRY(ftrace_caller)
+
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ movq 8(%rbp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+.globl ftrace_stub
+ftrace_stub:
+ retq
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+ENTRY(mcount)
+ cmpq $ftrace_stub, ftrace_trace_function
+ jnz trace
+.globl ftrace_stub
+ftrace_stub:
+ retq
+
+trace:
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ movq 8(%rbp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+ call *ftrace_trace_function
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..ab115cd15fdf
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/alternative.h>
+#include <asm/ftrace.h>
+
+
+/* Long is fine, even if it is only 4 bytes ;-) */
+static long *ftrace_nop;
+
+union ftrace_code_union {
+ char code[MCOUNT_INSN_SIZE];
+ struct {
+ char e8;
+ int offset;
+ } __attribute__((packed));
+};
+
+
+static int notrace ftrace_calc_offset(long ip, long addr)
+{
+ return (int)(addr - ip);
+}
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static union ftrace_code_union calc;
+
+ calc.e8 = 0xe8;
+ calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
+
+ /*
+ * No locking needed, this must be called via kstop_machine
+ * which in essence is like running on a uniprocessor machine.
+ */
+ return calc.code;
+}
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned replaced;
+ unsigned old = *(unsigned *)old_code; /* 4 bytes */
+ unsigned new = *(unsigned *)new_code; /* 4 bytes */
+ unsigned char newch = new_code[4];
+ int faulted = 0;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine.
+ */
+ asm volatile (
+ "1: lock\n"
+ " cmpxchg %3, (%2)\n"
+ " jnz 2f\n"
+ " movb %b4, 4(%2)\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %0\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : "=r"(faulted), "=a"(replaced)
+ : "r"(ip), "r"(new), "c"(newch),
+ "0"(faulted), "a"(old)
+ : "memory");
+ sync_core();
+
+ if (replaced != old && replaced != new)
+ faulted = 2;
+
+ return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long ip = (long)(&mcount_call);
+ unsigned long *addr = data;
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ /*
+ * Replace the mcount stub with a pointer to the
+ * ip recorder function.
+ */
+ memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, *addr);
+ *addr = ftrace_modify_code(ip, old, new);
+
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ const unsigned char *const *noptable = find_nop_table();
+
+ /* This is running in kstop_machine */
+
+ ftrace_mcount_set(data);
+
+ ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+
+ return 0;
+}
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..dd7ebee446af 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
#include <linux/module.h>
+
#include <asm/checksum.h>
-#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d0b234c9fc31..88923fd7a6fc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/numa.h>
+#include <linux/ftrace.h>
+
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
+ tracer_disable();
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 576a03db4511..1558fdc174f9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
#include <linux/string.h>
#include <linux/reboot.h>
#include <linux/numa.h>
+#include <linux/ftrace.h>
+
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
+ tracer_disable();
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e2db9ac5c61c..347a7aba8b16 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -185,7 +185,10 @@ void cpu_idle(void)
local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
idle();
+ start_critical_timings();
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c6eb5c91e5f6..ea090e6cfe39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -165,7 +165,10 @@ void cpu_idle(void)
*/
local_irq_disable();
enter_idle();
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
idle();
+ start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d564..4063dfa2a02d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
#include <asm/topology.h>
#include <asm/vgtod.h>
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+ __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"
/*
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f6c05d0410fb..16ff4bf418d9 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
All C exports should go in the respective C files. */
#include <linux/module.h>
-#include <net/checksum.h>
#include <linux/smp.h>
+#include <net/checksum.h>
+
#include <asm/processor.h>
-#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/uaccess.h>
#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..84aa2883fe15 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_SMP) := msr-on-cpu.o
lib-y := delay_$(BITS).o
+lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
+/*
+ * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
+ * Copyright 2008 by Steven Rostedt, Red Hat, Inc
+ * (inspired by Andi Kleen's thunk_64.S)
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
+
+ #include <linux/linkage.h>
+
+#define ARCH_TRACE_IRQS_ON \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call trace_hardirqs_on; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call trace_hardirqs_off; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* put return address in eax (arg1) */
+ .macro thunk_ra name,func
+ .globl \name
+\name:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ /* Place EIP in the arg1 */
+ movl 3*4(%esp), %eax
+ call \func
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+ .endm
+
+ thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+ thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
+#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
@@ -42,8 +43,22 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- thunk trace_hardirqs_on_thunk,trace_hardirqs_on
- thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+ /* put return address in rdi (arg1) */
+ .macro thunk_ra name,func
+ .globl \name
+\name:
+ CFI_STARTPROC
+ SAVE_ARGS
+ /* SAVE_ARGS pushs 9 elements */
+ /* the next element would be the rip */
+ movq 9*8(%rsp), %rdi
+ call \func
+ jmp restore
+ CFI_ENDPROC
+ .endm
+
+ thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+ thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8bcb6f40ccb6..42394b353c6a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -49,6 +49,60 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
+#ifdef CONFIG_PAGE_FAULT_HANDLERS
+static HLIST_HEAD(pf_handlers); /* protected by RCU */
+static DEFINE_SPINLOCK(pf_handlers_writer);
+
+void register_page_fault_handler(struct pf_handler *new_pfh)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&pf_handlers_writer, flags);
+ hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
+ spin_unlock_irqrestore(&pf_handlers_writer, flags);
+}
+EXPORT_SYMBOL_GPL(register_page_fault_handler);
+
+/**
+ * unregister_page_fault_handler:
+ * The caller must ensure @old_pfh is not in use anymore before freeing it.
+ * This function does not guarantee it. The list of handlers is protected by
+ * RCU, so you can do this by e.g. calling synchronize_rcu().
+ */
+void unregister_page_fault_handler(struct pf_handler *old_pfh)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&pf_handlers_writer, flags);
+ hlist_del_rcu(&old_pfh->hlist);
+ spin_unlock_irqrestore(&pf_handlers_writer, flags);
+}
+EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
+#endif
+
+/* returns non-zero if do_page_fault() should return */
+static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+#ifdef CONFIG_PAGE_FAULT_HANDLERS
+ int ret = 0;
+ struct pf_handler *cur;
+ struct hlist_node *ncur;
+
+ if (hlist_empty(&pf_handlers))
+ return 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
+ ret = cur->handler(regs, error_code, address);
+ if (ret)
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+#else
+ return 0;
+#endif
+}
+
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
@@ -606,6 +660,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (notify_page_fault(regs))
return;
+ if (handle_custom_pf(regs, error_code, address))
+ return;
/*
* We fault-in kernel-space virtual memory on-demand. The
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b6..f96eca21ad8f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -710,6 +710,8 @@ void mark_rodata_ro(void)
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
+#ifndef CONFIG_DYNAMIC_FTRACE
+ /* Dynamic tracing modifies the kernel text section */
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel text: %luk\n",
size >> 10);
@@ -722,6 +724,8 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: write protecting again\n");
set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
start += size;
size = (unsigned long)__end_rodata - start;
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 819dad973b13..17c0a6138a53 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -767,6 +767,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+ unsigned long rodata_start =
+ ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* Dynamic tracing modifies the kernel text section */
+ start = rodata_start;
+#endif
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -776,8 +783,7 @@ void mark_rodata_ro(void)
* The rodata section (but not the kernel text!) should also be
* not-executable.
*/
- start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
- set_memory_nx(start, (end - start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c6005..1ef0f90813d6 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
#define gtod vdso_vsyscall_gtod_data
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
return ret;
}
-static inline long vgetns(void)
+notrace static inline long vgetns(void)
{
long v;
cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
-static noinline int do_realtime(struct timespec *ts)
+notrace static noinline int do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
}
/* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+notrace static void
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
ts->tv_nsec = nsec;
}
-static noinline int do_monotonic(struct timespec *ts)
+notrace static noinline int do_monotonic(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
return 0;
}
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
if (likely(gtod->sysctl_enabled && gtod->clock.vread))
switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a9..9fbc6b20026b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
#include <asm/vgtod.h>
#include "vextern.h"
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
unsigned int p;
diff --git a/include/asm-arm/ftrace.h b/include/asm-arm/ftrace.h
new file mode 100644
index 000000000000..584ef9a8e5a5
--- /dev/null
+++ b/include/asm-arm/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_ARM_FTRACE
+#define _ASM_ARM_FTRACE
+
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR ((long)(mcount))
+#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+#endif
+
+#endif
+
+#endif /* _ASM_ARM_FTRACE */
diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h
index c042194d3ab5..b1a37876942d 100644
--- a/include/asm-arm/kprobes.h
+++ b/include/asm-arm/kprobes.h
@@ -59,6 +59,7 @@ struct kprobe_ctlblk {
};
void arch_remove_kprobe(struct kprobe *);
+void kretprobe_trampoline(void);
int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr);
int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
diff --git a/include/asm-powerpc/ftrace.h b/include/asm-powerpc/ftrace.h
new file mode 100644
index 000000000000..de921326cca8
--- /dev/null
+++ b/include/asm-powerpc/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_POWERPC_FTRACE
+#define _ASM_POWERPC_FTRACE
+
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR ((long)(_mcount))
+#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(void);
+#endif
+
+#endif
+
+#endif /* _ASM_POWERPC_FTRACE */
diff --git a/include/asm-powerpc/hw_irq.h b/include/asm-powerpc/hw_irq.h
index ad8c9f7fd0e3..f75a5fc64d2e 100644
--- a/include/asm-powerpc/hw_irq.h
+++ b/include/asm-powerpc/hw_irq.h
@@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void);
get_paca()->hard_enabled = 0; \
} while(0)
+static inline int irqs_disabled_flags(unsigned long flags)
+{
+ return flags == 0;
+}
+
#else
#if defined(CONFIG_BOOKE)
@@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags)
#define hard_irq_enable() local_irq_enable()
#define hard_irq_disable() local_irq_disable()
+static inline int irqs_disabled_flags(unsigned long flags)
+{
+ return (flags & MSR_EE) == 0;
+}
+
#endif /* CONFIG_PPC64 */
/*
diff --git a/include/asm-sparc64/ftrace.h b/include/asm-sparc64/ftrace.h
new file mode 100644
index 000000000000..d27716cd38c1
--- /dev/null
+++ b/include/asm-sparc64/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_SPARC64_FTRACE
+#define _ASM_SPARC64_FTRACE
+
+#ifdef CONFIG_MCOUNT
+#define MCOUNT_ADDR ((long)(_mcount))
+#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(void);
+#endif
+
+#endif
+
+#endif /* _ASM_SPARC64_FTRACE */
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index 1f6a9ca10126..f6aa18eadf71 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {}
static inline void alternatives_smp_switch(int smp) {}
#endif /* CONFIG_SMP */
+const unsigned char *const *find_nop_table(void);
+
/*
* Alternative instructions for different CPU types or capabilities.
*
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
new file mode 100644
index 000000000000..c184441133f2
--- /dev/null
+++ b/include/asm-x86/ftrace.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_X86_FTRACE
+#define _ASM_SPARC64_FTRACE
+
+#ifdef CONFIG_FTRACE
+#define MCOUNT_ADDR ((long)(mcount))
+#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount(void);
+#endif
+
+#endif /* CONFIG_FTRACE */
+
+#endif /* _ASM_X86_FTRACE */
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970e..24d71b1eb189 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void)
* have a reliable stack. x86_64 only.
*/
#define SWAPGS_UNSAFE_STACK swapgs
-#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
-#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
@@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void)
TRACE_IRQS_OFF;
#else
-#define ARCH_TRACE_IRQS_ON \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_on; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
-#define ARCH_TRACE_IRQS_OFF \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_off; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
#define ARCH_LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
@@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void)
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
-# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON
-# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF
+# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
+# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
#else
# define TRACE_IRQS_ON
# define TRACE_IRQS_OFF
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb59ba1..a80f2d6cc737 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
+struct pf_handler {
+ struct hlist_node hlist;
+ int (*handler)(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address);
+};
+
+extern void register_page_fault_handler(struct pf_handler *new_pfh);
+extern void unregister_page_fault_handler(struct pf_handler *old_pfh);
+
#endif
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 17b3700949bf..6b66ff905af0 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -24,7 +24,8 @@ enum vsyscall_num {
((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
#define __section_vsyscall_clock __attribute__ \
((unused, __section__ (".vsyscall_clock"),aligned(16)))
-#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn")))
+#define __vsyscall_fn \
+ __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
#define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
new file mode 100644
index 000000000000..3121b95443d9
--- /dev/null
+++ b/include/linux/ftrace.h
@@ -0,0 +1,143 @@
+#ifndef _LINUX_FTRACE_H
+#define _LINUX_FTRACE_H
+
+#ifdef CONFIG_FTRACE
+
+#include <linux/linkage.h>
+#include <linux/fs.h>
+
+extern int ftrace_enabled;
+extern int
+ftrace_enable_sysctl(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
+typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
+
+struct ftrace_ops {
+ ftrace_func_t func;
+ struct ftrace_ops *next;
+};
+
+/*
+ * The ftrace_ops must be a static and should also
+ * be read_mostly. These functions do modify read_mostly variables
+ * so use them sparely. Never free an ftrace_op or modify the
+ * next pointer after it has been registered. Even after unregistering
+ * it, the next pointer may still be used internally.
+ */
+int register_ftrace_function(struct ftrace_ops *ops);
+int unregister_ftrace_function(struct ftrace_ops *ops);
+void clear_ftrace_function(void);
+
+extern void ftrace_stub(unsigned long a0, unsigned long a1);
+
+#else /* !CONFIG_FTRACE */
+# define register_ftrace_function(ops) do { } while (0)
+# define unregister_ftrace_function(ops) do { } while (0)
+# define clear_ftrace_function(ops) do { } while (0)
+#endif /* CONFIG_FTRACE */
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+# define FTRACE_HASHBITS 10
+# define FTRACE_HASHSIZE (1<<FTRACE_HASHBITS)
+
+enum {
+ FTRACE_FL_FREE = (1 << 0),
+ FTRACE_FL_FAILED = (1 << 1),
+ FTRACE_FL_FILTER = (1 << 2),
+ FTRACE_FL_ENABLED = (1 << 3),
+ FTRACE_FL_NOTRACE = (1 << 4),
+ FTRACE_FL_CONVERTED = (1 << 5),
+ FTRACE_FL_FROZEN = (1 << 6),
+};
+
+struct dyn_ftrace {
+ struct hlist_node node;
+ unsigned long ip; /* address of mcount call-site */
+ unsigned long flags;
+};
+
+int ftrace_force_update(void);
+void ftrace_set_filter(unsigned char *buf, int len, int reset);
+
+/* defined in arch */
+extern int ftrace_ip_converted(unsigned long ip);
+extern unsigned char *ftrace_nop_replace(void);
+extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
+extern int ftrace_dyn_arch_init(void *data);
+extern int ftrace_mcount_set(unsigned long *data);
+extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code);
+extern int ftrace_update_ftrace_func(ftrace_func_t func);
+extern void ftrace_caller(void);
+extern void ftrace_call(void);
+extern void mcount_call(void);
+
+extern int skip_trace(unsigned long ip);
+
+void ftrace_disable_daemon(void);
+void ftrace_enable_daemon(void);
+
+#else
+# define skip_trace(ip) ({ 0; })
+# define ftrace_force_update() ({ 0; })
+# define ftrace_set_filter(buf, len, reset) do { } while (0)
+# define ftrace_disable_daemon() do { } while (0)
+# define ftrace_enable_daemon() do { } while (0)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/* totally disable ftrace - can not re-enable after this */
+void ftrace_kill(void);
+
+static inline void tracer_disable(void)
+{
+#ifdef CONFIG_FTRACE
+ ftrace_enabled = 0;
+#endif
+}
+
+#ifdef CONFIG_FRAME_POINTER
+/* TODO: need to fix this for ARM */
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
+# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
+# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
+# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
+# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
+# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
+#else
+# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+# define CALLER_ADDR1 0UL
+# define CALLER_ADDR2 0UL
+# define CALLER_ADDR3 0UL
+# define CALLER_ADDR4 0UL
+# define CALLER_ADDR5 0UL
+# define CALLER_ADDR6 0UL
+#endif
+
+#ifdef CONFIG_IRQSOFF_TRACER
+ extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
+ extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
+#else
+# define time_hardirqs_on(a0, a1) do { } while (0)
+# define time_hardirqs_off(a0, a1) do { } while (0)
+#endif
+
+#ifdef CONFIG_PREEMPT_TRACER
+ extern void trace_preempt_on(unsigned long a0, unsigned long a1);
+ extern void trace_preempt_off(unsigned long a0, unsigned long a1);
+#else
+# define trace_preempt_on(a0, a1) do { } while (0)
+# define trace_preempt_off(a0, a1) do { } while (0)
+#endif
+
+#ifdef CONFIG_TRACING
+extern void
+ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
+#else
+static inline void
+ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
+#endif
+
+#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index e600c4e9b8c5..2b1c2e58566e 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -12,10 +12,10 @@
#define _LINUX_TRACE_IRQFLAGS_H
#ifdef CONFIG_TRACE_IRQFLAGS
- extern void trace_hardirqs_on(void);
- extern void trace_hardirqs_off(void);
extern void trace_softirqs_on(unsigned long ip);
extern void trace_softirqs_off(unsigned long ip);
+ extern void trace_hardirqs_on(void);
+ extern void trace_hardirqs_off(void);
# define trace_hardirq_context(p) ((p)->hardirq_context)
# define trace_softirq_context(p) ((p)->softirq_context)
# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
@@ -41,6 +41,15 @@
# define INIT_TRACE_IRQFLAGS
#endif
+#if defined(CONFIG_IRQSOFF_TRACER) || \
+ defined(CONFIG_PREEMPT_TRACER)
+ extern void stop_critical_timings(void);
+ extern void start_critical_timings(void);
+#else
+# define stop_critical_timings() do { } while (0)
+# define start_critical_timings() do { } while (0)
+#endif
+
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
#include <asm/irqflags.h>
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1036631ff4fa..04a3556bdea6 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
struct jprobe;
struct kretprobe;
+static inline struct kprobe *get_kprobe(void *addr)
+{
+ return NULL;
+}
static inline struct kprobe *kprobe_running(void)
{
return NULL;
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 2119610b24f8..14f329c64ba8 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -3,6 +3,8 @@
#include <asm/linkage.h>
+#define notrace __attribute__((no_instrument_function))
+
#ifdef __cplusplus
#define CPP_ASMLINKAGE extern "C"
#else
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 430f6adf9762..1290653f9241 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -44,8 +44,8 @@ struct marker {
*/
char state; /* Marker state. */
char ptype; /* probe type : 0 : single, 1 : multi */
- void (*call)(const struct marker *mdata, /* Probe wrapper */
- void *call_private, const char *fmt, ...);
+ /* Probe wrapper */
+ void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
} __attribute__((aligned(8)));
@@ -58,8 +58,12 @@ struct marker {
* Make sure the alignment of the structure in the __markers section will
* not add unwanted padding between the beginning of the section and the
* structure. Force alignment to the same alignment as the section start.
+ *
+ * The "generic" argument controls which marker enabling mechanism must be used.
+ * If generic is true, a variable read is used.
+ * If generic is false, immediate values are used.
*/
-#define __trace_mark(name, call_private, format, args...) \
+#define __trace_mark(generic, name, call_private, format, args...) \
do { \
static const char __mstrtab_##name[] \
__attribute__((section("__markers_strings"))) \
@@ -72,15 +76,14 @@ struct marker {
__mark_check_format(format, ## args); \
if (unlikely(__mark_##name.state)) { \
(*__mark_##name.call) \
- (&__mark_##name, call_private, \
- format, ## args); \
+ (&__mark_##name, call_private, ## args);\
} \
} while (0)
extern void marker_update_probe_range(struct marker *begin,
struct marker *end);
#else /* !CONFIG_MARKERS */
-#define __trace_mark(name, call_private, format, args...) \
+#define __trace_mark(generic, name, call_private, format, args...) \
__mark_check_format(format, ## args)
static inline void marker_update_probe_range(struct marker *begin,
struct marker *end)
@@ -88,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin,
#endif /* CONFIG_MARKERS */
/**
- * trace_mark - Marker
+ * trace_mark - Marker using code patching
* @name: marker name, not quoted.
* @format: format string
* @args...: variable argument list
*
- * Places a marker.
+ * Places a marker using optimized code patching technique (imv_read())
+ * to be enabled when immediate values are present.
*/
#define trace_mark(name, format, args...) \
- __trace_mark(name, NULL, format, ## args)
+ __trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_imv_read()) to be
+ * enabled. Should be used for markers in code paths where instruction
+ * modification based enabling is not welcome. (__init and __exit functions,
+ * lockdep, some traps, printk).
+ */
+#define _trace_mark(name, format, args...) \
+ __trace_mark(1, name, NULL, format, ## args)
/**
* MARK_NOARGS - Format string for a marker with no argument.
@@ -117,9 +135,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
extern marker_probe_func __mark_empty_function;
extern void marker_probe_cb(const struct marker *mdata,
- void *call_private, const char *fmt, ...);
+ void *call_private, ...);
extern void marker_probe_cb_noarg(const struct marker *mdata,
- void *call_private, const char *fmt, ...);
+ void *call_private, ...);
/*
* Connect a probe to a marker.
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 23f0c54175cd..72b1a10a59b6 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,7 +10,7 @@
#include <linux/linkage.h>
#include <linux/list.h>
-#ifdef CONFIG_DEBUG_PREEMPT
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
extern void add_preempt_count(int val);
extern void sub_preempt_count(int val);
#else
@@ -52,6 +52,34 @@ do { \
preempt_check_resched(); \
} while (0)
+/* For debugging and tracer internals only! */
+#define add_preempt_count_notrace(val) \
+ do { preempt_count() += (val); } while (0)
+#define sub_preempt_count_notrace(val) \
+ do { preempt_count() -= (val); } while (0)
+#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
+#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
+
+#define preempt_disable_notrace() \
+do { \
+ inc_preempt_count_notrace(); \
+ barrier(); \
+} while (0)
+
+#define preempt_enable_no_resched_notrace() \
+do { \
+ barrier(); \
+ dec_preempt_count_notrace(); \
+} while (0)
+
+/* preempt_check_resched is OK to trace */
+#define preempt_enable_notrace() \
+do { \
+ preempt_enable_no_resched_notrace(); \
+ barrier(); \
+ preempt_check_resched(); \
+} while (0)
+
#else
#define preempt_disable() do { } while (0)
@@ -59,6 +87,10 @@ do { \
#define preempt_enable() do { } while (0)
#define preempt_check_resched() do { } while (0)
+#define preempt_disable_notrace() do { } while (0)
+#define preempt_enable_no_resched_notrace() do { } while (0)
+#define preempt_enable_notrace() do { } while (0)
+
#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d3f847ca8d..aa609858aef0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
+extern int runqueue_is_locked(void);
+
extern cpumask_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
@@ -2131,6 +2133,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
}
#endif
+#ifdef CONFIG_TRACING
+extern void
+__trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3);
+#else
+static inline void
+__trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+}
+#endif
+
extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
@@ -2225,6 +2239,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
}
#endif /* CONFIG_MM_OWNER */
+#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+
#endif /* __KERNEL__ */
#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f462439cc288..bd91987c065f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable;
extern int block_dump;
extern int laptop_mode;
+extern unsigned long determine_dirtyable_memory(void);
+
extern int dirty_ratio_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938addb9d..ca2433e84873 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
+CFLAGS_REMOVE_sched.o = -pg -mno-spe
+
+ifdef CONFIG_FTRACE
+# Do not trace debug files and internal ftrace files
+CFLAGS_REMOVE_lockdep.o = -pg
+CFLAGS_REMOVE_lockdep_proc.o = -pg
+CFLAGS_REMOVE_mutex-debug.o = -pg
+CFLAGS_REMOVE_rtmutex-debug.o = -pg
+CFLAGS_REMOVE_cgroup-debug.o = -pg
+CFLAGS_REMOVE_sched_clock.o = -pg
+endif
+
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
@@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
+obj-$(CONFIG_FTRACE) += trace/
+obj-$(CONFIG_TRACING) += trace/
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..d66d676dc362 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
rt_mutex_init_task(p);
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81a4e4a3f087..65548eff029e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -39,6 +39,7 @@
#include <linux/irqflags.h>
#include <linux/utsname.h>
#include <linux/hash.h>
+#include <linux/ftrace.h>
#include <asm/sections.h>
@@ -81,6 +82,8 @@ static int graph_lock(void)
__raw_spin_unlock(&lockdep_lock);
return 0;
}
+ /* prevent any recursions within lockdep from causing deadlocks */
+ current->lockdep_recursion++;
return 1;
}
@@ -89,6 +92,7 @@ static inline int graph_unlock(void)
if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
return DEBUG_LOCKS_WARN_ON(1);
+ current->lockdep_recursion--;
__raw_spin_unlock(&lockdep_lock);
return 0;
}
@@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
return 1;
}
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* Forwards and backwards subgraph searching, for the purposes of
* proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* print irq inversion bug:
@@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
/*
* Hardirqs will be enabled:
*/
-void trace_hardirqs_on(void)
+void trace_hardirqs_on_caller(unsigned long a0)
{
struct task_struct *curr = current;
unsigned long ip;
+ time_hardirqs_on(CALLER_ADDR0, a0);
+
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
curr->hardirq_enable_event = ++curr->irq_events;
debug_atomic_inc(&hardirqs_on_events);
}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+void trace_hardirqs_on(void)
+{
+ trace_hardirqs_on_caller(CALLER_ADDR0);
+}
EXPORT_SYMBOL(trace_hardirqs_on);
/*
* Hardirqs were disabled:
*/
-void trace_hardirqs_off(void)
+void trace_hardirqs_off_caller(unsigned long a0)
{
struct task_struct *curr = current;
+ time_hardirqs_off(CALLER_ADDR0, a0);
+
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
} else
debug_atomic_inc(&redundant_hardirqs_off);
}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+void trace_hardirqs_off(void)
+{
+ trace_hardirqs_off_caller(CALLER_ADDR0);
+}
EXPORT_SYMBOL(trace_hardirqs_off);
/*
@@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
* Mark a lock with a usage bit, and validate the state transition:
*/
static int mark_lock(struct task_struct *curr, struct held_lock *this,
- enum lock_usage_bit new_bit)
+ enum lock_usage_bit new_bit)
{
unsigned int new_mask = 1 << new_bit, ret = 1;
@@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
* and also avoid lockdep recursion:
*/
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
- int trylock, int read, int check, unsigned long ip)
+ int trylock, int read, int check, unsigned long ip)
{
unsigned long flags;
@@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
EXPORT_SYMBOL_GPL(lock_acquire);
-void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+void lock_release(struct lockdep_map *lock, int nested,
+ unsigned long ip)
{
unsigned long flags;
diff --git a/kernel/marker.c b/kernel/marker.c
index b5a9fe1d50d5..1abfb923b761 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
struct marker_entry {
struct hlist_node hlist;
char *format;
- void (*call)(const struct marker *mdata, /* Probe wrapper */
- void *call_private, const char *fmt, ...);
+ /* Probe wrapper */
+ void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
int refcount; /* Number of times armed. 0 if disarmed. */
@@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
* marker_probe_cb Callback that prepares the variable argument list for probes.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
- * @fmt: format string
* @...: Variable argument list.
*
* Since we do not use "typical" pointer based RCU in the 1 argument case, we
* need to put a full smp_rmb() in this branch. This is why we do not use
* rcu_dereference() for the pointer read.
*/
-void marker_probe_cb(const struct marker *mdata, void *call_private,
- const char *fmt, ...)
+void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
{
va_list args;
char ptype;
@@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
- va_start(args, fmt);
- func(mdata->single.probe_private, call_private, fmt, &args);
+ va_start(args, call_private);
+ func(mdata->single.probe_private, call_private, mdata->format,
+ &args);
va_end(args);
} else {
struct marker_probe_closure *multi;
@@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
smp_read_barrier_depends();
multi = mdata->multi;
for (i = 0; multi[i].func; i++) {
- va_start(args, fmt);
- multi[i].func(multi[i].probe_private, call_private, fmt,
- &args);
+ va_start(args, call_private);
+ multi[i].func(multi[i].probe_private, call_private,
+ mdata->format, &args);
va_end(args);
}
}
@@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
* marker_probe_cb Callback that does not prepare the variable argument list.
* @mdata: pointer of type struct marker
* @call_private: caller site private data
- * @fmt: format string
* @...: Variable argument list.
*
* Should be connected to markers "MARK_NOARGS".
*/
-void marker_probe_cb_noarg(const struct marker *mdata,
- void *call_private, const char *fmt, ...)
+void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
@@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
/* Must read the ptr before private data. They are not data
* dependant, so we put an explicit smp_rmb() here. */
smp_rmb();
- func(mdata->single.probe_private, call_private, fmt, &args);
+ func(mdata->single.probe_private, call_private, mdata->format,
+ &args);
} else {
struct marker_probe_closure *multi;
int i;
@@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
smp_read_barrier_depends();
multi = mdata->multi;
for (i = 0; multi[i].func; i++)
- multi[i].func(multi[i].probe_private, call_private, fmt,
- &args);
+ multi[i].func(multi[i].probe_private, call_private,
+ mdata->format, &args);
}
preempt_enable();
}
diff --git a/kernel/printk.c b/kernel/printk.c
index e2129e83fd75..75ef3af39132 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1041,7 +1041,9 @@ void release_console_sem(void)
_log_end = log_end;
con_start = log_end; /* Flush */
spin_unlock(&logbuf_lock);
+ stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
+ start_critical_timings();
local_irq_restore(flags);
}
console_locked = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 94ead43eda62..42899dce837d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,6 +70,7 @@
#include <linux/bootmem.h>
#include <linux/debugfs.h>
#include <linux/ctype.h>
+#include <linux/ftrace.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -607,6 +608,24 @@ static inline void update_rq_clock(struct rq *rq)
# define const_debug static const
#endif
+/**
+ * runqueue_is_locked
+ *
+ * Returns true if the current cpu runqueue is locked.
+ * This interface allows printk to be called with the runqueue lock
+ * held and know whether or not it is OK to wake up the klogd.
+ */
+int runqueue_is_locked(void)
+{
+ int cpu = get_cpu();
+ struct rq *rq = cpu_rq(cpu);
+ int ret;
+
+ ret = spin_is_locked(&rq->lock);
+ put_cpu();
+ return ret;
+}
+
/*
* Debugging: various feature bits
*/
@@ -831,7 +850,7 @@ static unsigned long long __cpu_clock(int cpu)
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
*/
-unsigned long long cpu_clock(int cpu)
+unsigned long long notrace cpu_clock(int cpu)
{
unsigned long long prev_cpu_time, time, delta_time;
unsigned long flags;
@@ -2149,6 +2168,9 @@ out_activate:
success = 1;
out_running:
+ trace_mark(kernel_sched_wakeup,
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
@@ -2279,6 +2301,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->sched_class->task_new(rq, p);
inc_nr_running(p, rq);
}
+ trace_mark(kernel_sched_wakeup_new,
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ p->pid, p->state, rq, p, rq->curr);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -2451,6 +2476,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
+ trace_mark(kernel_sched_schedule,
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ prev->pid, next->pid, prev->state,
+ rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -4021,26 +4051,44 @@ void scheduler_tick(void)
#endif
}
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+ defined(CONFIG_PREEMPT_TRACER))
+
+static inline unsigned long get_parent_ip(unsigned long addr)
+{
+ if (in_lock_functions(addr)) {
+ addr = CALLER_ADDR2;
+ if (in_lock_functions(addr))
+ addr = CALLER_ADDR3;
+ }
+ return addr;
+}
void __kprobes add_preempt_count(int val)
{
+#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
return;
+#endif
preempt_count() += val;
+#ifdef CONFIG_DEBUG_PREEMPT
/*
* Spinlock count overflowing soon?
*/
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
+#endif
+ if (preempt_count() == val)
+ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
EXPORT_SYMBOL(add_preempt_count);
void __kprobes sub_preempt_count(int val)
{
+#ifdef CONFIG_DEBUG_PREEMPT
/*
* Underflow?
*/
@@ -4052,7 +4100,10 @@ void __kprobes sub_preempt_count(int val)
if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
!(preempt_count() & PREEMPT_MASK)))
return;
+#endif
+ if (preempt_count() == val)
+ trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
preempt_count() -= val;
}
EXPORT_SYMBOL(sub_preempt_count);
@@ -5384,7 +5435,7 @@ out_unlock:
return retval;
}
-static const char stat_nam[] = "RSDTtZX";
+static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
void sched_show_task(struct task_struct *p)
{
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 5c2942e768cd..aaaeae8244e7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -31,6 +31,7 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
+#include <linux/ftrace.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index ae28c8245123..a1fb54c93cdd 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
}
EXPORT_SYMBOL(_spin_trylock_bh);
-int in_lock_functions(unsigned long addr)
+notrace int in_lock_functions(unsigned long addr)
{
/* Linker adds these: start and end of __lockfunc functions */
extern char __lock_text_start[], __lock_text_end[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..efaf7c5500e9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -46,6 +46,7 @@
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
+#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#ifdef CONFIG_FTRACE
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ftrace_enabled",
+ .data = &ftrace_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &ftrace_enable_sysctl,
+ },
+#endif
#ifdef CONFIG_KMOD
{
.ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 000000000000..5c2295b29f2c
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,127 @@
+#
+# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
+#
+config HAVE_FTRACE
+ bool
+
+config HAVE_DYNAMIC_FTRACE
+ bool
+
+config TRACER_MAX_TRACE
+ bool
+
+config TRACING
+ bool
+ select DEBUG_FS
+ select STACKTRACE
+
+config FTRACE
+ bool "Kernel Function Tracer"
+ depends on HAVE_FTRACE
+ select FRAME_POINTER
+ select TRACING
+ select CONTEXT_SWITCH_TRACER
+ help
+ Enable the kernel to trace every kernel function. This is done
+ by using a compiler feature to insert a small, 5-byte No-Operation
+ instruction to the beginning of every kernel function, which NOP
+ sequence is then dynamically patched into a tracer call when
+ tracing is enabled by the administrator. If it's runtime disabled
+ (the bootup default), then the overhead of the instructions is very
+ small and not measurable even in micro-benchmarks.
+
+config IRQSOFF_TRACER
+ bool "Interrupts-off Latency Tracer"
+ default n
+ depends on TRACE_IRQFLAGS_SUPPORT
+ depends on GENERIC_TIME
+ depends on HAVE_FTRACE
+ select TRACE_IRQFLAGS
+ select TRACING
+ select TRACER_MAX_TRACE
+ help
+ This option measures the time spent in irqs-off critical
+ sections, with microsecond accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started
+ via:
+
+ echo 0 > /debugfs/tracing/tracing_max_latency
+
+ (Note that kernel size and overhead increases with this option
+ enabled. This option and the preempt-off timing option can be
+ used together or separately.)
+
+config PREEMPT_TRACER
+ bool "Preemption-off Latency Tracer"
+ default n
+ depends on GENERIC_TIME
+ depends on PREEMPT
+ depends on HAVE_FTRACE
+ select TRACING
+ select TRACER_MAX_TRACE
+ help
+ This option measures the time spent in preemption off critical
+ sections, with microsecond accuracy.
+
+ The default measurement method is a maximum search, which is
+ disabled by default and can be runtime (re-)started
+ via:
+
+ echo 0 > /debugfs/tracing/tracing_max_latency
+
+ (Note that kernel size and overhead increases with this option
+ enabled. This option and the irqs-off timing option can be
+ used together or separately.)
+
+config SCHED_TRACER
+ bool "Scheduling Latency Tracer"
+ depends on HAVE_FTRACE
+ select TRACING
+ select CONTEXT_SWITCH_TRACER
+ select TRACER_MAX_TRACE
+ help
+ This tracer tracks the latency of the highest priority task
+ to be scheduled in, starting from the point it has woken up.
+
+config CONTEXT_SWITCH_TRACER
+ bool "Trace process context switches"
+ depends on HAVE_FTRACE
+ select TRACING
+ select MARKERS
+ help
+ This tracer gets called from the context switch and records
+ all switching of tasks.
+
+config DYNAMIC_FTRACE
+ bool "enable/disable ftrace tracepoints dynamically"
+ depends on FTRACE
+ depends on HAVE_DYNAMIC_FTRACE
+ default y
+ help
+ This option will modify all the calls to ftrace dynamically
+ (will patch them out of the binary image and replaces them
+ with a No-Op instruction) as they are called. A table is
+ created to dynamically enable them again.
+
+ This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+ has native performance as long as no tracing is active.
+
+ The changes to the code are done by a kernel thread that
+ wakes up once a second and checks to see if any ftrace calls
+ were made. If so, it runs stop_machine (stops all CPUS)
+ and modifies the code to jump over the call to ftrace.
+
+config FTRACE_SELFTEST
+ bool
+
+config FTRACE_STARTUP_TEST
+ bool "Perform a startup test on ftrace"
+ depends on TRACING
+ select FTRACE_SELFTEST
+ help
+ This option performs a series of startup tests on ftrace. On bootup
+ a series of tests are made to verify that the tracer is
+ functioning properly. It will do tests on all the configured
+ tracers of ftrace.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 000000000000..d9efbbfa2bdf
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,22 @@
+
+# Do not instrument the tracer itself:
+
+ifdef CONFIG_FTRACE
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
+
+# selftest needs instrumentation
+CFLAGS_trace_selftest_dynamic.o = -pg
+obj-y += trace_selftest_dynamic.o
+endif
+
+obj-$(CONFIG_FTRACE) += libftrace.o
+
+obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
+obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
+obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
+obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+
+libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..0f271c45cd02
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1710 @@
+/*
+ * Infrastructure for profiling code inserted by 'gcc -pg'.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Originally ported from the -rt patch by:
+ * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Based on code in the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+
+#include <linux/stop_machine.h>
+#include <linux/clocksource.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/hardirq.h>
+#include <linux/kthread.h>
+#include <linux/uaccess.h>
+#include <linux/kprobes.h>
+#include <linux/ftrace.h>
+#include <linux/sysctl.h>
+#include <linux/ctype.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+
+#include <asm/ftrace.h>
+
+#include "trace.h"
+
+/* ftrace_enabled is a method to turn ftrace on or off */
+int ftrace_enabled __read_mostly;
+static int last_ftrace_enabled;
+
+/*
+ * ftrace_disabled is set when an anomaly is discovered.
+ * ftrace_disabled is much stronger than ftrace_enabled.
+ */
+static int ftrace_disabled __read_mostly;
+
+static DEFINE_SPINLOCK(ftrace_lock);
+static DEFINE_MUTEX(ftrace_sysctl_lock);
+
+static struct ftrace_ops ftrace_list_end __read_mostly =
+{
+ .func = ftrace_stub,
+};
+
+static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
+ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+
+static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
+{
+ struct ftrace_ops *op = ftrace_list;
+
+ /* in case someone actually ports this to alpha! */
+ read_barrier_depends();
+
+ while (op != &ftrace_list_end) {
+ /* silly alpha */
+ read_barrier_depends();
+ op->func(ip, parent_ip);
+ op = op->next;
+ };
+}
+
+/**
+ * clear_ftrace_function - reset the ftrace function
+ *
+ * This NULLs the ftrace function and in essence stops
+ * tracing. There may be lag
+ */
+void clear_ftrace_function(void)
+{
+ ftrace_trace_function = ftrace_stub;
+}
+
+static int __register_ftrace_function(struct ftrace_ops *ops)
+{
+ /* Should never be called by interrupts */
+ spin_lock(&ftrace_lock);
+
+ ops->next = ftrace_list;
+ /*
+ * We are entering ops into the ftrace_list but another
+ * CPU might be walking that list. We need to make sure
+ * the ops->next pointer is valid before another CPU sees
+ * the ops pointer included into the ftrace_list.
+ */
+ smp_wmb();
+ ftrace_list = ops;
+
+ if (ftrace_enabled) {
+ /*
+ * For one func, simply call it directly.
+ * For more than one func, call the chain.
+ */
+ if (ops->next == &ftrace_list_end)
+ ftrace_trace_function = ops->func;
+ else
+ ftrace_trace_function = ftrace_list_func;
+ }
+
+ spin_unlock(&ftrace_lock);
+
+ return 0;
+}
+
+static int __unregister_ftrace_function(struct ftrace_ops *ops)
+{
+ struct ftrace_ops **p;
+ int ret = 0;
+
+ spin_lock(&ftrace_lock);
+
+ /*
+ * If we are removing the last function, then simply point
+ * to the ftrace_stub.
+ */
+ if (ftrace_list == ops && ops->next == &ftrace_list_end) {
+ ftrace_trace_function = ftrace_stub;
+ ftrace_list = &ftrace_list_end;
+ goto out;
+ }
+
+ for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
+ if (*p == ops)
+ break;
+
+ if (*p != ops) {
+ ret = -1;
+ goto out;
+ }
+
+ *p = (*p)->next;
+
+ if (ftrace_enabled) {
+ /* If we only have one func left, then call that directly */
+ if (ftrace_list == &ftrace_list_end ||
+ ftrace_list->next == &ftrace_list_end)
+ ftrace_trace_function = ftrace_list->func;
+ }
+
+ out:
+ spin_unlock(&ftrace_lock);
+
+ return ret;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static struct task_struct *ftraced_task;
+
+enum {
+ FTRACE_ENABLE_CALLS = (1 << 0),
+ FTRACE_DISABLE_CALLS = (1 << 1),
+ FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
+ FTRACE_ENABLE_MCOUNT = (1 << 3),
+ FTRACE_DISABLE_MCOUNT = (1 << 4),
+};
+
+static int ftrace_filtered;
+static int tracing_on;
+static int frozen_record_count;
+
+static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
+
+static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
+
+static DEFINE_SPINLOCK(ftrace_shutdown_lock);
+static DEFINE_MUTEX(ftraced_lock);
+static DEFINE_MUTEX(ftrace_regex_lock);
+
+struct ftrace_page {
+ struct ftrace_page *next;
+ unsigned long index;
+ struct dyn_ftrace records[];
+};
+
+#define ENTRIES_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+
+/* estimate from running different kernels */
+#define NR_TO_INIT 10000
+
+static struct ftrace_page *ftrace_pages_start;
+static struct ftrace_page *ftrace_pages;
+
+static int ftraced_trigger;
+static int ftraced_suspend;
+static int ftraced_stop;
+
+static int ftrace_record_suspend;
+
+static struct dyn_ftrace *ftrace_free_records;
+
+
+#ifdef CONFIG_KPROBES
+static inline void freeze_record(struct dyn_ftrace *rec)
+{
+ if (!(rec->flags & FTRACE_FL_FROZEN)) {
+ rec->flags |= FTRACE_FL_FROZEN;
+ frozen_record_count++;
+ }
+}
+
+static inline void unfreeze_record(struct dyn_ftrace *rec)
+{
+ if (rec->flags & FTRACE_FL_FROZEN) {
+ rec->flags &= ~FTRACE_FL_FROZEN;
+ frozen_record_count--;
+ }
+}
+
+static inline int record_frozen(struct dyn_ftrace *rec)
+{
+ return rec->flags & FTRACE_FL_FROZEN;
+}
+#else
+# define freeze_record(rec) ({ 0; })
+# define unfreeze_record(rec) ({ 0; })
+# define record_frozen(rec) ({ 0; })
+#endif /* CONFIG_KPROBES */
+
+int skip_trace(unsigned long ip)
+{
+ unsigned long fl;
+ struct dyn_ftrace *rec;
+ struct hlist_node *t;
+ struct hlist_head *head;
+
+ if (frozen_record_count == 0)
+ return 0;
+
+ head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
+ hlist_for_each_entry_rcu(rec, t, head, node) {
+ if (rec->ip == ip) {
+ if (record_frozen(rec)) {
+ if (rec->flags & FTRACE_FL_FAILED)
+ return 1;
+
+ if (!(rec->flags & FTRACE_FL_CONVERTED))
+ return 1;
+
+ if (!tracing_on || !ftrace_enabled)
+ return 1;
+
+ if (ftrace_filtered) {
+ fl = rec->flags & (FTRACE_FL_FILTER |
+ FTRACE_FL_NOTRACE);
+ if (!fl || (fl & FTRACE_FL_NOTRACE))
+ return 1;
+ }
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static inline int
+ftrace_ip_in_hash(unsigned long ip, unsigned long key)
+{
+ struct dyn_ftrace *p;
+ struct hlist_node *t;
+ int found = 0;
+
+ hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
+ if (p->ip == ip) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static inline void
+ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
+{
+ hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
+}
+
+/* called from kstop_machine */
+static inline void ftrace_del_hash(struct dyn_ftrace *node)
+{
+ hlist_del(&node->node);
+}
+
+static void ftrace_free_rec(struct dyn_ftrace *rec)
+{
+ /* no locking, only called from kstop_machine */
+
+ rec->ip = (unsigned long)ftrace_free_records;
+ ftrace_free_records = rec;
+ rec->flags |= FTRACE_FL_FREE;
+}
+
+static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
+{
+ struct dyn_ftrace *rec;
+
+ /* First check for freed records */
+ if (ftrace_free_records) {
+ rec = ftrace_free_records;
+
+ if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
+ WARN_ON_ONCE(1);
+ ftrace_free_records = NULL;
+ ftrace_disabled = 1;
+ ftrace_enabled = 0;
+ return NULL;
+ }
+
+ ftrace_free_records = (void *)rec->ip;
+ memset(rec, 0, sizeof(*rec));
+ return rec;
+ }
+
+ if (ftrace_pages->index == ENTRIES_PER_PAGE) {
+ if (!ftrace_pages->next)
+ return NULL;
+ ftrace_pages = ftrace_pages->next;
+ }
+
+ return &ftrace_pages->records[ftrace_pages->index++];
+}
+
+static void
+ftrace_record_ip(unsigned long ip)
+{
+ struct dyn_ftrace *node;
+ unsigned long flags;
+ unsigned long key;
+ int resched;
+ int atomic;
+ int cpu;
+
+ if (!ftrace_enabled || ftrace_disabled)
+ return;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ /*
+ * We simply need to protect against recursion.
+ * Use the the raw version of smp_processor_id and not
+ * __get_cpu_var which can call debug hooks that can
+ * cause a recursive crash here.
+ */
+ cpu = raw_smp_processor_id();
+ per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
+ if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
+ goto out;
+
+ if (unlikely(ftrace_record_suspend))
+ goto out;
+
+ key = hash_long(ip, FTRACE_HASHBITS);
+
+ WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
+
+ if (ftrace_ip_in_hash(ip, key))
+ goto out;
+
+ atomic = irqs_disabled();
+
+ spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+
+ /* This ip may have hit the hash before the lock */
+ if (ftrace_ip_in_hash(ip, key))
+ goto out_unlock;
+
+ node = ftrace_alloc_dyn_node(ip);
+ if (!node)
+ goto out_unlock;
+
+ node->ip = ip;
+
+ ftrace_add_hash(node, key);
+
+ ftraced_trigger = 1;
+
+ out_unlock:
+ spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+ out:
+ per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
+
+ /* prevent recursion with scheduler */
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+#define FTRACE_ADDR ((long)(ftrace_caller))
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec,
+ unsigned char *old, unsigned char *new, int enable)
+{
+ unsigned long ip, fl;
+
+ ip = rec->ip;
+
+ if (ftrace_filtered && enable) {
+ /*
+ * If filtering is on:
+ *
+ * If this record is set to be filtered and
+ * is enabled then do nothing.
+ *
+ * If this record is set to be filtered and
+ * it is not enabled, enable it.
+ *
+ * If this record is not set to be filtered
+ * and it is not enabled do nothing.
+ *
+ * If this record is set not to trace then
+ * do nothing.
+ *
+ * If this record is set not to trace and
+ * it is enabled then disable it.
+ *
+ * If this record is not set to be filtered and
+ * it is enabled, disable it.
+ */
+
+ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
+ FTRACE_FL_ENABLED);
+
+ if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
+ (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
+ !fl || (fl == FTRACE_FL_NOTRACE))
+ return 0;
+
+ /*
+ * If it is enabled disable it,
+ * otherwise enable it!
+ */
+ if (fl & FTRACE_FL_ENABLED) {
+ /* swap new and old */
+ new = old;
+ old = ftrace_call_replace(ip, FTRACE_ADDR);
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ } else {
+ new = ftrace_call_replace(ip, FTRACE_ADDR);
+ rec->flags |= FTRACE_FL_ENABLED;
+ }
+ } else {
+
+ if (enable) {
+ /*
+ * If this record is set not to trace and is
+ * not enabled, do nothing.
+ */
+ fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
+ if (fl == FTRACE_FL_NOTRACE)
+ return 0;
+
+ new = ftrace_call_replace(ip, FTRACE_ADDR);
+ } else
+ old = ftrace_call_replace(ip, FTRACE_ADDR);
+
+ if (enable) {
+ if (rec->flags & FTRACE_FL_ENABLED)
+ return 0;
+ rec->flags |= FTRACE_FL_ENABLED;
+ } else {
+ if (!(rec->flags & FTRACE_FL_ENABLED))
+ return 0;
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ }
+ }
+
+ return ftrace_modify_code(ip, old, new);
+}
+
+static void ftrace_replace_code(int enable)
+{
+ int i, failed;
+ unsigned char *new = NULL, *old = NULL;
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+
+ if (enable)
+ old = ftrace_nop_replace();
+ else
+ new = ftrace_nop_replace();
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+
+ /* don't modify code that has already faulted */
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+
+ /* ignore updates to this record's mcount site */
+ if (get_kprobe((void *)rec->ip)) {
+ freeze_record(rec);
+ continue;
+ } else {
+ unfreeze_record(rec);
+ }
+
+ failed = __ftrace_replace_code(rec, old, new, enable);
+ if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
+ rec->flags |= FTRACE_FL_FAILED;
+ if ((system_state == SYSTEM_BOOTING) ||
+ !core_kernel_text(rec->ip)) {
+ ftrace_del_hash(rec);
+ ftrace_free_rec(rec);
+ }
+ }
+ }
+ }
+}
+
+static void ftrace_shutdown_replenish(void)
+{
+ if (ftrace_pages->next)
+ return;
+
+ /* allocate another page */
+ ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
+}
+
+static int
+ftrace_code_disable(struct dyn_ftrace *rec)
+{
+ unsigned long ip;
+ unsigned char *nop, *call;
+ int failed;
+
+ ip = rec->ip;
+
+ nop = ftrace_nop_replace();
+ call = ftrace_call_replace(ip, MCOUNT_ADDR);
+
+ failed = ftrace_modify_code(ip, call, nop);
+ if (failed) {
+ rec->flags |= FTRACE_FL_FAILED;
+ return 0;
+ }
+ return 1;
+}
+
+static int __ftrace_update_code(void *ignore);
+
+static int __ftrace_modify_code(void *data)
+{
+ unsigned long addr;
+ int *command = data;
+
+ if (*command & FTRACE_ENABLE_CALLS) {
+ /*
+ * Update any recorded ips now that we have the
+ * machine stopped
+ */
+ __ftrace_update_code(NULL);
+ ftrace_replace_code(1);
+ tracing_on = 1;
+ } else if (*command & FTRACE_DISABLE_CALLS) {
+ ftrace_replace_code(0);
+ tracing_on = 0;
+ }
+
+ if (*command & FTRACE_UPDATE_TRACE_FUNC)
+ ftrace_update_ftrace_func(ftrace_trace_function);
+
+ if (*command & FTRACE_ENABLE_MCOUNT) {
+ addr = (unsigned long)ftrace_record_ip;
+ ftrace_mcount_set(&addr);
+ } else if (*command & FTRACE_DISABLE_MCOUNT) {
+ addr = (unsigned long)ftrace_stub;
+ ftrace_mcount_set(&addr);
+ }
+
+ return 0;
+}
+
+static void ftrace_run_update_code(int command)
+{
+ stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
+}
+
+void ftrace_disable_daemon(void)
+{
+ /* Stop the daemon from calling kstop_machine */
+ mutex_lock(&ftraced_lock);
+ ftraced_stop = 1;
+ mutex_unlock(&ftraced_lock);
+
+ ftrace_force_update();
+}
+
+void ftrace_enable_daemon(void)
+{
+ mutex_lock(&ftraced_lock);
+ ftraced_stop = 0;
+ mutex_unlock(&ftraced_lock);
+
+ ftrace_force_update();
+}
+
+static ftrace_func_t saved_ftrace_func;
+
+static void ftrace_startup(void)
+{
+ int command = 0;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftraced_lock);
+ ftraced_suspend++;
+ if (ftraced_suspend == 1)
+ command |= FTRACE_ENABLE_CALLS;
+
+ if (saved_ftrace_func != ftrace_trace_function) {
+ saved_ftrace_func = ftrace_trace_function;
+ command |= FTRACE_UPDATE_TRACE_FUNC;
+ }
+
+ if (!command || !ftrace_enabled)
+ goto out;
+
+ ftrace_run_update_code(command);
+ out:
+ mutex_unlock(&ftraced_lock);
+}
+
+static void ftrace_shutdown(void)
+{
+ int command = 0;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftraced_lock);
+ ftraced_suspend--;
+ if (!ftraced_suspend)
+ command |= FTRACE_DISABLE_CALLS;
+
+ if (saved_ftrace_func != ftrace_trace_function) {
+ saved_ftrace_func = ftrace_trace_function;
+ command |= FTRACE_UPDATE_TRACE_FUNC;
+ }
+
+ if (!command || !ftrace_enabled)
+ goto out;
+
+ ftrace_run_update_code(command);
+ out:
+ mutex_unlock(&ftraced_lock);
+}
+
+static void ftrace_startup_sysctl(void)
+{
+ int command = FTRACE_ENABLE_MCOUNT;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftraced_lock);
+ /* Force update next time */
+ saved_ftrace_func = NULL;
+ /* ftraced_suspend is true if we want ftrace running */
+ if (ftraced_suspend)
+ command |= FTRACE_ENABLE_CALLS;
+
+ ftrace_run_update_code(command);
+ mutex_unlock(&ftraced_lock);
+}
+
+static void ftrace_shutdown_sysctl(void)
+{
+ int command = FTRACE_DISABLE_MCOUNT;
+
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftraced_lock);
+ /* ftraced_suspend is true if ftrace is running */
+ if (ftraced_suspend)
+ command |= FTRACE_DISABLE_CALLS;
+
+ ftrace_run_update_code(command);
+ mutex_unlock(&ftraced_lock);
+}
+
+static cycle_t ftrace_update_time;
+static unsigned long ftrace_update_cnt;
+unsigned long ftrace_update_tot_cnt;
+
+static int __ftrace_update_code(void *ignore)
+{
+ int i, save_ftrace_enabled;
+ cycle_t start, stop;
+ struct dyn_ftrace *p;
+ struct hlist_node *t, *n;
+ struct hlist_head *head, temp_list;
+
+ /* Don't be recording funcs now */
+ ftrace_record_suspend++;
+ save_ftrace_enabled = ftrace_enabled;
+ ftrace_enabled = 0;
+
+ start = ftrace_now(raw_smp_processor_id());
+ ftrace_update_cnt = 0;
+
+ /* No locks needed, the machine is stopped! */
+ for (i = 0; i < FTRACE_HASHSIZE; i++) {
+ INIT_HLIST_HEAD(&temp_list);
+ head = &ftrace_hash[i];
+
+ /* all CPUS are stopped, we are safe to modify code */
+ hlist_for_each_entry_safe(p, t, n, head, node) {
+ /* Skip over failed records which have not been
+ * freed. */
+ if (p->flags & FTRACE_FL_FAILED)
+ continue;
+
+ /* Unconverted records are always at the head of the
+ * hash bucket. Once we encounter a converted record,
+ * simply skip over to the next bucket. Saves ftraced
+ * some processor cycles (ftrace does its bid for
+ * global warming :-p ). */
+ if (p->flags & (FTRACE_FL_CONVERTED))
+ break;
+
+ /* Ignore updates to this record's mcount site.
+ * Reintroduce this record at the head of this
+ * bucket to attempt to "convert" it again if
+ * the kprobe on it is unregistered before the
+ * next run. */
+ if (get_kprobe((void *)p->ip)) {
+ ftrace_del_hash(p);
+ INIT_HLIST_NODE(&p->node);
+ hlist_add_head(&p->node, &temp_list);
+ freeze_record(p);
+ continue;
+ } else {
+ unfreeze_record(p);
+ }
+
+ /* convert record (i.e, patch mcount-call with NOP) */
+ if (ftrace_code_disable(p)) {
+ p->flags |= FTRACE_FL_CONVERTED;
+ ftrace_update_cnt++;
+ } else {
+ if ((system_state == SYSTEM_BOOTING) ||
+ !core_kernel_text(p->ip)) {
+ ftrace_del_hash(p);
+ ftrace_free_rec(p);
+ }
+ }
+ }
+
+ hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
+ hlist_del(&p->node);
+ INIT_HLIST_NODE(&p->node);
+ hlist_add_head(&p->node, head);
+ }
+ }
+
+ stop = ftrace_now(raw_smp_processor_id());
+ ftrace_update_time = stop - start;
+ ftrace_update_tot_cnt += ftrace_update_cnt;
+ ftraced_trigger = 0;
+
+ ftrace_enabled = save_ftrace_enabled;
+ ftrace_record_suspend--;
+
+ return 0;
+}
+
+static int ftrace_update_code(void)
+{
+ if (unlikely(ftrace_disabled) ||
+ !ftrace_enabled || !ftraced_trigger)
+ return 0;
+
+ stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
+
+ return 1;
+}
+
+static int ftraced(void *ignore)
+{
+ unsigned long usecs;
+
+ while (!kthread_should_stop()) {
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* check once a second */
+ schedule_timeout(HZ);
+
+ if (unlikely(ftrace_disabled))
+ continue;
+
+ mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftraced_lock);
+ if (!ftraced_suspend && !ftraced_stop &&
+ ftrace_update_code()) {
+ usecs = nsecs_to_usecs(ftrace_update_time);
+ if (ftrace_update_tot_cnt > 100000) {
+ ftrace_update_tot_cnt = 0;
+ pr_info("hm, dftrace overflow: %lu change%s"
+ " (%lu total) in %lu usec%s\n",
+ ftrace_update_cnt,
+ ftrace_update_cnt != 1 ? "s" : "",
+ ftrace_update_tot_cnt,
+ usecs, usecs != 1 ? "s" : "");
+ ftrace_disabled = 1;
+ WARN_ON_ONCE(1);
+ }
+ }
+ mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ ftrace_shutdown_replenish();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+static int __init ftrace_dyn_table_alloc(void)
+{
+ struct ftrace_page *pg;
+ int cnt;
+ int i;
+
+ /* allocate a few pages */
+ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!ftrace_pages_start)
+ return -1;
+
+ /*
+ * Allocate a few more pages.
+ *
+ * TODO: have some parser search vmlinux before
+ * final linking to find all calls to ftrace.
+ * Then we can:
+ * a) know how many pages to allocate.
+ * and/or
+ * b) set up the table then.
+ *
+ * The dynamic code is still necessary for
+ * modules.
+ */
+
+ pg = ftrace_pages = ftrace_pages_start;
+
+ cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+
+ for (i = 0; i < cnt; i++) {
+ pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+
+ /* If we fail, we'll try later anyway */
+ if (!pg->next)
+ break;
+
+ pg = pg->next;
+ }
+
+ return 0;
+}
+
+enum {
+ FTRACE_ITER_FILTER = (1 << 0),
+ FTRACE_ITER_CONT = (1 << 1),
+ FTRACE_ITER_NOTRACE = (1 << 2),
+ FTRACE_ITER_FAILURES = (1 << 3),
+};
+
+#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
+
+struct ftrace_iterator {
+ loff_t pos;
+ struct ftrace_page *pg;
+ unsigned idx;
+ unsigned flags;
+ unsigned char buffer[FTRACE_BUFF_MAX+1];
+ unsigned buffer_idx;
+ unsigned filtered;
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct ftrace_iterator *iter = m->private;
+ struct dyn_ftrace *rec = NULL;
+
+ (*pos)++;
+
+ retry:
+ if (iter->idx >= iter->pg->index) {
+ if (iter->pg->next) {
+ iter->pg = iter->pg->next;
+ iter->idx = 0;
+ goto retry;
+ }
+ } else {
+ rec = &iter->pg->records[iter->idx++];
+ if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+ (rec->flags & FTRACE_FL_FAILED)) ||
+
+ ((iter->flags & FTRACE_ITER_FAILURES) &&
+ (!(rec->flags & FTRACE_FL_FAILED) ||
+ (rec->flags & FTRACE_FL_FREE))) ||
+
+ ((iter->flags & FTRACE_ITER_FILTER) &&
+ !(rec->flags & FTRACE_FL_FILTER)) ||
+
+ ((iter->flags & FTRACE_ITER_NOTRACE) &&
+ !(rec->flags & FTRACE_FL_NOTRACE))) {
+ rec = NULL;
+ goto retry;
+ }
+ }
+
+ iter->pos = *pos;
+
+ return rec;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ struct ftrace_iterator *iter = m->private;
+ void *p = NULL;
+ loff_t l = -1;
+
+ if (*pos != iter->pos) {
+ for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
+ ;
+ } else {
+ l = *pos;
+ p = t_next(m, p, &l);
+ }
+
+ return p;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ struct dyn_ftrace *rec = v;
+ char str[KSYM_SYMBOL_LEN];
+
+ if (!rec)
+ return 0;
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+
+ seq_printf(m, "%s\n", str);
+
+ return 0;
+}
+
+static struct seq_operations show_ftrace_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int
+ftrace_avail_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_iterator *iter;
+ int ret;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ iter->pg = ftrace_pages_start;
+ iter->pos = -1;
+
+ ret = seq_open(file, &show_ftrace_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+
+ m->private = iter;
+ } else {
+ kfree(iter);
+ }
+
+ return ret;
+}
+
+int ftrace_avail_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ struct ftrace_iterator *iter = m->private;
+
+ seq_release(inode, file);
+ kfree(iter);
+
+ return 0;
+}
+
+static int
+ftrace_failures_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ struct seq_file *m;
+ struct ftrace_iterator *iter;
+
+ ret = ftrace_avail_open(inode, file);
+ if (!ret) {
+ m = (struct seq_file *)file->private_data;
+ iter = (struct ftrace_iterator *)m->private;
+ iter->flags = FTRACE_ITER_FAILURES;
+ }
+
+ return ret;
+}
+
+
+static void ftrace_filter_reset(int enable)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
+ unsigned i;
+
+ /* keep kstop machine from running */
+ preempt_disable();
+ if (enable)
+ ftrace_filtered = 0;
+ pg = ftrace_pages_start;
+ while (pg) {
+ for (i = 0; i < pg->index; i++) {
+ rec = &pg->records[i];
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+ rec->flags &= ~type;
+ }
+ pg = pg->next;
+ }
+ preempt_enable();
+}
+
+static int
+ftrace_regex_open(struct inode *inode, struct file *file, int enable)
+{
+ struct ftrace_iterator *iter;
+ int ret = 0;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ mutex_lock(&ftrace_regex_lock);
+ if ((file->f_mode & FMODE_WRITE) &&
+ !(file->f_flags & O_APPEND))
+ ftrace_filter_reset(enable);
+
+ if (file->f_mode & FMODE_READ) {
+ iter->pg = ftrace_pages_start;
+ iter->pos = -1;
+ iter->flags = enable ? FTRACE_ITER_FILTER :
+ FTRACE_ITER_NOTRACE;
+
+ ret = seq_open(file, &show_ftrace_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = iter;
+ } else
+ kfree(iter);
+ } else
+ file->private_data = iter;
+ mutex_unlock(&ftrace_regex_lock);
+
+ return ret;
+}
+
+static int
+ftrace_filter_open(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_open(inode, file, 1);
+}
+
+static int
+ftrace_notrace_open(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_open(inode, file, 0);
+}
+
+static ssize_t
+ftrace_regex_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ if (file->f_mode & FMODE_READ)
+ return seq_read(file, ubuf, cnt, ppos);
+ else
+ return -EPERM;
+}
+
+static loff_t
+ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t ret;
+
+ if (file->f_mode & FMODE_READ)
+ ret = seq_lseek(file, offset, origin);
+ else
+ file->f_pos = ret = 1;
+
+ return ret;
+}
+
+enum {
+ MATCH_FULL,
+ MATCH_FRONT_ONLY,
+ MATCH_MIDDLE_ONLY,
+ MATCH_END_ONLY,
+};
+
+static void
+ftrace_match(unsigned char *buff, int len, int enable)
+{
+ char str[KSYM_SYMBOL_LEN];
+ char *search = NULL;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ int type = MATCH_FULL;
+ unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
+ unsigned i, match = 0, search_len = 0;
+
+ for (i = 0; i < len; i++) {
+ if (buff[i] == '*') {
+ if (!i) {
+ search = buff + i + 1;
+ type = MATCH_END_ONLY;
+ search_len = len - (i + 1);
+ } else {
+ if (type == MATCH_END_ONLY) {
+ type = MATCH_MIDDLE_ONLY;
+ } else {
+ match = i;
+ type = MATCH_FRONT_ONLY;
+ }
+ buff[i] = 0;
+ break;
+ }
+ }
+ }
+
+ /* keep kstop machine from running */
+ preempt_disable();
+ if (enable)
+ ftrace_filtered = 1;
+ pg = ftrace_pages_start;
+ while (pg) {
+ for (i = 0; i < pg->index; i++) {
+ int matched = 0;
+ char *ptr;
+
+ rec = &pg->records[i];
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ switch (type) {
+ case MATCH_FULL:
+ if (strcmp(str, buff) == 0)
+ matched = 1;
+ break;
+ case MATCH_FRONT_ONLY:
+ if (memcmp(str, buff, match) == 0)
+ matched = 1;
+ break;
+ case MATCH_MIDDLE_ONLY:
+ if (strstr(str, search))
+ matched = 1;
+ break;
+ case MATCH_END_ONLY:
+ ptr = strstr(str, search);
+ if (ptr && (ptr[search_len] == 0))
+ matched = 1;
+ break;
+ }
+ if (matched)
+ rec->flags |= flag;
+ }
+ pg = pg->next;
+ }
+ preempt_enable();
+}
+
+static ssize_t
+ftrace_regex_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos, int enable)
+{
+ struct ftrace_iterator *iter;
+ char ch;
+ size_t read = 0;
+ ssize_t ret;
+
+ if (!cnt || cnt < 0)
+ return 0;
+
+ mutex_lock(&ftrace_regex_lock);
+
+ if (file->f_mode & FMODE_READ) {
+ struct seq_file *m = file->private_data;
+ iter = m->private;
+ } else
+ iter = file->private_data;
+
+ if (!*ppos) {
+ iter->flags &= ~FTRACE_ITER_CONT;
+ iter->buffer_idx = 0;
+ }
+
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+
+ if (!(iter->flags & ~FTRACE_ITER_CONT)) {
+ /* skip white space */
+ while (cnt && isspace(ch)) {
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+ }
+
+ if (isspace(ch)) {
+ file->f_pos += read;
+ ret = read;
+ goto out;
+ }
+
+ iter->buffer_idx = 0;
+ }
+
+ while (cnt && !isspace(ch)) {
+ if (iter->buffer_idx < FTRACE_BUFF_MAX)
+ iter->buffer[iter->buffer_idx++] = ch;
+ else {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = get_user(ch, ubuf++);
+ if (ret)
+ goto out;
+ read++;
+ cnt--;
+ }
+
+ if (isspace(ch)) {
+ iter->filtered++;
+ iter->buffer[iter->buffer_idx] = 0;
+ ftrace_match(iter->buffer, iter->buffer_idx, enable);
+ iter->buffer_idx = 0;
+ } else
+ iter->flags |= FTRACE_ITER_CONT;
+
+
+ file->f_pos += read;
+
+ ret = read;
+ out:
+ mutex_unlock(&ftrace_regex_lock);
+
+ return ret;
+}
+
+static ssize_t
+ftrace_filter_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
+}
+
+static ssize_t
+ftrace_notrace_write(struct file *file, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
+}
+
+static void
+ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
+{
+ if (unlikely(ftrace_disabled))
+ return;
+
+ mutex_lock(&ftrace_regex_lock);
+ if (reset)
+ ftrace_filter_reset(enable);
+ if (buf)
+ ftrace_match(buf, len, enable);
+ mutex_unlock(&ftrace_regex_lock);
+}
+
+/**
+ * ftrace_set_filter - set a function to filter on in ftrace
+ * @buf - the string that holds the function filter text.
+ * @len - the length of the string.
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Filters denote which functions should be enabled when tracing is enabled.
+ * If @buf is NULL and reset is set, all functions will be enabled for tracing.
+ */
+void ftrace_set_filter(unsigned char *buf, int len, int reset)
+{
+ ftrace_set_regex(buf, len, reset, 1);
+}
+
+/**
+ * ftrace_set_notrace - set a function to not trace in ftrace
+ * @buf - the string that holds the function notrace text.
+ * @len - the length of the string.
+ * @reset - non zero to reset all filters before applying this filter.
+ *
+ * Notrace Filters denote which functions should not be enabled when tracing
+ * is enabled. If @buf is NULL and reset is set, all functions will be enabled
+ * for tracing.
+ */
+void ftrace_set_notrace(unsigned char *buf, int len, int reset)
+{
+ ftrace_set_regex(buf, len, reset, 0);
+}
+
+static int
+ftrace_regex_release(struct inode *inode, struct file *file, int enable)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ struct ftrace_iterator *iter;
+
+ mutex_lock(&ftrace_regex_lock);
+ if (file->f_mode & FMODE_READ) {
+ iter = m->private;
+
+ seq_release(inode, file);
+ } else
+ iter = file->private_data;
+
+ if (iter->buffer_idx) {
+ iter->filtered++;
+ iter->buffer[iter->buffer_idx] = 0;
+ ftrace_match(iter->buffer, iter->buffer_idx, enable);
+ }
+
+ mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftraced_lock);
+ if (iter->filtered && ftraced_suspend && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ kfree(iter);
+ mutex_unlock(&ftrace_regex_lock);
+ return 0;
+}
+
+static int
+ftrace_filter_release(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_release(inode, file, 1);
+}
+
+static int
+ftrace_notrace_release(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_release(inode, file, 0);
+}
+
+static ssize_t
+ftraced_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ /* don't worry about races */
+ char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
+ int r = strlen(buf);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+ftraced_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ if (strncmp(buf, "enable", 6) == 0)
+ val = 1;
+ else if (strncmp(buf, "disable", 7) == 0)
+ val = 0;
+ else {
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ val = !!val;
+ }
+
+ if (val)
+ ftrace_enable_daemon();
+ else
+ ftrace_disable_daemon();
+
+ filp->f_pos += cnt;
+
+ return cnt;
+}
+
+static struct file_operations ftrace_avail_fops = {
+ .open = ftrace_avail_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ftrace_avail_release,
+};
+
+static struct file_operations ftrace_failures_fops = {
+ .open = ftrace_failures_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = ftrace_avail_release,
+};
+
+static struct file_operations ftrace_filter_fops = {
+ .open = ftrace_filter_open,
+ .read = ftrace_regex_read,
+ .write = ftrace_filter_write,
+ .llseek = ftrace_regex_lseek,
+ .release = ftrace_filter_release,
+};
+
+static struct file_operations ftrace_notrace_fops = {
+ .open = ftrace_notrace_open,
+ .read = ftrace_regex_read,
+ .write = ftrace_notrace_write,
+ .llseek = ftrace_regex_lseek,
+ .release = ftrace_notrace_release,
+};
+
+static struct file_operations ftraced_fops = {
+ .open = tracing_open_generic,
+ .read = ftraced_read,
+ .write = ftraced_write,
+};
+
+/**
+ * ftrace_force_update - force an update to all recording ftrace functions
+ */
+int ftrace_force_update(void)
+{
+ int ret = 0;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftraced_lock);
+
+ /*
+ * If ftraced_trigger is not set, then there is nothing
+ * to update.
+ */
+ if (ftraced_trigger && !ftrace_update_code())
+ ret = -EBUSY;
+
+ mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ return ret;
+}
+
+static void ftrace_force_shutdown(void)
+{
+ struct task_struct *task;
+ int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
+
+ mutex_lock(&ftraced_lock);
+ task = ftraced_task;
+ ftraced_task = NULL;
+ ftraced_suspend = -1;
+ ftrace_run_update_code(command);
+ mutex_unlock(&ftraced_lock);
+
+ if (task)
+ kthread_stop(task);
+}
+
+static __init int ftrace_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("available_filter_functions", 0444,
+ d_tracer, NULL, &ftrace_avail_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'available_filter_functions' entry\n");
+
+ entry = debugfs_create_file("failures", 0444,
+ d_tracer, NULL, &ftrace_failures_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'failures' entry\n");
+
+ entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
+ NULL, &ftrace_filter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'set_ftrace_filter' entry\n");
+
+ entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+ NULL, &ftrace_notrace_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'set_ftrace_notrace' entry\n");
+
+ entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
+ NULL, &ftraced_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ftraced_enabled' entry\n");
+ return 0;
+}
+
+fs_initcall(ftrace_init_debugfs);
+
+static int __init ftrace_dynamic_init(void)
+{
+ struct task_struct *p;
+ unsigned long addr;
+ int ret;
+
+ addr = (unsigned long)ftrace_record_ip;
+
+ stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
+
+ /* ftrace_dyn_arch_init places the return code in addr */
+ if (addr) {
+ ret = (int)addr;
+ goto failed;
+ }
+
+ ret = ftrace_dyn_table_alloc();
+ if (ret)
+ goto failed;
+
+ p = kthread_run(ftraced, NULL, "ftraced");
+ if (IS_ERR(p)) {
+ ret = -1;
+ goto failed;
+ }
+
+ last_ftrace_enabled = ftrace_enabled = 1;
+ ftraced_task = p;
+
+ return 0;
+
+ failed:
+ ftrace_disabled = 1;
+ return ret;
+}
+
+core_initcall(ftrace_dynamic_init);
+#else
+# define ftrace_startup() do { } while (0)
+# define ftrace_shutdown() do { } while (0)
+# define ftrace_startup_sysctl() do { } while (0)
+# define ftrace_shutdown_sysctl() do { } while (0)
+# define ftrace_force_shutdown() do { } while (0)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/**
+ * ftrace_kill - totally shutdown ftrace
+ *
+ * This is a safety measure. If something was detected that seems
+ * wrong, calling this function will keep ftrace from doing
+ * any more modifications, and updates.
+ * used when something went wrong.
+ */
+void ftrace_kill(void)
+{
+ mutex_lock(&ftrace_sysctl_lock);
+ ftrace_disabled = 1;
+ ftrace_enabled = 0;
+
+ clear_ftrace_function();
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ /* Try to totally disable ftrace */
+ ftrace_force_shutdown();
+}
+
+/**
+ * register_ftrace_function - register a function for profiling
+ * @ops - ops structure that holds the function for profiling.
+ *
+ * Register a function to be called by all functions in the
+ * kernel.
+ *
+ * Note: @ops->func and all the functions it calls must be labeled
+ * with "notrace", otherwise it will go into a
+ * recursive loop.
+ */
+int register_ftrace_function(struct ftrace_ops *ops)
+{
+ int ret;
+
+ if (unlikely(ftrace_disabled))
+ return -1;
+
+ mutex_lock(&ftrace_sysctl_lock);
+ ret = __register_ftrace_function(ops);
+ ftrace_startup();
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ return ret;
+}
+
+/**
+ * unregister_ftrace_function - unresgister a function for profiling.
+ * @ops - ops structure that holds the function to unregister
+ *
+ * Unregister a function that was added to be called by ftrace profiling.
+ */
+int unregister_ftrace_function(struct ftrace_ops *ops)
+{
+ int ret;
+
+ mutex_lock(&ftrace_sysctl_lock);
+ ret = __unregister_ftrace_function(ops);
+ ftrace_shutdown();
+ mutex_unlock(&ftrace_sysctl_lock);
+
+ return ret;
+}
+
+int
+ftrace_enable_sysctl(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+
+ if (unlikely(ftrace_disabled))
+ return -ENODEV;
+
+ mutex_lock(&ftrace_sysctl_lock);
+
+ ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+
+ if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+ goto out;
+
+ last_ftrace_enabled = ftrace_enabled;
+
+ if (ftrace_enabled) {
+
+ ftrace_startup_sysctl();
+
+ /* we are starting ftrace again */
+ if (ftrace_list != &ftrace_list_end) {
+ if (ftrace_list->next == &ftrace_list_end)
+ ftrace_trace_function = ftrace_list->func;
+ else
+ ftrace_trace_function = ftrace_list_func;
+ }
+
+ } else {
+ /* stopping ftrace calls (just send to ftrace_stub) */
+ ftrace_trace_function = ftrace_stub;
+
+ ftrace_shutdown_sysctl();
+ }
+
+ out:
+ mutex_unlock(&ftrace_sysctl_lock);
+ return ret;
+}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 000000000000..9ade79369bfb
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3100 @@
+/*
+ * ring buffer based function tracer
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Originally taken from the RT patch by:
+ * Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Based on code from the latency_tracer, that is:
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/utsrelease.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/pagemap.h>
+#include <linux/hardirq.h>
+#include <linux/linkage.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/kprobes.h>
+#include <linux/writeback.h>
+
+#include <linux/stacktrace.h>
+
+#include "trace.h"
+
+unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly tracing_thresh;
+
+static unsigned long __read_mostly tracing_nr_buffers;
+static cpumask_t __read_mostly tracing_buffer_mask;
+
+#define for_each_tracing_cpu(cpu) \
+ for_each_cpu_mask(cpu, tracing_buffer_mask)
+
+static int trace_alloc_page(void);
+static int trace_free_page(void);
+
+static int tracing_disabled = 1;
+
+static unsigned long tracing_pages_allocated;
+
+long
+ns2usecs(cycle_t nsec)
+{
+ nsec += 500;
+ do_div(nsec, 1000);
+ return nsec;
+}
+
+cycle_t ftrace_now(int cpu)
+{
+ return cpu_clock(cpu);
+}
+
+/*
+ * The global_trace is the descriptor that holds the tracing
+ * buffers for the live tracing. For each CPU, it contains
+ * a link list of pages that will store trace entries. The
+ * page descriptor of the pages in the memory is used to hold
+ * the link list by linking the lru item in the page descriptor
+ * to each of the pages in the buffer per CPU.
+ *
+ * For each active CPU there is a data field that holds the
+ * pages for the buffer for that CPU. Each CPU has the same number
+ * of pages allocated for its buffer.
+ */
+static struct trace_array global_trace;
+
+static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
+
+/*
+ * The max_tr is used to snapshot the global_trace when a maximum
+ * latency is reached. Some tracers will use this to store a maximum
+ * trace while it continues examining live traces.
+ *
+ * The buffers for the max_tr are set up the same as the global_trace.
+ * When a snapshot is taken, the link list of the max_tr is swapped
+ * with the link list of the global_trace and the buffers are reset for
+ * the global_trace so the tracing can continue.
+ */
+static struct trace_array max_tr;
+
+static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
+
+/* tracer_enabled is used to toggle activation of a tracer */
+static int tracer_enabled = 1;
+
+/*
+ * trace_nr_entries is the number of entries that is allocated
+ * for a buffer. Note, the number of entries is always rounded
+ * to ENTRIES_PER_PAGE.
+ */
+static unsigned long trace_nr_entries = 65536UL;
+
+/* trace_types holds a link list of available tracers. */
+static struct tracer *trace_types __read_mostly;
+
+/* current_trace points to the tracer that is currently active */
+static struct tracer *current_trace __read_mostly;
+
+/*
+ * max_tracer_type_len is used to simplify the allocating of
+ * buffers to read userspace tracer names. We keep track of
+ * the longest tracer name registered.
+ */
+static int max_tracer_type_len;
+
+/*
+ * trace_types_lock is used to protect the trace_types list.
+ * This lock is also used to keep user access serialized.
+ * Accesses from userspace will grab this lock while userspace
+ * activities happen inside the kernel.
+ */
+static DEFINE_MUTEX(trace_types_lock);
+
+/* trace_wait is a waitqueue for tasks blocked on trace_poll */
+static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
+
+/* trace_flags holds iter_ctrl options */
+unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
+
+static notrace void no_trace_init(struct trace_array *tr)
+{
+ int cpu;
+
+ if(tr->ctrl)
+ for_each_online_cpu(cpu)
+ tracing_reset(tr->data[cpu]);
+ tracer_enabled = 0;
+}
+
+/* dummy trace to disable tracing */
+static struct tracer no_tracer __read_mostly = {
+ .name = "none",
+ .init = no_trace_init
+};
+
+
+/**
+ * trace_wake_up - wake up tasks waiting for trace input
+ *
+ * Simply wakes up any task that is blocked on the trace_wait
+ * queue. These is used with trace_poll for tasks polling the trace.
+ */
+void trace_wake_up(void)
+{
+ /*
+ * The runqueue_is_locked() can fail, but this is the best we
+ * have for now:
+ */
+ if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
+ wake_up(&trace_wait);
+}
+
+#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
+
+static int __init set_nr_entries(char *str)
+{
+ unsigned long nr_entries;
+ int ret;
+
+ if (!str)
+ return 0;
+ ret = strict_strtoul(str, 0, &nr_entries);
+ /* nr_entries can not be zero */
+ if (ret < 0 || nr_entries == 0)
+ return 0;
+ trace_nr_entries = nr_entries;
+ return 1;
+}
+__setup("trace_entries=", set_nr_entries);
+
+unsigned long nsecs_to_usecs(unsigned long nsecs)
+{
+ return nsecs / 1000;
+}
+
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ * IRQS_OFF - interrupts were disabled
+ * NEED_RESCED - reschedule is requested
+ * HARDIRQ - inside an interrupt handler
+ * SOFTIRQ - inside a softirq handler
+ */
+enum trace_flag_type {
+ TRACE_FLAG_IRQS_OFF = 0x01,
+ TRACE_FLAG_NEED_RESCHED = 0x02,
+ TRACE_FLAG_HARDIRQ = 0x04,
+ TRACE_FLAG_SOFTIRQ = 0x08,
+};
+
+/*
+ * TRACE_ITER_SYM_MASK masks the options in trace_flags that
+ * control the output of kernel symbols.
+ */
+#define TRACE_ITER_SYM_MASK \
+ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
+
+/* These must match the bit postions in trace_iterator_flags */
+static const char *trace_options[] = {
+ "print-parent",
+ "sym-offset",
+ "sym-addr",
+ "verbose",
+ "raw",
+ "hex",
+ "bin",
+ "block",
+ "stacktrace",
+ "sched-tree",
+ NULL
+};
+
+/*
+ * ftrace_max_lock is used to protect the swapping of buffers
+ * when taking a max snapshot. The buffers themselves are
+ * protected by per_cpu spinlocks. But the action of the swap
+ * needs its own lock.
+ *
+ * This is defined as a raw_spinlock_t in order to help
+ * with performance when lockdep debugging is enabled.
+ */
+static raw_spinlock_t ftrace_max_lock =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+/*
+ * Copy the new maximum trace into the separate maximum-trace
+ * structure. (this way the maximum trace is permanently saved,
+ * for later retrieval via /debugfs/tracing/latency_trace)
+ */
+static void
+__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ struct trace_array_cpu *data = tr->data[cpu];
+
+ max_tr.cpu = cpu;
+ max_tr.time_start = data->preempt_timestamp;
+
+ data = max_tr.data[cpu];
+ data->saved_latency = tracing_max_latency;
+
+ memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
+ data->pid = tsk->pid;
+ data->uid = tsk->uid;
+ data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
+ data->policy = tsk->policy;
+ data->rt_priority = tsk->rt_priority;
+
+ /* record this tasks comm */
+ tracing_record_cmdline(current);
+}
+
+#define CHECK_COND(cond) \
+ if (unlikely(cond)) { \
+ tracing_disabled = 1; \
+ WARN_ON(1); \
+ return -1; \
+ }
+
+/**
+ * check_pages - integrity check of trace buffers
+ *
+ * As a safty measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+int check_pages(struct trace_array_cpu *data)
+{
+ struct page *page, *tmp;
+
+ CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
+ CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
+
+ list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
+ CHECK_COND(page->lru.next->prev != &page->lru);
+ CHECK_COND(page->lru.prev->next != &page->lru);
+ }
+
+ return 0;
+}
+
+/**
+ * head_page - page address of the first page in per_cpu buffer.
+ *
+ * head_page returns the page address of the first page in
+ * a per_cpu buffer. This also preforms various consistency
+ * checks to make sure the buffer has not been corrupted.
+ */
+void *head_page(struct trace_array_cpu *data)
+{
+ struct page *page;
+
+ if (list_empty(&data->trace_pages))
+ return NULL;
+
+ page = list_entry(data->trace_pages.next, struct page, lru);
+ BUG_ON(&page->lru == &data->trace_pages);
+
+ return page_address(page);
+}
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ va_list ap;
+ int ret;
+
+ if (!len)
+ return 0;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+ va_end(ap);
+
+ /* If we can't write it all, don't bother writing anything */
+ if (ret >= len)
+ return 0;
+
+ s->len += ret;
+
+ return len;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+static int
+trace_seq_puts(struct trace_seq *s, const char *str)
+{
+ int len = strlen(str);
+
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return 0;
+
+ memcpy(s->buffer + s->len, str, len);
+ s->len += len;
+
+ return len;
+}
+
+static int
+trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+
+ s->buffer[s->len++] = c;
+
+ return 1;
+}
+
+static int
+trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
+{
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return 0;
+
+ memcpy(s->buffer + s->len, mem, len);
+ s->len += len;
+
+ return len;
+}
+
+#define HEX_CHARS 17
+static const char hex2asc[] = "0123456789abcdef";
+
+static int
+trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
+{
+ unsigned char hex[HEX_CHARS];
+ unsigned char *data = mem;
+ unsigned char byte;
+ int i, j;
+
+ BUG_ON(len >= HEX_CHARS);
+
+#ifdef __BIG_ENDIAN
+ for (i = 0, j = 0; i < len; i++) {
+#else
+ for (i = len-1, j = 0; i >= 0; i--) {
+#endif
+ byte = data[i];
+
+ hex[j++] = hex2asc[byte & 0x0f];
+ hex[j++] = hex2asc[byte >> 4];
+ }
+ hex[j++] = ' ';
+
+ return trace_seq_putmem(s, hex, j);
+}
+
+static void
+trace_seq_reset(struct trace_seq *s)
+{
+ s->len = 0;
+ s->readpos = 0;
+}
+
+ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
+{
+ int len;
+ int ret;
+
+ if (s->len <= s->readpos)
+ return -EBUSY;
+
+ len = s->len - s->readpos;
+ if (cnt > len)
+ cnt = len;
+ ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
+ if (ret)
+ return -EFAULT;
+
+ s->readpos += len;
+ return cnt;
+}
+
+static void
+trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+ int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+
+ s->buffer[len] = 0;
+ seq_puts(m, s->buffer);
+
+ trace_seq_reset(s);
+}
+
+/*
+ * flip the trace buffers between two trace descriptors.
+ * This usually is the buffers between the global_trace and
+ * the max_tr to record a snapshot of a current trace.
+ *
+ * The ftrace_max_lock must be held.
+ */
+static void
+flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
+{
+ struct list_head flip_pages;
+
+ INIT_LIST_HEAD(&flip_pages);
+
+ memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
+ sizeof(struct trace_array_cpu) -
+ offsetof(struct trace_array_cpu, trace_head_idx));
+
+ check_pages(tr1);
+ check_pages(tr2);
+ list_splice_init(&tr1->trace_pages, &flip_pages);
+ list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
+ list_splice_init(&flip_pages, &tr2->trace_pages);
+ BUG_ON(!list_empty(&flip_pages));
+ check_pages(tr1);
+ check_pages(tr2);
+}
+
+/**
+ * update_max_tr - snapshot all trace buffers from global_trace to max_tr
+ * @tr: tracer
+ * @tsk: the task with the latency
+ * @cpu: The cpu that initiated the trace.
+ *
+ * Flip the buffers between the @tr and the max_tr and record information
+ * about which task was the cause of this latency.
+ */
+void
+update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ struct trace_array_cpu *data;
+ int i;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ __raw_spin_lock(&ftrace_max_lock);
+ /* clear out all the previous traces */
+ for_each_tracing_cpu(i) {
+ data = tr->data[i];
+ flip_trace(max_tr.data[i], data);
+ tracing_reset(data);
+ }
+
+ __update_max_tr(tr, tsk, cpu);
+ __raw_spin_unlock(&ftrace_max_lock);
+}
+
+/**
+ * update_max_tr_single - only copy one trace over, and reset the rest
+ * @tr - tracer
+ * @tsk - task with the latency
+ * @cpu - the cpu of the buffer to copy.
+ *
+ * Flip the trace of a single CPU buffer between the @tr and the max_tr.
+ */
+void
+update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
+{
+ struct trace_array_cpu *data = tr->data[cpu];
+ int i;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ __raw_spin_lock(&ftrace_max_lock);
+ for_each_tracing_cpu(i)
+ tracing_reset(max_tr.data[i]);
+
+ flip_trace(max_tr.data[cpu], data);
+ tracing_reset(data);
+
+ __update_max_tr(tr, tsk, cpu);
+ __raw_spin_unlock(&ftrace_max_lock);
+}
+
+/**
+ * register_tracer - register a tracer with the ftrace system.
+ * @type - the plugin for the tracer
+ *
+ * Register a new plugin tracer.
+ */
+int register_tracer(struct tracer *type)
+{
+ struct tracer *t;
+ int len;
+ int ret = 0;
+
+ if (!type->name) {
+ pr_info("Tracer must have a name\n");
+ return -1;
+ }
+
+ mutex_lock(&trace_types_lock);
+ for (t = trace_types; t; t = t->next) {
+ if (strcmp(type->name, t->name) == 0) {
+ /* already found */
+ pr_info("Trace %s already registered\n",
+ type->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ if (type->selftest) {
+ struct tracer *saved_tracer = current_trace;
+ struct trace_array_cpu *data;
+ struct trace_array *tr = &global_trace;
+ int saved_ctrl = tr->ctrl;
+ int i;
+ /*
+ * Run a selftest on this tracer.
+ * Here we reset the trace buffer, and set the current
+ * tracer to be this tracer. The tracer can then run some
+ * internal tracing to verify that everything is in order.
+ * If we fail, we do not register this tracer.
+ */
+ for_each_tracing_cpu(i) {
+ data = tr->data[i];
+ if (!head_page(data))
+ continue;
+ tracing_reset(data);
+ }
+ current_trace = type;
+ tr->ctrl = 0;
+ /* the test is responsible for initializing and enabling */
+ pr_info("Testing tracer %s: ", type->name);
+ ret = type->selftest(type, tr);
+ /* the test is responsible for resetting too */
+ current_trace = saved_tracer;
+ tr->ctrl = saved_ctrl;
+ if (ret) {
+ printk(KERN_CONT "FAILED!\n");
+ goto out;
+ }
+ /* Only reset on passing, to avoid touching corrupted buffers */
+ for_each_tracing_cpu(i) {
+ data = tr->data[i];
+ if (!head_page(data))
+ continue;
+ tracing_reset(data);
+ }
+ printk(KERN_CONT "PASSED\n");
+ }
+#endif
+
+ type->next = trace_types;
+ trace_types = type;
+ len = strlen(type->name);
+ if (len > max_tracer_type_len)
+ max_tracer_type_len = len;
+
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
+void unregister_tracer(struct tracer *type)
+{
+ struct tracer **t;
+ int len;
+
+ mutex_lock(&trace_types_lock);
+ for (t = &trace_types; *t; t = &(*t)->next) {
+ if (*t == type)
+ goto found;
+ }
+ pr_info("Trace %s not registered\n", type->name);
+ goto out;
+
+ found:
+ *t = (*t)->next;
+ if (strlen(type->name) != max_tracer_type_len)
+ goto out;
+
+ max_tracer_type_len = 0;
+ for (t = &trace_types; *t; t = &(*t)->next) {
+ len = strlen((*t)->name);
+ if (len > max_tracer_type_len)
+ max_tracer_type_len = len;
+ }
+ out:
+ mutex_unlock(&trace_types_lock);
+}
+
+void tracing_reset(struct trace_array_cpu *data)
+{
+ data->trace_idx = 0;
+ data->overrun = 0;
+ data->trace_head = data->trace_tail = head_page(data);
+ data->trace_head_idx = 0;
+ data->trace_tail_idx = 0;
+}
+
+#define SAVED_CMDLINES 128
+static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
+static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
+static int cmdline_idx;
+static DEFINE_SPINLOCK(trace_cmdline_lock);
+
+/* temporary disable recording */
+atomic_t trace_record_cmdline_disabled __read_mostly;
+
+static void trace_init_cmdlines(void)
+{
+ memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
+ memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
+ cmdline_idx = 0;
+}
+
+void trace_stop_cmdline_recording(void);
+
+static void trace_save_cmdline(struct task_struct *tsk)
+{
+ unsigned map;
+ unsigned idx;
+
+ if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ return;
+
+ /*
+ * It's not the end of the world if we don't get
+ * the lock, but we also don't want to spin
+ * nor do we want to disable interrupts,
+ * so if we miss here, then better luck next time.
+ */
+ if (!spin_trylock(&trace_cmdline_lock))
+ return;
+
+ idx = map_pid_to_cmdline[tsk->pid];
+ if (idx >= SAVED_CMDLINES) {
+ idx = (cmdline_idx + 1) % SAVED_CMDLINES;
+
+ map = map_cmdline_to_pid[idx];
+ if (map <= PID_MAX_DEFAULT)
+ map_pid_to_cmdline[map] = (unsigned)-1;
+
+ map_pid_to_cmdline[tsk->pid] = idx;
+
+ cmdline_idx = idx;
+ }
+
+ memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+
+ spin_unlock(&trace_cmdline_lock);
+}
+
+static char *trace_find_cmdline(int pid)
+{
+ char *cmdline = "<...>";
+ unsigned map;
+
+ if (!pid)
+ return "<idle>";
+
+ if (pid > PID_MAX_DEFAULT)
+ goto out;
+
+ map = map_pid_to_cmdline[pid];
+ if (map >= SAVED_CMDLINES)
+ goto out;
+
+ cmdline = saved_cmdlines[map];
+
+ out:
+ return cmdline;
+}
+
+void tracing_record_cmdline(struct task_struct *tsk)
+{
+ if (atomic_read(&trace_record_cmdline_disabled))
+ return;
+
+ trace_save_cmdline(tsk);
+}
+
+static inline struct list_head *
+trace_next_list(struct trace_array_cpu *data, struct list_head *next)
+{
+ /*
+ * Roundrobin - but skip the head (which is not a real page):
+ */
+ next = next->next;
+ if (unlikely(next == &data->trace_pages))
+ next = next->next;
+ BUG_ON(next == &data->trace_pages);
+
+ return next;
+}
+
+static inline void *
+trace_next_page(struct trace_array_cpu *data, void *addr)
+{
+ struct list_head *next;
+ struct page *page;
+
+ page = virt_to_page(addr);
+
+ next = trace_next_list(data, &page->lru);
+ page = list_entry(next, struct page, lru);
+
+ return page_address(page);
+}
+
+static inline struct trace_entry *
+tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
+{
+ unsigned long idx, idx_next;
+ struct trace_entry *entry;
+
+ data->trace_idx++;
+ idx = data->trace_head_idx;
+ idx_next = idx + 1;
+
+ BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
+
+ entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
+
+ if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
+ data->trace_head = trace_next_page(data, data->trace_head);
+ idx_next = 0;
+ }
+
+ if (data->trace_head == data->trace_tail &&
+ idx_next == data->trace_tail_idx) {
+ /* overrun */
+ data->overrun++;
+ data->trace_tail_idx++;
+ if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
+ data->trace_tail =
+ trace_next_page(data, data->trace_tail);
+ data->trace_tail_idx = 0;
+ }
+ }
+
+ data->trace_head_idx = idx_next;
+
+ return entry;
+}
+
+static inline void
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
+{
+ struct task_struct *tsk = current;
+ unsigned long pc;
+
+ pc = preempt_count();
+
+ entry->preempt_count = pc & 0xff;
+ entry->pid = (tsk) ? tsk->pid : 0;
+ entry->t = ftrace_now(raw_smp_processor_id());
+ entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+ ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+ ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
+ (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
+}
+
+void
+trace_function(struct trace_array *tr, struct trace_array_cpu *data,
+ unsigned long ip, unsigned long parent_ip, unsigned long flags)
+{
+ struct trace_entry *entry;
+ unsigned long irq_flags;
+
+ raw_local_irq_save(irq_flags);
+ __raw_spin_lock(&data->lock);
+ entry = tracing_get_trace_entry(tr, data);
+ tracing_generic_entry_update(entry, flags);
+ entry->type = TRACE_FN;
+ entry->fn.ip = ip;
+ entry->fn.parent_ip = parent_ip;
+ __raw_spin_unlock(&data->lock);
+ raw_local_irq_restore(irq_flags);
+}
+
+void
+ftrace(struct trace_array *tr, struct trace_array_cpu *data,
+ unsigned long ip, unsigned long parent_ip, unsigned long flags)
+{
+ if (likely(!atomic_read(&data->disabled)))
+ trace_function(tr, data, ip, parent_ip, flags);
+}
+
+void __trace_stack(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long flags,
+ int skip)
+{
+ struct trace_entry *entry;
+ struct stack_trace trace;
+
+ if (!(trace_flags & TRACE_ITER_STACKTRACE))
+ return;
+
+ entry = tracing_get_trace_entry(tr, data);
+ tracing_generic_entry_update(entry, flags);
+ entry->type = TRACE_STACK;
+
+ memset(&entry->stack, 0, sizeof(entry->stack));
+
+ trace.nr_entries = 0;
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.skip = skip;
+ trace.entries = entry->stack.caller;
+
+ save_stack_trace(&trace);
+}
+
+void
+__trace_special(void *__tr, void *__data,
+ unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+ struct trace_array_cpu *data = __data;
+ struct trace_array *tr = __tr;
+ struct trace_entry *entry;
+ unsigned long irq_flags;
+
+ raw_local_irq_save(irq_flags);
+ __raw_spin_lock(&data->lock);
+ entry = tracing_get_trace_entry(tr, data);
+ tracing_generic_entry_update(entry, 0);
+ entry->type = TRACE_SPECIAL;
+ entry->special.arg1 = arg1;
+ entry->special.arg2 = arg2;
+ entry->special.arg3 = arg3;
+ __trace_stack(tr, data, irq_flags, 4);
+ __raw_spin_unlock(&data->lock);
+ raw_local_irq_restore(irq_flags);
+
+ trace_wake_up();
+}
+
+void
+tracing_sched_switch_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long flags)
+{
+ struct trace_entry *entry;
+ unsigned long irq_flags;
+
+ raw_local_irq_save(irq_flags);
+ __raw_spin_lock(&data->lock);
+ entry = tracing_get_trace_entry(tr, data);
+ tracing_generic_entry_update(entry, flags);
+ entry->type = TRACE_CTX;
+ entry->ctx.prev_pid = prev->pid;
+ entry->ctx.prev_prio = prev->prio;
+ entry->ctx.prev_state = prev->state;
+ entry->ctx.next_pid = next->pid;
+ entry->ctx.next_prio = next->prio;
+ entry->ctx.next_state = next->state;
+ __trace_stack(tr, data, flags, 5);
+ __raw_spin_unlock(&data->lock);
+ raw_local_irq_restore(irq_flags);
+}
+
+void
+tracing_sched_wakeup_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *wakee,
+ struct task_struct *curr,
+ unsigned long flags)
+{
+ struct trace_entry *entry;
+ unsigned long irq_flags;
+
+ raw_local_irq_save(irq_flags);
+ __raw_spin_lock(&data->lock);
+ entry = tracing_get_trace_entry(tr, data);
+ tracing_generic_entry_update(entry, flags);
+ entry->type = TRACE_WAKE;
+ entry->ctx.prev_pid = curr->pid;
+ entry->ctx.prev_prio = curr->prio;
+ entry->ctx.prev_state = curr->state;
+ entry->ctx.next_pid = wakee->pid;
+ entry->ctx.next_prio = wakee->prio;
+ entry->ctx.next_state = wakee->state;
+ __trace_stack(tr, data, flags, 6);
+ __raw_spin_unlock(&data->lock);
+ raw_local_irq_restore(irq_flags);
+
+ trace_wake_up();
+}
+
+void
+ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
+ return;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ __trace_special(tr, data, arg1, arg2, arg3);
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FTRACE
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ if (unlikely(!tracer_enabled))
+ return;
+
+ if (skip_trace(ip))
+ return;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ trace_function(tr, data, ip, parent_ip, flags);
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = function_trace_call,
+};
+
+void tracing_start_function_trace(void)
+{
+ register_ftrace_function(&trace_ops);
+}
+
+void tracing_stop_function_trace(void)
+{
+ unregister_ftrace_function(&trace_ops);
+}
+#endif
+
+enum trace_file_type {
+ TRACE_FILE_LAT_FMT = 1,
+};
+
+static struct trace_entry *
+trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
+ struct trace_iterator *iter, int cpu)
+{
+ struct page *page;
+ struct trace_entry *array;
+
+ if (iter->next_idx[cpu] >= tr->entries ||
+ iter->next_idx[cpu] >= data->trace_idx ||
+ (data->trace_head == data->trace_tail &&
+ data->trace_head_idx == data->trace_tail_idx))
+ return NULL;
+
+ if (!iter->next_page[cpu]) {
+ /* Initialize the iterator for this cpu trace buffer */
+ WARN_ON(!data->trace_tail);
+ page = virt_to_page(data->trace_tail);
+ iter->next_page[cpu] = &page->lru;
+ iter->next_page_idx[cpu] = data->trace_tail_idx;
+ }
+
+ page = list_entry(iter->next_page[cpu], struct page, lru);
+ BUG_ON(&data->trace_pages == &page->lru);
+
+ array = page_address(page);
+
+ WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
+ return &array[iter->next_page_idx[cpu]];
+}
+
+static struct trace_entry *
+find_next_entry(struct trace_iterator *iter, int *ent_cpu)
+{
+ struct trace_array *tr = iter->tr;
+ struct trace_entry *ent, *next = NULL;
+ int next_cpu = -1;
+ int cpu;
+
+ for_each_tracing_cpu(cpu) {
+ if (!head_page(tr->data[cpu]))
+ continue;
+ ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
+ /*
+ * Pick the entry with the smallest timestamp:
+ */
+ if (ent && (!next || ent->t < next->t)) {
+ next = ent;
+ next_cpu = cpu;
+ }
+ }
+
+ if (ent_cpu)
+ *ent_cpu = next_cpu;
+
+ return next;
+}
+
+static void trace_iterator_increment(struct trace_iterator *iter)
+{
+ iter->idx++;
+ iter->next_idx[iter->cpu]++;
+ iter->next_page_idx[iter->cpu]++;
+
+ if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
+ struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+
+ iter->next_page_idx[iter->cpu] = 0;
+ iter->next_page[iter->cpu] =
+ trace_next_list(data, iter->next_page[iter->cpu]);
+ }
+}
+
+static void trace_consume(struct trace_iterator *iter)
+{
+ struct trace_array_cpu *data = iter->tr->data[iter->cpu];
+
+ data->trace_tail_idx++;
+ if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
+ data->trace_tail = trace_next_page(data, data->trace_tail);
+ data->trace_tail_idx = 0;
+ }
+
+ /* Check if we empty it, then reset the index */
+ if (data->trace_head == data->trace_tail &&
+ data->trace_head_idx == data->trace_tail_idx)
+ data->trace_idx = 0;
+}
+
+static void *find_next_entry_inc(struct trace_iterator *iter)
+{
+ struct trace_entry *next;
+ int next_cpu = -1;
+
+ next = find_next_entry(iter, &next_cpu);
+
+ iter->prev_ent = iter->ent;
+ iter->prev_cpu = iter->cpu;
+
+ iter->ent = next;
+ iter->cpu = next_cpu;
+
+ if (next)
+ trace_iterator_increment(iter);
+
+ return next ? iter : NULL;
+}
+
+static void *s_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_iterator *iter = m->private;
+ void *last_ent = iter->ent;
+ int i = (int)*pos;
+ void *ent;
+
+ (*pos)++;
+
+ /* can't go backwards */
+ if (iter->idx > i)
+ return NULL;
+
+ if (iter->idx < 0)
+ ent = find_next_entry_inc(iter);
+ else
+ ent = iter;
+
+ while (ent && iter->idx < i)
+ ent = find_next_entry_inc(iter);
+
+ iter->pos = *pos;
+
+ if (last_ent && !ent)
+ seq_puts(m, "\n\nvim:ft=help\n");
+
+ return ent;
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+ struct trace_iterator *iter = m->private;
+ void *p = NULL;
+ loff_t l = 0;
+ int i;
+
+ mutex_lock(&trace_types_lock);
+
+ if (!current_trace || current_trace != iter->trace) {
+ mutex_unlock(&trace_types_lock);
+ return NULL;
+ }
+
+ atomic_inc(&trace_record_cmdline_disabled);
+
+ /* let the tracer grab locks here if needed */
+ if (current_trace->start)
+ current_trace->start(iter);
+
+ if (*pos != iter->pos) {
+ iter->ent = NULL;
+ iter->cpu = 0;
+ iter->idx = -1;
+ iter->prev_ent = NULL;
+ iter->prev_cpu = -1;
+
+ for_each_tracing_cpu(i) {
+ iter->next_idx[i] = 0;
+ iter->next_page[i] = NULL;
+ }
+
+ for (p = iter; p && l < *pos; p = s_next(m, p, &l))
+ ;
+
+ } else {
+ l = *pos - 1;
+ p = s_next(m, p, &l);
+ }
+
+ return p;
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+ struct trace_iterator *iter = m->private;
+
+ atomic_dec(&trace_record_cmdline_disabled);
+
+ /* let the tracer release locks here if needed */
+ if (current_trace && current_trace == iter->trace && iter->trace->stop)
+ iter->trace->stop(iter);
+
+ mutex_unlock(&trace_types_lock);
+}
+
+#define KRETPROBE_MSG "[unknown/kretprobe'd]"
+
+#ifdef CONFIG_KRETPROBES
+static inline int kretprobed(unsigned long addr)
+{
+ return addr == (unsigned long)kretprobe_trampoline;
+}
+#else
+static inline int kretprobed(unsigned long addr)
+{
+ return 0;
+}
+#endif /* CONFIG_KRETPROBES */
+
+static int
+seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+
+ kallsyms_lookup(address, NULL, NULL, NULL, str);
+
+ return trace_seq_printf(s, fmt, str);
+#endif
+ return 1;
+}
+
+static int
+seq_print_sym_offset(struct trace_seq *s, const char *fmt,
+ unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+
+ sprint_symbol(str, address);
+ return trace_seq_printf(s, fmt, str);
+#endif
+ return 1;
+}
+
+#ifndef CONFIG_64BIT
+# define IP_FMT "%08lx"
+#else
+# define IP_FMT "%016lx"
+#endif
+
+static int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
+{
+ int ret;
+
+ if (!ip)
+ return trace_seq_printf(s, "0");
+
+ if (sym_flags & TRACE_ITER_SYM_OFFSET)
+ ret = seq_print_sym_offset(s, "%s", ip);
+ else
+ ret = seq_print_sym_short(s, "%s", ip);
+
+ if (!ret)
+ return 0;
+
+ if (sym_flags & TRACE_ITER_SYM_ADDR)
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+}
+
+static void print_lat_help_header(struct seq_file *m)
+{
+ seq_puts(m, "# _------=> CPU# \n");
+ seq_puts(m, "# / _-----=> irqs-off \n");
+ seq_puts(m, "# | / _----=> need-resched \n");
+ seq_puts(m, "# || / _---=> hardirq/softirq \n");
+ seq_puts(m, "# ||| / _--=> preempt-depth \n");
+ seq_puts(m, "# |||| / \n");
+ seq_puts(m, "# ||||| delay \n");
+ seq_puts(m, "# cmd pid ||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
+}
+
+static void print_func_help_header(struct seq_file *m)
+{
+ seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | |\n");
+}
+
+
+static void
+print_trace_header(struct seq_file *m, struct trace_iterator *iter)
+{
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct trace_array *tr = iter->tr;
+ struct trace_array_cpu *data = tr->data[tr->cpu];
+ struct tracer *type = current_trace;
+ unsigned long total = 0;
+ unsigned long entries = 0;
+ int cpu;
+ const char *name = "preemption";
+
+ if (type)
+ name = type->name;
+
+ for_each_tracing_cpu(cpu) {
+ if (head_page(tr->data[cpu])) {
+ total += tr->data[cpu]->trace_idx;
+ if (tr->data[cpu]->trace_idx > tr->entries)
+ entries += tr->entries;
+ else
+ entries += tr->data[cpu]->trace_idx;
+ }
+ }
+
+ seq_printf(m, "%s latency trace v1.1.5 on %s\n",
+ name, UTS_RELEASE);
+ seq_puts(m, "-----------------------------------"
+ "---------------------------------\n");
+ seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
+ " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
+ nsecs_to_usecs(data->saved_latency),
+ entries,
+ total,
+ tr->cpu,
+#if defined(CONFIG_PREEMPT_NONE)
+ "server",
+#elif defined(CONFIG_PREEMPT_VOLUNTARY)
+ "desktop",
+#elif defined(CONFIG_PREEMPT_DESKTOP)
+ "preempt",
+#else
+ "unknown",
+#endif
+ /* These are reserved for later use */
+ 0, 0, 0, 0);
+#ifdef CONFIG_SMP
+ seq_printf(m, " #P:%d)\n", num_online_cpus());
+#else
+ seq_puts(m, ")\n");
+#endif
+ seq_puts(m, " -----------------\n");
+ seq_printf(m, " | task: %.16s-%d "
+ "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
+ data->comm, data->pid, data->uid, data->nice,
+ data->policy, data->rt_priority);
+ seq_puts(m, " -----------------\n");
+
+ if (data->critical_start) {
+ seq_puts(m, " => started at: ");
+ seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
+ trace_print_seq(m, &iter->seq);
+ seq_puts(m, "\n => ended at: ");
+ seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
+ trace_print_seq(m, &iter->seq);
+ seq_puts(m, "\n");
+ }
+
+ seq_puts(m, "\n");
+}
+
+static void
+lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
+{
+ int hardirq, softirq;
+ char *comm;
+
+ comm = trace_find_cmdline(entry->pid);
+
+ trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
+ trace_seq_printf(s, "%d", cpu);
+ trace_seq_printf(s, "%c%c",
+ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+ ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
+
+ hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
+ softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+ if (hardirq && softirq) {
+ trace_seq_putc(s, 'H');
+ } else {
+ if (hardirq) {
+ trace_seq_putc(s, 'h');
+ } else {
+ if (softirq)
+ trace_seq_putc(s, 's');
+ else
+ trace_seq_putc(s, '.');
+ }
+ }
+
+ if (entry->preempt_count)
+ trace_seq_printf(s, "%x", entry->preempt_count);
+ else
+ trace_seq_puts(s, ".");
+}
+
+unsigned long preempt_mark_thresh = 100;
+
+static void
+lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
+ unsigned long rel_usecs)
+{
+ trace_seq_printf(s, " %4lldus", abs_usecs);
+ if (rel_usecs > preempt_mark_thresh)
+ trace_seq_puts(s, "!: ");
+ else if (rel_usecs > 1)
+ trace_seq_puts(s, "+: ");
+ else
+ trace_seq_puts(s, " : ");
+}
+
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
+static int
+print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
+{
+ struct trace_seq *s = &iter->seq;
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct trace_entry *next_entry = find_next_entry(iter, NULL);
+ unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
+ struct trace_entry *entry = iter->ent;
+ unsigned long abs_usecs;
+ unsigned long rel_usecs;
+ char *comm;
+ int S, T;
+ int i;
+ unsigned state;
+
+ if (!next_entry)
+ next_entry = entry;
+ rel_usecs = ns2usecs(next_entry->t - entry->t);
+ abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
+
+ if (verbose) {
+ comm = trace_find_cmdline(entry->pid);
+ trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
+ " %ld.%03ldms (+%ld.%03ldms): ",
+ comm,
+ entry->pid, cpu, entry->flags,
+ entry->preempt_count, trace_idx,
+ ns2usecs(entry->t),
+ abs_usecs/1000,
+ abs_usecs % 1000, rel_usecs/1000,
+ rel_usecs % 1000);
+ } else {
+ lat_print_generic(s, entry, cpu);
+ lat_print_timestamp(s, abs_usecs, rel_usecs);
+ }
+ switch (entry->type) {
+ case TRACE_FN:
+ seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+ trace_seq_puts(s, " (");
+ if (kretprobed(entry->fn.parent_ip))
+ trace_seq_puts(s, KRETPROBE_MSG);
+ else
+ seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
+ trace_seq_puts(s, ")\n");
+ break;
+ case TRACE_CTX:
+ case TRACE_WAKE:
+ T = entry->ctx.next_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.next_state] : 'X';
+
+ state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
+ S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
+ comm = trace_find_cmdline(entry->ctx.next_pid);
+ trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
+ entry->ctx.prev_pid,
+ entry->ctx.prev_prio,
+ S, entry->type == TRACE_CTX ? "==>" : " +",
+ entry->ctx.next_pid,
+ entry->ctx.next_prio,
+ T, comm);
+ break;
+ case TRACE_SPECIAL:
+ trace_seq_printf(s, "# %ld %ld %ld\n",
+ entry->special.arg1,
+ entry->special.arg2,
+ entry->special.arg3);
+ break;
+ case TRACE_STACK:
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ if (i)
+ trace_seq_puts(s, " <= ");
+ seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
+ }
+ trace_seq_puts(s, "\n");
+ break;
+ default:
+ trace_seq_printf(s, "Unknown type %d\n", entry->type);
+ }
+ return 1;
+}
+
+static int print_trace_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct trace_entry *entry;
+ unsigned long usec_rem;
+ unsigned long long t;
+ unsigned long secs;
+ char *comm;
+ int ret;
+ int S, T;
+ int i;
+
+ entry = iter->ent;
+
+ comm = trace_find_cmdline(iter->ent->pid);
+
+ t = ns2usecs(entry->t);
+ usec_rem = do_div(t, 1000000ULL);
+ secs = (unsigned long)t;
+
+ ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+ if (!ret)
+ return 0;
+ ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
+ if (!ret)
+ return 0;
+ ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
+ if (!ret)
+ return 0;
+
+ switch (entry->type) {
+ case TRACE_FN:
+ ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
+ if (!ret)
+ return 0;
+ if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
+ entry->fn.parent_ip) {
+ ret = trace_seq_printf(s, " <-");
+ if (!ret)
+ return 0;
+ if (kretprobed(entry->fn.parent_ip))
+ ret = trace_seq_puts(s, KRETPROBE_MSG);
+ else
+ ret = seq_print_ip_sym(s, entry->fn.parent_ip,
+ sym_flags);
+ if (!ret)
+ return 0;
+ }
+ ret = trace_seq_printf(s, "\n");
+ if (!ret)
+ return 0;
+ break;
+ case TRACE_CTX:
+ case TRACE_WAKE:
+ S = entry->ctx.prev_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.prev_state] : 'X';
+ T = entry->ctx.next_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.next_state] : 'X';
+ ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
+ entry->ctx.prev_pid,
+ entry->ctx.prev_prio,
+ S,
+ entry->type == TRACE_CTX ? "==>" : " +",
+ entry->ctx.next_pid,
+ entry->ctx.next_prio,
+ T);
+ if (!ret)
+ return 0;
+ break;
+ case TRACE_SPECIAL:
+ ret = trace_seq_printf(s, "# %ld %ld %ld\n",
+ entry->special.arg1,
+ entry->special.arg2,
+ entry->special.arg3);
+ if (!ret)
+ return 0;
+ break;
+ case TRACE_STACK:
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ if (i) {
+ ret = trace_seq_puts(s, " <= ");
+ if (!ret)
+ return 0;
+ }
+ ret = seq_print_ip_sym(s, entry->stack.caller[i],
+ sym_flags);
+ if (!ret)
+ return 0;
+ }
+ ret = trace_seq_puts(s, "\n");
+ if (!ret)
+ return 0;
+ break;
+ }
+ return 1;
+}
+
+static int print_raw_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry;
+ int ret;
+ int S, T;
+
+ entry = iter->ent;
+
+ ret = trace_seq_printf(s, "%d %d %llu ",
+ entry->pid, iter->cpu, entry->t);
+ if (!ret)
+ return 0;
+
+ switch (entry->type) {
+ case TRACE_FN:
+ ret = trace_seq_printf(s, "%x %x\n",
+ entry->fn.ip, entry->fn.parent_ip);
+ if (!ret)
+ return 0;
+ break;
+ case TRACE_CTX:
+ case TRACE_WAKE:
+ S = entry->ctx.prev_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.prev_state] : 'X';
+ T = entry->ctx.next_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.next_state] : 'X';
+ if (entry->type == TRACE_WAKE)
+ S = '+';
+ ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
+ entry->ctx.prev_pid,
+ entry->ctx.prev_prio,
+ S,
+ entry->ctx.next_pid,
+ entry->ctx.next_prio,
+ T);
+ if (!ret)
+ return 0;
+ break;
+ case TRACE_SPECIAL:
+ case TRACE_STACK:
+ ret = trace_seq_printf(s, "# %ld %ld %ld\n",
+ entry->special.arg1,
+ entry->special.arg2,
+ entry->special.arg3);
+ if (!ret)
+ return 0;
+ break;
+ }
+ return 1;
+}
+
+#define SEQ_PUT_FIELD_RET(s, x) \
+do { \
+ if (!trace_seq_putmem(s, &(x), sizeof(x))) \
+ return 0; \
+} while (0)
+
+#define SEQ_PUT_HEX_FIELD_RET(s, x) \
+do { \
+ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
+ return 0; \
+} while (0)
+
+static int print_hex_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ unsigned char newline = '\n';
+ struct trace_entry *entry;
+ int S, T;
+
+ entry = iter->ent;
+
+ SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
+ SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->t);
+
+ switch (entry->type) {
+ case TRACE_FN:
+ SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+ break;
+ case TRACE_CTX:
+ case TRACE_WAKE:
+ S = entry->ctx.prev_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.prev_state] : 'X';
+ T = entry->ctx.next_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.next_state] : 'X';
+ if (entry->type == TRACE_WAKE)
+ S = '+';
+ SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
+ SEQ_PUT_HEX_FIELD_RET(s, S);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
+ SEQ_PUT_HEX_FIELD_RET(s, T);
+ break;
+ case TRACE_SPECIAL:
+ case TRACE_STACK:
+ SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
+ SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
+ break;
+ }
+ SEQ_PUT_FIELD_RET(s, newline);
+
+ return 1;
+}
+
+static int print_bin_fmt(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry;
+
+ entry = iter->ent;
+
+ SEQ_PUT_FIELD_RET(s, entry->pid);
+ SEQ_PUT_FIELD_RET(s, entry->cpu);
+ SEQ_PUT_FIELD_RET(s, entry->t);
+
+ switch (entry->type) {
+ case TRACE_FN:
+ SEQ_PUT_FIELD_RET(s, entry->fn.ip);
+ SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
+ break;
+ case TRACE_CTX:
+ SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
+ SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
+ SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
+ SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
+ SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
+ SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
+ break;
+ case TRACE_SPECIAL:
+ case TRACE_STACK:
+ SEQ_PUT_FIELD_RET(s, entry->special.arg1);
+ SEQ_PUT_FIELD_RET(s, entry->special.arg2);
+ SEQ_PUT_FIELD_RET(s, entry->special.arg3);
+ break;
+ }
+ return 1;
+}
+
+static int trace_empty(struct trace_iterator *iter)
+{
+ struct trace_array_cpu *data;
+ int cpu;
+
+ for_each_tracing_cpu(cpu) {
+ data = iter->tr->data[cpu];
+
+ if (head_page(data) && data->trace_idx &&
+ (data->trace_tail != data->trace_head ||
+ data->trace_tail_idx != data->trace_head_idx))
+ return 0;
+ }
+ return 1;
+}
+
+static int print_trace_line(struct trace_iterator *iter)
+{
+ if (iter->trace && iter->trace->print_line)
+ return iter->trace->print_line(iter);
+
+ if (trace_flags & TRACE_ITER_BIN)
+ return print_bin_fmt(iter);
+
+ if (trace_flags & TRACE_ITER_HEX)
+ return print_hex_fmt(iter);
+
+ if (trace_flags & TRACE_ITER_RAW)
+ return print_raw_fmt(iter);
+
+ if (iter->iter_flags & TRACE_FILE_LAT_FMT)
+ return print_lat_fmt(iter, iter->idx, iter->cpu);
+
+ return print_trace_fmt(iter);
+}
+
+static int s_show(struct seq_file *m, void *v)
+{
+ struct trace_iterator *iter = v;
+
+ if (iter->ent == NULL) {
+ if (iter->tr) {
+ seq_printf(m, "# tracer: %s\n", iter->trace->name);
+ seq_puts(m, "#\n");
+ }
+ if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+ /* print nothing if the buffers are empty */
+ if (trace_empty(iter))
+ return 0;
+ print_trace_header(m, iter);
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_lat_help_header(m);
+ } else {
+ if (!(trace_flags & TRACE_ITER_VERBOSE))
+ print_func_help_header(m);
+ }
+ } else {
+ print_trace_line(iter);
+ trace_print_seq(m, &iter->seq);
+ }
+
+ return 0;
+}
+
+static struct seq_operations tracer_seq_ops = {
+ .start = s_start,
+ .next = s_next,
+ .stop = s_stop,
+ .show = s_show,
+};
+
+static struct trace_iterator *
+__tracing_open(struct inode *inode, struct file *file, int *ret)
+{
+ struct trace_iterator *iter;
+
+ if (tracing_disabled) {
+ *ret = -ENODEV;
+ return NULL;
+ }
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter) {
+ *ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_lock(&trace_types_lock);
+ if (current_trace && current_trace->print_max)
+ iter->tr = &max_tr;
+ else
+ iter->tr = inode->i_private;
+ iter->trace = current_trace;
+ iter->pos = -1;
+
+ /* TODO stop tracer */
+ *ret = seq_open(file, &tracer_seq_ops);
+ if (!*ret) {
+ struct seq_file *m = file->private_data;
+ m->private = iter;
+
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl)
+ tracer_enabled = 0;
+
+ if (iter->trace && iter->trace->open)
+ iter->trace->open(iter);
+ } else {
+ kfree(iter);
+ iter = NULL;
+ }
+ mutex_unlock(&trace_types_lock);
+
+ out:
+ return iter;
+}
+
+int tracing_open_generic(struct inode *inode, struct file *filp)
+{
+ if (tracing_disabled)
+ return -ENODEV;
+
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+int tracing_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ struct trace_iterator *iter = m->private;
+
+ mutex_lock(&trace_types_lock);
+ if (iter->trace && iter->trace->close)
+ iter->trace->close(iter);
+
+ /* reenable tracing if it was previously enabled */
+ if (iter->tr->ctrl)
+ tracer_enabled = 1;
+ mutex_unlock(&trace_types_lock);
+
+ seq_release(inode, file);
+ kfree(iter);
+ return 0;
+}
+
+static int tracing_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ __tracing_open(inode, file, &ret);
+
+ return ret;
+}
+
+static int tracing_lt_open(struct inode *inode, struct file *file)
+{
+ struct trace_iterator *iter;
+ int ret;
+
+ iter = __tracing_open(inode, file, &ret);
+
+ if (!ret)
+ iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
+ return ret;
+}
+
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct tracer *t = m->private;
+
+ (*pos)++;
+
+ if (t)
+ t = t->next;
+
+ m->private = t;
+
+ return t;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+ struct tracer *t = m->private;
+ loff_t l = 0;
+
+ mutex_lock(&trace_types_lock);
+ for (; t && l < *pos; t = t_next(m, t, &l))
+ ;
+
+ return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&trace_types_lock);
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+ struct tracer *t = v;
+
+ if (!t)
+ return 0;
+
+ seq_printf(m, "%s", t->name);
+ if (t->next)
+ seq_putc(m, ' ');
+ else
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static struct seq_operations show_traces_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .stop = t_stop,
+ .show = t_show,
+};
+
+static int show_traces_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ ret = seq_open(file, &show_traces_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = trace_types;
+ }
+
+ return ret;
+}
+
+static struct file_operations tracing_fops = {
+ .open = tracing_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_release,
+};
+
+static struct file_operations tracing_lt_fops = {
+ .open = tracing_lt_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_release,
+};
+
+static struct file_operations show_traces_fops = {
+ .open = show_traces_open,
+ .read = seq_read,
+ .release = seq_release,
+};
+
+/*
+ * Only trace on a CPU if the bitmask is set:
+ */
+static cpumask_t tracing_cpumask = CPU_MASK_ALL;
+
+/*
+ * When tracing/tracing_cpu_mask is modified then this holds
+ * the new bitmask we are about to install:
+ */
+static cpumask_t tracing_cpumask_new;
+
+/*
+ * The tracer itself will not take this lock, but still we want
+ * to provide a consistent cpumask to user-space:
+ */
+static DEFINE_MUTEX(tracing_cpumask_update_lock);
+
+/*
+ * Temporary storage for the character representation of the
+ * CPU bitmask (and one more byte for the newline):
+ */
+static char mask_str[NR_CPUS + 1];
+
+static ssize_t
+tracing_cpumask_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ int len;
+
+ mutex_lock(&tracing_cpumask_update_lock);
+
+ len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+ if (count - len < 2) {
+ count = -EINVAL;
+ goto out_err;
+ }
+ len += sprintf(mask_str + len, "\n");
+ count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
+
+out_err:
+ mutex_unlock(&tracing_cpumask_update_lock);
+
+ return count;
+}
+
+static ssize_t
+tracing_cpumask_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ int err, cpu;
+
+ mutex_lock(&tracing_cpumask_update_lock);
+ err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+ if (err)
+ goto err_unlock;
+
+ raw_local_irq_disable();
+ __raw_spin_lock(&ftrace_max_lock);
+ for_each_tracing_cpu(cpu) {
+ /*
+ * Increase/decrease the disabled counter if we are
+ * about to flip a bit in the cpumask:
+ */
+ if (cpu_isset(cpu, tracing_cpumask) &&
+ !cpu_isset(cpu, tracing_cpumask_new)) {
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ }
+ if (!cpu_isset(cpu, tracing_cpumask) &&
+ cpu_isset(cpu, tracing_cpumask_new)) {
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ }
+ }
+ __raw_spin_unlock(&ftrace_max_lock);
+ raw_local_irq_enable();
+
+ tracing_cpumask = tracing_cpumask_new;
+
+ mutex_unlock(&tracing_cpumask_update_lock);
+
+ return count;
+
+err_unlock:
+ mutex_unlock(&tracing_cpumask_update_lock);
+
+ return err;
+}
+
+static struct file_operations tracing_cpumask_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_cpumask_read,
+ .write = tracing_cpumask_write,
+};
+
+static ssize_t
+tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char *buf;
+ int r = 0;
+ int len = 0;
+ int i;
+
+ /* calulate max size */
+ for (i = 0; trace_options[i]; i++) {
+ len += strlen(trace_options[i]);
+ len += 3; /* "no" and space */
+ }
+
+ /* +2 for \n and \0 */
+ buf = kmalloc(len + 2, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; trace_options[i]; i++) {
+ if (trace_flags & (1 << i))
+ r += sprintf(buf + r, "%s ", trace_options[i]);
+ else
+ r += sprintf(buf + r, "no%s ", trace_options[i]);
+ }
+
+ r += sprintf(buf + r, "\n");
+ WARN_ON(r >= len + 2);
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ kfree(buf);
+
+ return r;
+}
+
+static ssize_t
+tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ char *cmp = buf;
+ int neg = 0;
+ int i;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ if (strncmp(buf, "no", 2) == 0) {
+ neg = 1;
+ cmp += 2;
+ }
+
+ for (i = 0; trace_options[i]; i++) {
+ int len = strlen(trace_options[i]);
+
+ if (strncmp(cmp, trace_options[i], len) == 0) {
+ if (neg)
+ trace_flags &= ~(1 << i);
+ else
+ trace_flags |= (1 << i);
+ break;
+ }
+ }
+ /*
+ * If no option could be set, return an error:
+ */
+ if (!trace_options[i])
+ return -EINVAL;
+
+ filp->f_pos += cnt;
+
+ return cnt;
+}
+
+static struct file_operations tracing_iter_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_iter_ctrl_read,
+ .write = tracing_iter_ctrl_write,
+};
+
+static const char readme_msg[] =
+ "tracing mini-HOWTO:\n\n"
+ "# mkdir /debug\n"
+ "# mount -t debugfs nodev /debug\n\n"
+ "# cat /debug/tracing/available_tracers\n"
+ "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+ "# cat /debug/tracing/current_tracer\n"
+ "none\n"
+ "# echo sched_switch > /debug/tracing/current_tracer\n"
+ "# cat /debug/tracing/current_tracer\n"
+ "sched_switch\n"
+ "# cat /debug/tracing/iter_ctrl\n"
+ "noprint-parent nosym-offset nosym-addr noverbose\n"
+ "# echo print-parent > /debug/tracing/iter_ctrl\n"
+ "# echo 1 > /debug/tracing/tracing_enabled\n"
+ "# cat /debug/tracing/trace > /tmp/trace.txt\n"
+ "echo 0 > /debug/tracing/tracing_enabled\n"
+;
+
+static ssize_t
+tracing_readme_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return simple_read_from_buffer(ubuf, cnt, ppos,
+ readme_msg, strlen(readme_msg));
+}
+
+static struct file_operations tracing_readme_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_readme_read,
+};
+
+static ssize_t
+tracing_ctrl_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = sprintf(buf, "%ld\n", tr->ctrl);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_ctrl_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ val = !!val;
+
+ mutex_lock(&trace_types_lock);
+ if (tr->ctrl ^ val) {
+ if (val)
+ tracer_enabled = 1;
+ else
+ tracer_enabled = 0;
+
+ tr->ctrl = val;
+
+ if (current_trace && current_trace->ctrl_update)
+ current_trace->ctrl_update(tr);
+ }
+ mutex_unlock(&trace_types_lock);
+
+ filp->f_pos += cnt;
+
+ return cnt;
+}
+
+static ssize_t
+tracing_set_trace_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[max_tracer_type_len+2];
+ int r;
+
+ mutex_lock(&trace_types_lock);
+ if (current_trace)
+ r = sprintf(buf, "%s\n", current_trace->name);
+ else
+ r = sprintf(buf, "\n");
+ mutex_unlock(&trace_types_lock);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = &global_trace;
+ struct tracer *t;
+ char buf[max_tracer_type_len+1];
+ int i;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ mutex_lock(&trace_types_lock);
+ for (t = trace_types; t; t = t->next) {
+ if (strcmp(t->name, buf) == 0)
+ break;
+ }
+ if (!t || t == current_trace)
+ goto out;
+
+ if (current_trace && current_trace->reset)
+ current_trace->reset(tr);
+
+ current_trace = t;
+ if (t->init)
+ t->init(tr);
+
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ filp->f_pos += cnt;
+
+ return cnt;
+}
+
+static ssize_t
+tracing_max_lat_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long *ptr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = snprintf(buf, sizeof(buf), "%ld\n",
+ *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
+ if (r > sizeof(buf))
+ r = sizeof(buf);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_max_lat_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ long *ptr = filp->private_data;
+ char buf[64];
+ long val;
+ int ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ *ptr = val * 1000;
+
+ return cnt;
+}
+
+static atomic_t tracing_reader;
+
+static int tracing_open_pipe(struct inode *inode, struct file *filp)
+{
+ struct trace_iterator *iter;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ /* We only allow for reader of the pipe */
+ if (atomic_inc_return(&tracing_reader) != 1) {
+ atomic_dec(&tracing_reader);
+ return -EBUSY;
+ }
+
+ /* create a buffer to store the information to pass to userspace */
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ mutex_lock(&trace_types_lock);
+ iter->tr = &global_trace;
+ iter->trace = current_trace;
+ filp->private_data = iter;
+
+ if (iter->trace->pipe_open)
+ iter->trace->pipe_open(iter);
+ mutex_unlock(&trace_types_lock);
+
+ return 0;
+}
+
+static int tracing_release_pipe(struct inode *inode, struct file *file)
+{
+ struct trace_iterator *iter = file->private_data;
+
+ kfree(iter);
+ atomic_dec(&tracing_reader);
+
+ return 0;
+}
+
+static unsigned int
+tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+{
+ struct trace_iterator *iter = filp->private_data;
+
+ if (trace_flags & TRACE_ITER_BLOCK) {
+ /*
+ * Always select as readable when in blocking mode
+ */
+ return POLLIN | POLLRDNORM;
+ } else {
+ if (!trace_empty(iter))
+ return POLLIN | POLLRDNORM;
+ poll_wait(filp, &trace_wait, poll_table);
+ if (!trace_empty(iter))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+ }
+}
+
+/*
+ * Consumer reader.
+ */
+static ssize_t
+tracing_read_pipe(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_iterator *iter = filp->private_data;
+ struct trace_array_cpu *data;
+ static cpumask_t mask;
+ unsigned long flags;
+#ifdef CONFIG_FTRACE
+ int ftrace_save;
+#endif
+ int cpu;
+ ssize_t sret;
+
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+ return sret;
+ sret = 0;
+
+ trace_seq_reset(&iter->seq);
+
+ mutex_lock(&trace_types_lock);
+ if (iter->trace->read) {
+ sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
+ if (sret)
+ goto out;
+ }
+
+ while (trace_empty(iter)) {
+
+ if ((filp->f_flags & O_NONBLOCK)) {
+ sret = -EAGAIN;
+ goto out;
+ }
+
+ /*
+ * This is a make-shift waitqueue. The reason we don't use
+ * an actual wait queue is because:
+ * 1) we only ever have one waiter
+ * 2) the tracing, traces all functions, we don't want
+ * the overhead of calling wake_up and friends
+ * (and tracing them too)
+ * Anyway, this is really very primitive wakeup.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ iter->tr->waiter = current;
+
+ mutex_unlock(&trace_types_lock);
+
+ /* sleep for 100 msecs, and try again. */
+ schedule_timeout(HZ/10);
+
+ mutex_lock(&trace_types_lock);
+
+ iter->tr->waiter = NULL;
+
+ if (signal_pending(current)) {
+ sret = -EINTR;
+ goto out;
+ }
+
+ if (iter->trace != current_trace)
+ goto out;
+
+ /*
+ * We block until we read something and tracing is disabled.
+ * We still block if tracing is disabled, but we have never
+ * read anything. This allows a user to cat this file, and
+ * then enable tracing. But after we have read something,
+ * we give an EOF when tracing is again disabled.
+ *
+ * iter->pos will be 0 if we haven't read anything.
+ */
+ if (!tracer_enabled && iter->pos)
+ break;
+
+ continue;
+ }
+
+ /* stop when tracing is finished */
+ if (trace_empty(iter))
+ goto out;
+
+ if (cnt >= PAGE_SIZE)
+ cnt = PAGE_SIZE - 1;
+
+ /* reset all but tr, trace, and overruns */
+ memset(&iter->seq, 0,
+ sizeof(struct trace_iterator) -
+ offsetof(struct trace_iterator, seq));
+ iter->pos = -1;
+
+ /*
+ * We need to stop all tracing on all CPUS to read the
+ * the next buffer. This is a bit expensive, but is
+ * not done often. We fill all what we can read,
+ * and then release the locks again.
+ */
+
+ cpus_clear(mask);
+ local_irq_save(flags);
+#ifdef CONFIG_FTRACE
+ ftrace_save = ftrace_enabled;
+ ftrace_enabled = 0;
+#endif
+ smp_wmb();
+ for_each_tracing_cpu(cpu) {
+ data = iter->tr->data[cpu];
+
+ if (!head_page(data) || !data->trace_idx)
+ continue;
+
+ atomic_inc(&data->disabled);
+ cpu_set(cpu, mask);
+ }
+
+ for_each_cpu_mask(cpu, mask) {
+ data = iter->tr->data[cpu];
+ __raw_spin_lock(&data->lock);
+
+ if (data->overrun > iter->last_overrun[cpu])
+ iter->overrun[cpu] +=
+ data->overrun - iter->last_overrun[cpu];
+ iter->last_overrun[cpu] = data->overrun;
+ }
+
+ while (find_next_entry_inc(iter) != NULL) {
+ int ret;
+ int len = iter->seq.len;
+
+ ret = print_trace_line(iter);
+ if (!ret) {
+ /* don't print partial lines */
+ iter->seq.len = len;
+ break;
+ }
+
+ trace_consume(iter);
+
+ if (iter->seq.len >= cnt)
+ break;
+ }
+
+ for_each_cpu_mask(cpu, mask) {
+ data = iter->tr->data[cpu];
+ __raw_spin_unlock(&data->lock);
+ }
+
+ for_each_cpu_mask(cpu, mask) {
+ data = iter->tr->data[cpu];
+ atomic_dec(&data->disabled);
+ }
+#ifdef CONFIG_FTRACE
+ ftrace_enabled = ftrace_save;
+#endif
+ local_irq_restore(flags);
+
+ /* Now copy what we have to the user */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (iter->seq.readpos >= iter->seq.len)
+ trace_seq_reset(&iter->seq);
+ if (sret == -EBUSY)
+ sret = 0;
+
+out:
+ mutex_unlock(&trace_types_lock);
+
+ return sret;
+}
+
+static ssize_t
+tracing_entries_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = sprintf(buf, "%lu\n", tr->entries);
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_entries_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long val;
+ char buf[64];
+ int i, ret;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ /* must have at least 1 entry */
+ if (!val)
+ return -EINVAL;
+
+ mutex_lock(&trace_types_lock);
+
+ if (current_trace != &no_tracer) {
+ cnt = -EBUSY;
+ pr_info("ftrace: set current_tracer to none"
+ " before modifying buffer size\n");
+ goto out;
+ }
+
+ if (val > global_trace.entries) {
+ long pages_requested;
+ unsigned long freeable_pages;
+
+ /* make sure we have enough memory before mapping */
+ pages_requested =
+ (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
+
+ /* account for each buffer (and max_tr) */
+ pages_requested *= tracing_nr_buffers * 2;
+
+ /* Check for overflow */
+ if (pages_requested < 0) {
+ cnt = -ENOMEM;
+ goto out;
+ }
+
+ freeable_pages = determine_dirtyable_memory();
+
+ /* we only allow to request 1/4 of useable memory */
+ if (pages_requested >
+ ((freeable_pages + tracing_pages_allocated) / 4)) {
+ cnt = -ENOMEM;
+ goto out;
+ }
+
+ while (global_trace.entries < val) {
+ if (trace_alloc_page()) {
+ cnt = -ENOMEM;
+ goto out;
+ }
+ /* double check that we don't go over the known pages */
+ if (tracing_pages_allocated > pages_requested)
+ break;
+ }
+
+ } else {
+ /* include the number of entries in val (inc of page entries) */
+ while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
+ trace_free_page();
+ }
+
+ /* check integrity */
+ for_each_tracing_cpu(i)
+ check_pages(global_trace.data[i]);
+
+ filp->f_pos += cnt;
+
+ /* If check pages failed, return ENOMEM */
+ if (tracing_disabled)
+ cnt = -ENOMEM;
+ out:
+ max_tr.entries = global_trace.entries;
+ mutex_unlock(&trace_types_lock);
+
+ return cnt;
+}
+
+static struct file_operations tracing_max_lat_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_max_lat_read,
+ .write = tracing_max_lat_write,
+};
+
+static struct file_operations tracing_ctrl_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_ctrl_read,
+ .write = tracing_ctrl_write,
+};
+
+static struct file_operations set_tracer_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_set_trace_read,
+ .write = tracing_set_trace_write,
+};
+
+static struct file_operations tracing_pipe_fops = {
+ .open = tracing_open_pipe,
+ .poll = tracing_poll_pipe,
+ .read = tracing_read_pipe,
+ .release = tracing_release_pipe,
+};
+
+static struct file_operations tracing_entries_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_entries_read,
+ .write = tracing_entries_write,
+};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static ssize_t
+tracing_read_long(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long *p = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = sprintf(buf, "%ld\n", *p);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static struct file_operations tracing_read_long_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_read_long,
+};
+#endif
+
+static struct dentry *d_tracer;
+
+struct dentry *tracing_init_dentry(void)
+{
+ static int once;
+
+ if (d_tracer)
+ return d_tracer;
+
+ d_tracer = debugfs_create_dir("tracing", NULL);
+
+ if (!d_tracer && !once) {
+ once = 1;
+ pr_warning("Could not create debugfs directory 'tracing'\n");
+ return NULL;
+ }
+
+ return d_tracer;
+}
+
+#ifdef CONFIG_FTRACE_SELFTEST
+/* Let selftest have access to static functions in this file */
+#include "trace_selftest.c"
+#endif
+
+static __init void tracer_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
+ &global_trace, &tracing_ctrl_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+
+ entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
+ NULL, &tracing_iter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
+
+ entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
+ NULL, &tracing_cpumask_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
+
+ entry = debugfs_create_file("latency_trace", 0444, d_tracer,
+ &global_trace, &tracing_lt_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'latency_trace' entry\n");
+
+ entry = debugfs_create_file("trace", 0444, d_tracer,
+ &global_trace, &tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'trace' entry\n");
+
+ entry = debugfs_create_file("available_tracers", 0444, d_tracer,
+ &global_trace, &show_traces_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'trace' entry\n");
+
+ entry = debugfs_create_file("current_tracer", 0444, d_tracer,
+ &global_trace, &set_tracer_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'trace' entry\n");
+
+ entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
+ &tracing_max_latency,
+ &tracing_max_lat_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'tracing_max_latency' entry\n");
+
+ entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
+ &tracing_thresh, &tracing_max_lat_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'tracing_threash' entry\n");
+ entry = debugfs_create_file("README", 0644, d_tracer,
+ NULL, &tracing_readme_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'README' entry\n");
+
+ entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
+ NULL, &tracing_pipe_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'tracing_threash' entry\n");
+
+ entry = debugfs_create_file("trace_entries", 0644, d_tracer,
+ &global_trace, &tracing_entries_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'tracing_threash' entry\n");
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+ &ftrace_update_tot_cnt,
+ &tracing_read_long_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'dyn_ftrace_total_info' entry\n");
+#endif
+}
+
+static int trace_alloc_page(void)
+{
+ struct trace_array_cpu *data;
+ struct page *page, *tmp;
+ LIST_HEAD(pages);
+ void *array;
+ unsigned pages_allocated = 0;
+ int i;
+
+ /* first allocate a page for each CPU */
+ for_each_tracing_cpu(i) {
+ array = (void *)__get_free_page(GFP_KERNEL);
+ if (array == NULL) {
+ printk(KERN_ERR "tracer: failed to allocate page"
+ "for trace buffer!\n");
+ goto free_pages;
+ }
+
+ pages_allocated++;
+ page = virt_to_page(array);
+ list_add(&page->lru, &pages);
+
+/* Only allocate if we are actually using the max trace */
+#ifdef CONFIG_TRACER_MAX_TRACE
+ array = (void *)__get_free_page(GFP_KERNEL);
+ if (array == NULL) {
+ printk(KERN_ERR "tracer: failed to allocate page"
+ "for trace buffer!\n");
+ goto free_pages;
+ }
+ pages_allocated++;
+ page = virt_to_page(array);
+ list_add(&page->lru, &pages);
+#endif
+ }
+
+ /* Now that we successfully allocate a page per CPU, add them */
+ for_each_tracing_cpu(i) {
+ data = global_trace.data[i];
+ page = list_entry(pages.next, struct page, lru);
+ list_del_init(&page->lru);
+ list_add_tail(&page->lru, &data->trace_pages);
+ ClearPageLRU(page);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ data = max_tr.data[i];
+ page = list_entry(pages.next, struct page, lru);
+ list_del_init(&page->lru);
+ list_add_tail(&page->lru, &data->trace_pages);
+ SetPageLRU(page);
+#endif
+ }
+ tracing_pages_allocated += pages_allocated;
+ global_trace.entries += ENTRIES_PER_PAGE;
+
+ return 0;
+
+ free_pages:
+ list_for_each_entry_safe(page, tmp, &pages, lru) {
+ list_del_init(&page->lru);
+ __free_page(page);
+ }
+ return -ENOMEM;
+}
+
+static int trace_free_page(void)
+{
+ struct trace_array_cpu *data;
+ struct page *page;
+ struct list_head *p;
+ int i;
+ int ret = 0;
+
+ /* free one page from each buffer */
+ for_each_tracing_cpu(i) {
+ data = global_trace.data[i];
+ p = data->trace_pages.next;
+ if (p == &data->trace_pages) {
+ /* should never happen */
+ WARN_ON(1);
+ tracing_disabled = 1;
+ ret = -1;
+ break;
+ }
+ page = list_entry(p, struct page, lru);
+ ClearPageLRU(page);
+ list_del(&page->lru);
+ tracing_pages_allocated--;
+ tracing_pages_allocated--;
+ __free_page(page);
+
+ tracing_reset(data);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ data = max_tr.data[i];
+ p = data->trace_pages.next;
+ if (p == &data->trace_pages) {
+ /* should never happen */
+ WARN_ON(1);
+ tracing_disabled = 1;
+ ret = -1;
+ break;
+ }
+ page = list_entry(p, struct page, lru);
+ ClearPageLRU(page);
+ list_del(&page->lru);
+ __free_page(page);
+
+ tracing_reset(data);
+#endif
+ }
+ global_trace.entries -= ENTRIES_PER_PAGE;
+
+ return ret;
+}
+
+__init static int tracer_alloc_buffers(void)
+{
+ struct trace_array_cpu *data;
+ void *array;
+ struct page *page;
+ int pages = 0;
+ int ret = -ENOMEM;
+ int i;
+
+ /* TODO: make the number of buffers hot pluggable with CPUS */
+ tracing_nr_buffers = num_possible_cpus();
+ tracing_buffer_mask = cpu_possible_map;
+
+ /* Allocate the first page for all buffers */
+ for_each_tracing_cpu(i) {
+ data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
+ max_tr.data[i] = &per_cpu(max_data, i);
+
+ array = (void *)__get_free_page(GFP_KERNEL);
+ if (array == NULL) {
+ printk(KERN_ERR "tracer: failed to allocate page"
+ "for trace buffer!\n");
+ goto free_buffers;
+ }
+
+ /* set the array to the list */
+ INIT_LIST_HEAD(&data->trace_pages);
+ page = virt_to_page(array);
+ list_add(&page->lru, &data->trace_pages);
+ /* use the LRU flag to differentiate the two buffers */
+ ClearPageLRU(page);
+
+ data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+/* Only allocate if we are actually using the max trace */
+#ifdef CONFIG_TRACER_MAX_TRACE
+ array = (void *)__get_free_page(GFP_KERNEL);
+ if (array == NULL) {
+ printk(KERN_ERR "tracer: failed to allocate page"
+ "for trace buffer!\n");
+ goto free_buffers;
+ }
+
+ INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
+ page = virt_to_page(array);
+ list_add(&page->lru, &max_tr.data[i]->trace_pages);
+ SetPageLRU(page);
+#endif
+ }
+
+ /*
+ * Since we allocate by orders of pages, we may be able to
+ * round up a bit.
+ */
+ global_trace.entries = ENTRIES_PER_PAGE;
+ pages++;
+
+ while (global_trace.entries < trace_nr_entries) {
+ if (trace_alloc_page())
+ break;
+ pages++;
+ }
+ max_tr.entries = global_trace.entries;
+
+ pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
+ pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
+ pr_info(" actual entries %ld\n", global_trace.entries);
+
+ tracer_init_debugfs();
+
+ trace_init_cmdlines();
+
+ register_tracer(&no_tracer);
+ current_trace = &no_tracer;
+
+ /* All seems OK, enable tracing */
+ global_trace.ctrl = tracer_enabled;
+ tracing_disabled = 0;
+
+ return 0;
+
+ free_buffers:
+ for (i-- ; i >= 0; i--) {
+ struct page *page, *tmp;
+ struct trace_array_cpu *data = global_trace.data[i];
+
+ if (data) {
+ list_for_each_entry_safe(page, tmp,
+ &data->trace_pages, lru) {
+ list_del_init(&page->lru);
+ __free_page(page);
+ }
+ }
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ data = max_tr.data[i];
+ if (data) {
+ list_for_each_entry_safe(page, tmp,
+ &data->trace_pages, lru) {
+ list_del_init(&page->lru);
+ __free_page(page);
+ }
+ }
+#endif
+ }
+ return ret;
+}
+fs_initcall(tracer_alloc_buffers);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 000000000000..6b8bd8800d04
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,313 @@
+#ifndef _LINUX_KERNEL_TRACE_H
+#define _LINUX_KERNEL_TRACE_H
+
+#include <linux/fs.h>
+#include <asm/atomic.h>
+#include <linux/sched.h>
+#include <linux/clocksource.h>
+
+enum trace_type {
+ __TRACE_FIRST_TYPE = 0,
+
+ TRACE_FN,
+ TRACE_CTX,
+ TRACE_WAKE,
+ TRACE_STACK,
+ TRACE_SPECIAL,
+
+ __TRACE_LAST_TYPE
+};
+
+/*
+ * Function trace entry - function address and parent function addres:
+ */
+struct ftrace_entry {
+ unsigned long ip;
+ unsigned long parent_ip;
+};
+
+/*
+ * Context switch trace entry - which task (and prio) we switched from/to:
+ */
+struct ctx_switch_entry {
+ unsigned int prev_pid;
+ unsigned char prev_prio;
+ unsigned char prev_state;
+ unsigned int next_pid;
+ unsigned char next_prio;
+ unsigned char next_state;
+};
+
+/*
+ * Special (free-form) trace entry:
+ */
+struct special_entry {
+ unsigned long arg1;
+ unsigned long arg2;
+ unsigned long arg3;
+};
+
+/*
+ * Stack-trace entry:
+ */
+
+#define FTRACE_STACK_ENTRIES 8
+
+struct stack_entry {
+ unsigned long caller[FTRACE_STACK_ENTRIES];
+};
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+ char type;
+ char cpu;
+ char flags;
+ char preempt_count;
+ int pid;
+ cycle_t t;
+ union {
+ struct ftrace_entry fn;
+ struct ctx_switch_entry ctx;
+ struct special_entry special;
+ struct stack_entry stack;
+ };
+};
+
+#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
+
+/*
+ * The CPU trace array - it consists of thousands of trace entries
+ * plus some other descriptor data: (for example which task started
+ * the trace, etc.)
+ */
+struct trace_array_cpu {
+ struct list_head trace_pages;
+ atomic_t disabled;
+ raw_spinlock_t lock;
+ struct lock_class_key lock_key;
+
+ /* these fields get copied into max-trace: */
+ unsigned trace_head_idx;
+ unsigned trace_tail_idx;
+ void *trace_head; /* producer */
+ void *trace_tail; /* consumer */
+ unsigned long trace_idx;
+ unsigned long overrun;
+ unsigned long saved_latency;
+ unsigned long critical_start;
+ unsigned long critical_end;
+ unsigned long critical_sequence;
+ unsigned long nice;
+ unsigned long policy;
+ unsigned long rt_priority;
+ cycle_t preempt_timestamp;
+ pid_t pid;
+ uid_t uid;
+ char comm[TASK_COMM_LEN];
+};
+
+struct trace_iterator;
+
+/*
+ * The trace array - an array of per-CPU trace arrays. This is the
+ * highest level data structure that individual tracers deal with.
+ * They have on/off state as well:
+ */
+struct trace_array {
+ unsigned long entries;
+ long ctrl;
+ int cpu;
+ cycle_t time_start;
+ struct task_struct *waiter;
+ struct trace_array_cpu *data[NR_CPUS];
+};
+
+/*
+ * A specific tracer, represented by methods that operate on a trace array:
+ */
+struct tracer {
+ const char *name;
+ void (*init)(struct trace_array *tr);
+ void (*reset)(struct trace_array *tr);
+ void (*open)(struct trace_iterator *iter);
+ void (*pipe_open)(struct trace_iterator *iter);
+ void (*close)(struct trace_iterator *iter);
+ void (*start)(struct trace_iterator *iter);
+ void (*stop)(struct trace_iterator *iter);
+ ssize_t (*read)(struct trace_iterator *iter,
+ struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos);
+ void (*ctrl_update)(struct trace_array *tr);
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ int (*selftest)(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+ int (*print_line)(struct trace_iterator *iter);
+ struct tracer *next;
+ int print_max;
+};
+
+struct trace_seq {
+ unsigned char buffer[PAGE_SIZE];
+ unsigned int len;
+ unsigned int readpos;
+};
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+ struct trace_array *tr;
+ struct tracer *trace;
+ void *private;
+ long last_overrun[NR_CPUS];
+ long overrun[NR_CPUS];
+
+ /* The below is zeroed out in pipe_read */
+ struct trace_seq seq;
+ struct trace_entry *ent;
+ int cpu;
+
+ struct trace_entry *prev_ent;
+ int prev_cpu;
+
+ unsigned long iter_flags;
+ loff_t pos;
+ unsigned long next_idx[NR_CPUS];
+ struct list_head *next_page[NR_CPUS];
+ unsigned next_page_idx[NR_CPUS];
+ long idx;
+};
+
+void tracing_reset(struct trace_array_cpu *data);
+int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *tracing_init_dentry(void);
+void ftrace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long ip,
+ unsigned long parent_ip,
+ unsigned long flags);
+void tracing_sched_switch_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long flags);
+void tracing_record_cmdline(struct task_struct *tsk);
+
+void tracing_sched_wakeup_trace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct task_struct *wakee,
+ struct task_struct *cur,
+ unsigned long flags);
+void trace_special(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long arg1,
+ unsigned long arg2,
+ unsigned long arg3);
+void trace_function(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long ip,
+ unsigned long parent_ip,
+ unsigned long flags);
+
+void tracing_start_function_trace(void);
+void tracing_stop_function_trace(void);
+void tracing_start_cmdline_record(void);
+void tracing_stop_cmdline_record(void);
+int register_tracer(struct tracer *type);
+void unregister_tracer(struct tracer *type);
+
+extern unsigned long nsecs_to_usecs(unsigned long nsecs);
+
+extern unsigned long tracing_max_latency;
+extern unsigned long tracing_thresh;
+
+void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
+void update_max_tr_single(struct trace_array *tr,
+ struct task_struct *tsk, int cpu);
+
+extern cycle_t ftrace_now(int cpu);
+
+#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+typedef void
+(*tracer_switch_func_t)(void *private,
+ void *__rq,
+ struct task_struct *prev,
+ struct task_struct *next);
+
+struct tracer_switch_ops {
+ tracer_switch_func_t func;
+ void *private;
+ struct tracer_switch_ops *next;
+};
+
+#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_update_tot_cnt;
+#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
+extern int DYN_FTRACE_TEST_NAME(void);
+#endif
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_FTRACE
+extern int trace_selftest_startup_function(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+#ifdef CONFIG_IRQSOFF_TRACER
+extern int trace_selftest_startup_irqsoff(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+#ifdef CONFIG_PREEMPT_TRACER
+extern int trace_selftest_startup_preemptoff(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
+extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+#ifdef CONFIG_SCHED_TRACER
+extern int trace_selftest_startup_wakeup(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+extern int trace_selftest_startup_sched_switch(struct tracer *trace,
+ struct trace_array *tr);
+#endif
+#endif /* CONFIG_FTRACE_STARTUP_TEST */
+
+extern void *head_page(struct trace_array_cpu *data);
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+ size_t cnt);
+extern long ns2usecs(cycle_t nsec);
+
+extern unsigned long trace_flags;
+
+/*
+ * trace_iterator_flags is an enumeration that defines bit
+ * positions into trace_flags that controls the output.
+ *
+ * NOTE: These bits must match the trace_options array in
+ * trace.c.
+ */
+enum trace_iterator_flags {
+ TRACE_ITER_PRINT_PARENT = 0x01,
+ TRACE_ITER_SYM_OFFSET = 0x02,
+ TRACE_ITER_SYM_ADDR = 0x04,
+ TRACE_ITER_VERBOSE = 0x08,
+ TRACE_ITER_RAW = 0x10,
+ TRACE_ITER_HEX = 0x20,
+ TRACE_ITER_BIN = 0x40,
+ TRACE_ITER_BLOCK = 0x80,
+ TRACE_ITER_STACKTRACE = 0x100,
+ TRACE_ITER_SCHED_TREE = 0x200,
+};
+
+#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 000000000000..7ee7dcd76b7d
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,78 @@
+/*
+ * ring buffer based function tracer
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Based on code from the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+static void function_reset(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr->data[cpu]);
+}
+
+static void start_function_trace(struct trace_array *tr)
+{
+ function_reset(tr);
+ tracing_start_cmdline_record();
+ tracing_start_function_trace();
+}
+
+static void stop_function_trace(struct trace_array *tr)
+{
+ tracing_stop_function_trace();
+ tracing_stop_cmdline_record();
+}
+
+static void function_trace_init(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_function_trace(tr);
+}
+
+static void function_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_function_trace(tr);
+}
+
+static void function_trace_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_function_trace(tr);
+ else
+ stop_function_trace(tr);
+}
+
+static struct tracer function_trace __read_mostly =
+{
+ .name = "ftrace",
+ .init = function_trace_init,
+ .reset = function_trace_reset,
+ .ctrl_update = function_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_function,
+#endif
+};
+
+static __init int init_function_trace(void)
+{
+ return register_tracer(&function_trace);
+}
+
+device_initcall(init_function_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 000000000000..421d6fe3650e
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,486 @@
+/*
+ * trace irqs off criticall timings
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * From code in the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/kallsyms.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+static struct trace_array *irqsoff_trace __read_mostly;
+static int tracer_enabled __read_mostly;
+
+static DEFINE_PER_CPU(int, tracing_cpu);
+
+static DEFINE_SPINLOCK(max_trace_lock);
+
+enum {
+ TRACER_IRQS_OFF = (1 << 1),
+ TRACER_PREEMPT_OFF = (1 << 2),
+};
+
+static int trace_type __read_mostly;
+
+#ifdef CONFIG_PREEMPT_TRACER
+static inline int
+preempt_trace(void)
+{
+ return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
+}
+#else
+# define preempt_trace() (0)
+#endif
+
+#ifdef CONFIG_IRQSOFF_TRACER
+static inline int
+irq_trace(void)
+{
+ return ((trace_type & TRACER_IRQS_OFF) &&
+ irqs_disabled());
+}
+#else
+# define irq_trace() (0)
+#endif
+
+/*
+ * Sequence count - we record it when starting a measurement and
+ * skip the latency if the sequence has changed - some other section
+ * did a maximum and could disturb our measurement with serial console
+ * printouts, etc. Truly coinciding maximum latencies should be rare
+ * and what happens together happens separately as well, so this doesnt
+ * decrease the validity of the maximum found:
+ */
+static __cacheline_aligned_in_smp unsigned long max_sequence;
+
+#ifdef CONFIG_FTRACE
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ /*
+ * Does not matter if we preempt. We test the flags
+ * afterward, to see if irqs are disabled or not.
+ * If we preempt and get a false positive, the flags
+ * test will fail.
+ */
+ cpu = raw_smp_processor_id();
+ if (likely(!per_cpu(tracing_cpu, cpu)))
+ return;
+
+ local_save_flags(flags);
+ /* slight chance to get a false positive on tracing_cpu */
+ if (!irqs_disabled_flags(flags))
+ return;
+
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ trace_function(tr, data, ip, parent_ip, flags);
+
+ atomic_dec(&data->disabled);
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = irqsoff_tracer_call,
+};
+#endif /* CONFIG_FTRACE */
+
+/*
+ * Should this new latency be reported/recorded?
+ */
+static int report_latency(cycle_t delta)
+{
+ if (tracing_thresh) {
+ if (delta < tracing_thresh)
+ return 0;
+ } else {
+ if (delta <= tracing_max_latency)
+ return 0;
+ }
+ return 1;
+}
+
+static void
+check_critical_timing(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ unsigned long parent_ip,
+ int cpu)
+{
+ unsigned long latency, t0, t1;
+ cycle_t T0, T1, delta;
+ unsigned long flags;
+
+ /*
+ * usecs conversion is slow so we try to delay the conversion
+ * as long as possible:
+ */
+ T0 = data->preempt_timestamp;
+ T1 = ftrace_now(cpu);
+ delta = T1-T0;
+
+ local_save_flags(flags);
+
+ if (!report_latency(delta))
+ goto out;
+
+ spin_lock_irqsave(&max_trace_lock, flags);
+
+ /* check if we are still the max latency */
+ if (!report_latency(delta))
+ goto out_unlock;
+
+ trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+
+ latency = nsecs_to_usecs(delta);
+
+ if (data->critical_sequence != max_sequence)
+ goto out_unlock;
+
+ tracing_max_latency = delta;
+ t0 = nsecs_to_usecs(T0);
+ t1 = nsecs_to_usecs(T1);
+
+ data->critical_end = parent_ip;
+
+ update_max_tr_single(tr, current, cpu);
+
+ max_sequence++;
+
+out_unlock:
+ spin_unlock_irqrestore(&max_trace_lock, flags);
+
+out:
+ data->critical_sequence = max_sequence;
+ data->preempt_timestamp = ftrace_now(cpu);
+ tracing_reset(data);
+ trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
+}
+
+static inline void
+start_critical_timing(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu;
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+
+ if (likely(!tracer_enabled))
+ return;
+
+ cpu = raw_smp_processor_id();
+
+ if (per_cpu(tracing_cpu, cpu))
+ return;
+
+ data = tr->data[cpu];
+
+ if (unlikely(!data) || atomic_read(&data->disabled))
+ return;
+
+ atomic_inc(&data->disabled);
+
+ data->critical_sequence = max_sequence;
+ data->preempt_timestamp = ftrace_now(cpu);
+ data->critical_start = parent_ip ? : ip;
+ tracing_reset(data);
+
+ local_save_flags(flags);
+
+ trace_function(tr, data, ip, parent_ip, flags);
+
+ per_cpu(tracing_cpu, cpu) = 1;
+
+ atomic_dec(&data->disabled);
+}
+
+static inline void
+stop_critical_timing(unsigned long ip, unsigned long parent_ip)
+{
+ int cpu;
+ struct trace_array *tr = irqsoff_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+
+ cpu = raw_smp_processor_id();
+ /* Always clear the tracing cpu on stopping the trace */
+ if (unlikely(per_cpu(tracing_cpu, cpu)))
+ per_cpu(tracing_cpu, cpu) = 0;
+ else
+ return;
+
+ if (!tracer_enabled)
+ return;
+
+ data = tr->data[cpu];
+
+ if (unlikely(!data) || unlikely(!head_page(data)) ||
+ !data->critical_start || atomic_read(&data->disabled))
+ return;
+
+ atomic_inc(&data->disabled);
+
+ local_save_flags(flags);
+ trace_function(tr, data, ip, parent_ip, flags);
+ check_critical_timing(tr, data, parent_ip ? : ip, cpu);
+ data->critical_start = 0;
+ atomic_dec(&data->disabled);
+}
+
+/* start and stop critical timings used to for stoppage (in idle) */
+void start_critical_timings(void)
+{
+ if (preempt_trace() || irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+
+void stop_critical_timings(void)
+{
+ if (preempt_trace() || irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+
+#ifdef CONFIG_IRQSOFF_TRACER
+#ifdef CONFIG_PROVE_LOCKING
+void time_hardirqs_on(unsigned long a0, unsigned long a1)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(a0, a1);
+}
+
+void time_hardirqs_off(unsigned long a0, unsigned long a1)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(a0, a1);
+}
+
+#else /* !CONFIG_PROVE_LOCKING */
+
+/*
+ * Stubs:
+ */
+
+void early_boot_irqs_off(void)
+{
+}
+
+void early_boot_irqs_on(void)
+{
+}
+
+void trace_softirqs_on(unsigned long ip)
+{
+}
+
+void trace_softirqs_off(unsigned long ip)
+{
+}
+
+inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+
+/*
+ * We are only interested in hardirq on/off events:
+ */
+void trace_hardirqs_on(void)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void trace_hardirqs_off(void)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+void trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+ if (!preempt_trace() && irq_trace())
+ stop_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+void trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+ if (!preempt_trace() && irq_trace())
+ start_critical_timing(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+
+#endif /* CONFIG_PROVE_LOCKING */
+#endif /* CONFIG_IRQSOFF_TRACER */
+
+#ifdef CONFIG_PREEMPT_TRACER
+void trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+ stop_critical_timing(a0, a1);
+}
+
+void trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+ start_critical_timing(a0, a1);
+}
+#endif /* CONFIG_PREEMPT_TRACER */
+
+static void start_irqsoff_tracer(struct trace_array *tr)
+{
+ register_ftrace_function(&trace_ops);
+ tracer_enabled = 1;
+}
+
+static void stop_irqsoff_tracer(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ unregister_ftrace_function(&trace_ops);
+}
+
+static void __irqsoff_tracer_init(struct trace_array *tr)
+{
+ irqsoff_trace = tr;
+ /* make sure that the tracer is visible */
+ smp_wmb();
+
+ if (tr->ctrl)
+ start_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_irqsoff_tracer(tr);
+ else
+ stop_irqsoff_tracer(tr);
+}
+
+static void irqsoff_tracer_open(struct trace_iterator *iter)
+{
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl)
+ stop_irqsoff_tracer(iter->tr);
+}
+
+static void irqsoff_tracer_close(struct trace_iterator *iter)
+{
+ if (iter->tr->ctrl)
+ start_irqsoff_tracer(iter->tr);
+}
+
+#ifdef CONFIG_IRQSOFF_TRACER
+static void irqsoff_tracer_init(struct trace_array *tr)
+{
+ trace_type = TRACER_IRQS_OFF;
+
+ __irqsoff_tracer_init(tr);
+}
+static struct tracer irqsoff_tracer __read_mostly =
+{
+ .name = "irqsoff",
+ .init = irqsoff_tracer_init,
+ .reset = irqsoff_tracer_reset,
+ .open = irqsoff_tracer_open,
+ .close = irqsoff_tracer_close,
+ .ctrl_update = irqsoff_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_irqsoff,
+#endif
+};
+# define register_irqsoff(trace) register_tracer(&trace)
+#else
+# define register_irqsoff(trace) do { } while (0)
+#endif
+
+#ifdef CONFIG_PREEMPT_TRACER
+static void preemptoff_tracer_init(struct trace_array *tr)
+{
+ trace_type = TRACER_PREEMPT_OFF;
+
+ __irqsoff_tracer_init(tr);
+}
+
+static struct tracer preemptoff_tracer __read_mostly =
+{
+ .name = "preemptoff",
+ .init = preemptoff_tracer_init,
+ .reset = irqsoff_tracer_reset,
+ .open = irqsoff_tracer_open,
+ .close = irqsoff_tracer_close,
+ .ctrl_update = irqsoff_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_preemptoff,
+#endif
+};
+# define register_preemptoff(trace) register_tracer(&trace)
+#else
+# define register_preemptoff(trace) do { } while (0)
+#endif
+
+#if defined(CONFIG_IRQSOFF_TRACER) && \
+ defined(CONFIG_PREEMPT_TRACER)
+
+static void preemptirqsoff_tracer_init(struct trace_array *tr)
+{
+ trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
+
+ __irqsoff_tracer_init(tr);
+}
+
+static struct tracer preemptirqsoff_tracer __read_mostly =
+{
+ .name = "preemptirqsoff",
+ .init = preemptirqsoff_tracer_init,
+ .reset = irqsoff_tracer_reset,
+ .open = irqsoff_tracer_open,
+ .close = irqsoff_tracer_close,
+ .ctrl_update = irqsoff_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_preemptirqsoff,
+#endif
+};
+
+# define register_preemptirqsoff(trace) register_tracer(&trace)
+#else
+# define register_preemptirqsoff(trace) do { } while (0)
+#endif
+
+__init static int init_irqsoff_tracer(void)
+{
+ register_irqsoff(irqsoff_tracer);
+ register_preemptoff(preemptoff_tracer);
+ register_preemptirqsoff(preemptirqsoff_tracer);
+
+ return 0;
+}
+device_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 000000000000..93a662009151
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,286 @@
+/*
+ * trace context switch
+ *
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/marker.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array *ctx_trace;
+static int __read_mostly tracer_enabled;
+static atomic_t sched_ref;
+
+static void
+sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ struct trace_array **ptr = private;
+ struct trace_array *tr = *ptr;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ tracing_record_cmdline(prev);
+ tracing_record_cmdline(next);
+
+ if (!tracer_enabled)
+ return;
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ tracing_sched_switch_trace(tr, data, prev, next, flags);
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+static notrace void
+sched_switch_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ struct task_struct *prev;
+ struct task_struct *next;
+ struct rq *__rq;
+
+ if (!atomic_read(&sched_ref))
+ return;
+
+ /* skip prev_pid %d next_pid %d prev_state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ __rq = va_arg(*args, typeof(__rq));
+ prev = va_arg(*args, typeof(prev));
+ next = va_arg(*args, typeof(next));
+
+ /*
+ * If tracer_switch_func only points to the local
+ * switch func, it still needs the ptr passed to it.
+ */
+ sched_switch_func(probe_data, __rq, prev, next);
+}
+
+static void
+wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
+ task_struct *curr)
+{
+ struct trace_array **ptr = private;
+ struct trace_array *tr = *ptr;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ if (!tracer_enabled)
+ return;
+
+ tracing_record_cmdline(curr);
+
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+static notrace void
+wake_up_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ struct task_struct *curr;
+ struct task_struct *task;
+ struct rq *__rq;
+
+ if (likely(!tracer_enabled))
+ return;
+
+ /* Skip pid %d state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ /* now get the meat: "rq %p task %p rq->curr %p" */
+ __rq = va_arg(*args, typeof(__rq));
+ task = va_arg(*args, typeof(task));
+ curr = va_arg(*args, typeof(curr));
+
+ tracing_record_cmdline(task);
+ tracing_record_cmdline(curr);
+
+ wakeup_func(probe_data, __rq, task, curr);
+}
+
+static void sched_switch_reset(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = ftrace_now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr->data[cpu]);
+}
+
+static int tracing_sched_register(void)
+{
+ int ret;
+
+ ret = marker_probe_register("kernel_sched_wakeup",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &ctx_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup\n");
+ return ret;
+ }
+
+ ret = marker_probe_register("kernel_sched_wakeup_new",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &ctx_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup_new\n");
+ goto fail_deprobe;
+ }
+
+ ret = marker_probe_register("kernel_sched_schedule",
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ sched_switch_callback,
+ &ctx_trace);
+ if (ret) {
+ pr_info("sched trace: Couldn't add marker"
+ " probe to kernel_sched_schedule\n");
+ goto fail_deprobe_wake_new;
+ }
+
+ return ret;
+fail_deprobe_wake_new:
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &ctx_trace);
+fail_deprobe:
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &ctx_trace);
+ return ret;
+}
+
+static void tracing_sched_unregister(void)
+{
+ marker_probe_unregister("kernel_sched_schedule",
+ sched_switch_callback,
+ &ctx_trace);
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &ctx_trace);
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &ctx_trace);
+}
+
+static void tracing_start_sched_switch(void)
+{
+ long ref;
+
+ ref = atomic_inc_return(&sched_ref);
+ if (ref == 1)
+ tracing_sched_register();
+}
+
+static void tracing_stop_sched_switch(void)
+{
+ long ref;
+
+ ref = atomic_dec_and_test(&sched_ref);
+ if (ref)
+ tracing_sched_unregister();
+}
+
+void tracing_start_cmdline_record(void)
+{
+ tracing_start_sched_switch();
+}
+
+void tracing_stop_cmdline_record(void)
+{
+ tracing_stop_sched_switch();
+}
+
+static void start_sched_trace(struct trace_array *tr)
+{
+ sched_switch_reset(tr);
+ tracer_enabled = 1;
+ tracing_start_cmdline_record();
+}
+
+static void stop_sched_trace(struct trace_array *tr)
+{
+ tracing_stop_cmdline_record();
+ tracer_enabled = 0;
+}
+
+static void sched_switch_trace_init(struct trace_array *tr)
+{
+ ctx_trace = tr;
+
+ if (tr->ctrl)
+ start_sched_trace(tr);
+}
+
+static void sched_switch_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_sched_trace(tr);
+}
+
+static void sched_switch_trace_ctrl_update(struct trace_array *tr)
+{
+ /* When starting a new trace, reset the buffers */
+ if (tr->ctrl)
+ start_sched_trace(tr);
+ else
+ stop_sched_trace(tr);
+}
+
+static struct tracer sched_switch_trace __read_mostly =
+{
+ .name = "sched_switch",
+ .init = sched_switch_trace_init,
+ .reset = sched_switch_trace_reset,
+ .ctrl_update = sched_switch_trace_ctrl_update,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_sched_switch,
+#endif
+};
+
+__init static int init_sched_switch_trace(void)
+{
+ int ret = 0;
+
+ if (atomic_read(&sched_ref))
+ ret = tracing_sched_register();
+ if (ret) {
+ pr_info("error registering scheduler trace\n");
+ return ret;
+ }
+ return register_tracer(&sched_switch_trace);
+}
+device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 000000000000..bf7e91caef57
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,447 @@
+/*
+ * trace task wakeup timings
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
+ *
+ * Based on code from the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/marker.h>
+
+#include "trace.h"
+
+static struct trace_array *wakeup_trace;
+static int __read_mostly tracer_enabled;
+
+static struct task_struct *wakeup_task;
+static int wakeup_cpu;
+static unsigned wakeup_prio = -1;
+
+static DEFINE_SPINLOCK(wakeup_lock);
+
+static void __wakeup_reset(struct trace_array *tr);
+
+#ifdef CONFIG_FTRACE
+/*
+ * irqsoff uses its own tracer function to keep the overhead down:
+ */
+static void
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int resched;
+ int cpu;
+
+ if (likely(!wakeup_task))
+ return;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+ if (unlikely(disabled != 1))
+ goto out;
+
+ spin_lock_irqsave(&wakeup_lock, flags);
+
+ if (unlikely(!wakeup_task))
+ goto unlock;
+
+ /*
+ * The task can't disappear because it needs to
+ * wake up first, and we have the wakeup_lock.
+ */
+ if (task_cpu(wakeup_task) != cpu)
+ goto unlock;
+
+ trace_function(tr, data, ip, parent_ip, flags);
+
+ unlock:
+ spin_unlock_irqrestore(&wakeup_lock, flags);
+
+ out:
+ atomic_dec(&data->disabled);
+
+ /*
+ * To prevent recursion from the scheduler, if the
+ * resched flag was set before we entered, then
+ * don't reschedule.
+ */
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = wakeup_tracer_call,
+};
+#endif /* CONFIG_FTRACE */
+
+/*
+ * Should this new latency be reported/recorded?
+ */
+static int report_latency(cycle_t delta)
+{
+ if (tracing_thresh) {
+ if (delta < tracing_thresh)
+ return 0;
+ } else {
+ if (delta <= tracing_max_latency)
+ return 0;
+ }
+ return 1;
+}
+
+static void notrace
+wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ unsigned long latency = 0, t0 = 0, t1 = 0;
+ struct trace_array **ptr = private;
+ struct trace_array *tr = *ptr;
+ struct trace_array_cpu *data;
+ cycle_t T0, T1, delta;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ if (unlikely(!tracer_enabled))
+ return;
+
+ /*
+ * When we start a new trace, we set wakeup_task to NULL
+ * and then set tracer_enabled = 1. We want to make sure
+ * that another CPU does not see the tracer_enabled = 1
+ * and the wakeup_task with an older task, that might
+ * actually be the same as next.
+ */
+ smp_rmb();
+
+ if (next != wakeup_task)
+ return;
+
+ /* The task we are waiting for is waking up */
+ data = tr->data[wakeup_cpu];
+
+ /* disable local data, not wakeup_cpu data */
+ cpu = raw_smp_processor_id();
+ disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ if (likely(disabled != 1))
+ goto out;
+
+ spin_lock_irqsave(&wakeup_lock, flags);
+
+ /* We could race with grabbing wakeup_lock */
+ if (unlikely(!tracer_enabled || next != wakeup_task))
+ goto out_unlock;
+
+ trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+
+ /*
+ * usecs conversion is slow so we try to delay the conversion
+ * as long as possible:
+ */
+ T0 = data->preempt_timestamp;
+ T1 = ftrace_now(cpu);
+ delta = T1-T0;
+
+ if (!report_latency(delta))
+ goto out_unlock;
+
+ latency = nsecs_to_usecs(delta);
+
+ tracing_max_latency = delta;
+ t0 = nsecs_to_usecs(T0);
+ t1 = nsecs_to_usecs(T1);
+
+ update_max_tr(tr, wakeup_task, wakeup_cpu);
+
+out_unlock:
+ __wakeup_reset(tr);
+ spin_unlock_irqrestore(&wakeup_lock, flags);
+out:
+ atomic_dec(&tr->data[cpu]->disabled);
+}
+
+static notrace void
+sched_switch_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ struct task_struct *prev;
+ struct task_struct *next;
+ struct rq *__rq;
+
+ /* skip prev_pid %d next_pid %d prev_state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ __rq = va_arg(*args, typeof(__rq));
+ prev = va_arg(*args, typeof(prev));
+ next = va_arg(*args, typeof(next));
+
+ tracing_record_cmdline(prev);
+
+ /*
+ * If tracer_switch_func only points to the local
+ * switch func, it still needs the ptr passed to it.
+ */
+ wakeup_sched_switch(probe_data, __rq, prev, next);
+}
+
+static void __wakeup_reset(struct trace_array *tr)
+{
+ struct trace_array_cpu *data;
+ int cpu;
+
+ assert_spin_locked(&wakeup_lock);
+
+ for_each_possible_cpu(cpu) {
+ data = tr->data[cpu];
+ tracing_reset(data);
+ }
+
+ wakeup_cpu = -1;
+ wakeup_prio = -1;
+
+ if (wakeup_task)
+ put_task_struct(wakeup_task);
+
+ wakeup_task = NULL;
+}
+
+static void wakeup_reset(struct trace_array *tr)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&wakeup_lock, flags);
+ __wakeup_reset(tr);
+ spin_unlock_irqrestore(&wakeup_lock, flags);
+}
+
+static void
+wakeup_check_start(struct trace_array *tr, struct task_struct *p,
+ struct task_struct *curr)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ long disabled;
+
+ if (likely(!rt_task(p)) ||
+ p->prio >= wakeup_prio ||
+ p->prio >= curr->prio)
+ return;
+
+ disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ if (unlikely(disabled != 1))
+ goto out;
+
+ /* interrupts should be off from try_to_wake_up */
+ spin_lock(&wakeup_lock);
+
+ /* check for races. */
+ if (!tracer_enabled || p->prio >= wakeup_prio)
+ goto out_locked;
+
+ /* reset the trace */
+ __wakeup_reset(tr);
+
+ wakeup_cpu = task_cpu(p);
+ wakeup_prio = p->prio;
+
+ wakeup_task = p;
+ get_task_struct(wakeup_task);
+
+ local_save_flags(flags);
+
+ tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(tr, tr->data[wakeup_cpu],
+ CALLER_ADDR1, CALLER_ADDR2, flags);
+
+out_locked:
+ spin_unlock(&wakeup_lock);
+out:
+ atomic_dec(&tr->data[cpu]->disabled);
+}
+
+static notrace void
+wake_up_callback(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ struct trace_array **ptr = probe_data;
+ struct trace_array *tr = *ptr;
+ struct task_struct *curr;
+ struct task_struct *task;
+ struct rq *__rq;
+
+ if (likely(!tracer_enabled))
+ return;
+
+ /* Skip pid %d state %ld */
+ (void)va_arg(*args, int);
+ (void)va_arg(*args, long);
+ /* now get the meat: "rq %p task %p rq->curr %p" */
+ __rq = va_arg(*args, typeof(__rq));
+ task = va_arg(*args, typeof(task));
+ curr = va_arg(*args, typeof(curr));
+
+ tracing_record_cmdline(task);
+ tracing_record_cmdline(curr);
+
+ wakeup_check_start(tr, task, curr);
+}
+
+static void start_wakeup_tracer(struct trace_array *tr)
+{
+ int ret;
+
+ ret = marker_probe_register("kernel_sched_wakeup",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &wakeup_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup\n");
+ return;
+ }
+
+ ret = marker_probe_register("kernel_sched_wakeup_new",
+ "pid %d state %ld ## rq %p task %p rq->curr %p",
+ wake_up_callback,
+ &wakeup_trace);
+ if (ret) {
+ pr_info("wakeup trace: Couldn't add marker"
+ " probe to kernel_sched_wakeup_new\n");
+ goto fail_deprobe;
+ }
+
+ ret = marker_probe_register("kernel_sched_schedule",
+ "prev_pid %d next_pid %d prev_state %ld "
+ "## rq %p prev %p next %p",
+ sched_switch_callback,
+ &wakeup_trace);
+ if (ret) {
+ pr_info("sched trace: Couldn't add marker"
+ " probe to kernel_sched_schedule\n");
+ goto fail_deprobe_wake_new;
+ }
+
+ wakeup_reset(tr);
+
+ /*
+ * Don't let the tracer_enabled = 1 show up before
+ * the wakeup_task is reset. This may be overkill since
+ * wakeup_reset does a spin_unlock after setting the
+ * wakeup_task to NULL, but I want to be safe.
+ * This is a slow path anyway.
+ */
+ smp_wmb();
+
+ tracer_enabled = 1;
+ register_ftrace_function(&trace_ops);
+
+ return;
+fail_deprobe_wake_new:
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &wakeup_trace);
+fail_deprobe:
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &wakeup_trace);
+}
+
+static void stop_wakeup_tracer(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ unregister_ftrace_function(&trace_ops);
+ marker_probe_unregister("kernel_sched_schedule",
+ sched_switch_callback,
+ &wakeup_trace);
+ marker_probe_unregister("kernel_sched_wakeup_new",
+ wake_up_callback,
+ &wakeup_trace);
+ marker_probe_unregister("kernel_sched_wakeup",
+ wake_up_callback,
+ &wakeup_trace);
+}
+
+static void wakeup_tracer_init(struct trace_array *tr)
+{
+ wakeup_trace = tr;
+
+ if (tr->ctrl)
+ start_wakeup_tracer(tr);
+}
+
+static void wakeup_tracer_reset(struct trace_array *tr)
+{
+ if (tr->ctrl) {
+ stop_wakeup_tracer(tr);
+ /* make sure we put back any tasks we are tracing */
+ wakeup_reset(tr);
+ }
+}
+
+static void wakeup_tracer_ctrl_update(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ start_wakeup_tracer(tr);
+ else
+ stop_wakeup_tracer(tr);
+}
+
+static void wakeup_tracer_open(struct trace_iterator *iter)
+{
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl)
+ stop_wakeup_tracer(iter->tr);
+}
+
+static void wakeup_tracer_close(struct trace_iterator *iter)
+{
+ /* forget about any processes we were recording */
+ if (iter->tr->ctrl)
+ start_wakeup_tracer(iter->tr);
+}
+
+static struct tracer wakeup_tracer __read_mostly =
+{
+ .name = "wakeup",
+ .init = wakeup_tracer_init,
+ .reset = wakeup_tracer_reset,
+ .open = wakeup_tracer_open,
+ .close = wakeup_tracer_close,
+ .ctrl_update = wakeup_tracer_ctrl_update,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_wakeup,
+#endif
+};
+
+__init static int init_wakeup_tracer(void)
+{
+ int ret;
+
+ ret = register_tracer(&wakeup_tracer);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 000000000000..18c5423bc977
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,540 @@
+/* Include in trace.c */
+
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+static inline int trace_valid_entry(struct trace_entry *entry)
+{
+ switch (entry->type) {
+ case TRACE_FN:
+ case TRACE_CTX:
+ case TRACE_WAKE:
+ case TRACE_STACK:
+ case TRACE_SPECIAL:
+ return 1;
+ }
+ return 0;
+}
+
+static int
+trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
+{
+ struct trace_entry *entries;
+ struct page *page;
+ int idx = 0;
+ int i;
+
+ BUG_ON(list_empty(&data->trace_pages));
+ page = list_entry(data->trace_pages.next, struct page, lru);
+ entries = page_address(page);
+
+ check_pages(data);
+ if (head_page(data) != entries)
+ goto failed;
+
+ /*
+ * The starting trace buffer always has valid elements,
+ * if any element exists.
+ */
+ entries = head_page(data);
+
+ for (i = 0; i < tr->entries; i++) {
+
+ if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+ printk(KERN_CONT ".. invalid entry %d ",
+ entries[idx].type);
+ goto failed;
+ }
+
+ idx++;
+ if (idx >= ENTRIES_PER_PAGE) {
+ page = virt_to_page(entries);
+ if (page->lru.next == &data->trace_pages) {
+ if (i != tr->entries - 1) {
+ printk(KERN_CONT ".. entries buffer mismatch");
+ goto failed;
+ }
+ } else {
+ page = list_entry(page->lru.next, struct page, lru);
+ entries = page_address(page);
+ }
+ idx = 0;
+ }
+ }
+
+ page = virt_to_page(entries);
+ if (page->lru.next != &data->trace_pages) {
+ printk(KERN_CONT ".. too many entries");
+ goto failed;
+ }
+
+ return 0;
+
+ failed:
+ /* disable tracing */
+ tracing_disabled = 1;
+ printk(KERN_CONT ".. corrupted trace buffer .. ");
+ return -1;
+}
+
+/*
+ * Test the trace buffer to see if all the elements
+ * are still sane.
+ */
+static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
+{
+ unsigned long flags, cnt = 0;
+ int cpu, ret = 0;
+
+ /* Don't allow flipping of max traces now */
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&ftrace_max_lock);
+ for_each_possible_cpu(cpu) {
+ if (!head_page(tr->data[cpu]))
+ continue;
+
+ cnt += tr->data[cpu]->trace_idx;
+
+ ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
+ if (ret)
+ break;
+ }
+ __raw_spin_unlock(&ftrace_max_lock);
+ raw_local_irq_restore(flags);
+
+ if (count)
+ *count = cnt;
+
+ return ret;
+}
+
+#ifdef CONFIG_FTRACE
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+/* Test dynamic code modification and ftrace filters */
+int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
+ struct trace_array *tr,
+ int (*func)(void))
+{
+ unsigned long count;
+ int ret;
+ int save_ftrace_enabled = ftrace_enabled;
+ int save_tracer_enabled = tracer_enabled;
+ char *func_name;
+
+ /* The ftrace test PASSED */
+ printk(KERN_CONT "PASSED\n");
+ pr_info("Testing dynamic ftrace: ");
+
+ /* enable tracing, and record the filter function */
+ ftrace_enabled = 1;
+ tracer_enabled = 1;
+
+ /* passed in by parameter to fool gcc from optimizing */
+ func();
+
+ /* update the records */
+ ret = ftrace_force_update();
+ if (ret) {
+ printk(KERN_CONT ".. ftraced failed .. ");
+ return ret;
+ }
+
+ /*
+ * Some archs *cough*PowerPC*cough* add charachters to the
+ * start of the function names. We simply put a '*' to
+ * accomodate them.
+ */
+ func_name = "*" STR(DYN_FTRACE_TEST_NAME);
+
+ /* filter only on our function */
+ ftrace_set_filter(func_name, strlen(func_name), 1);
+
+ /* enable tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+
+ /* we should have nothing in the buffer */
+ ret = trace_test_buffer(tr, &count);
+ if (ret)
+ goto out;
+
+ if (count) {
+ ret = -1;
+ printk(KERN_CONT ".. filter did not filter .. ");
+ goto out;
+ }
+
+ /* call our function again */
+ func();
+
+ /* sleep again */
+ msleep(100);
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ ftrace_enabled = 0;
+
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ /* we should only have one item */
+ if (!ret && count != 1) {
+ printk(KERN_CONT ".. filter failed count=%ld ..", count);
+ ret = -1;
+ goto out;
+ }
+ out:
+ ftrace_enabled = save_ftrace_enabled;
+ tracer_enabled = save_tracer_enabled;
+
+ /* Enable tracing on all functions again */
+ ftrace_set_filter(NULL, 0, 1);
+
+ return ret;
+}
+#else
+# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
+#endif /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * Simple verification test of ftrace function tracer.
+ * Enable ftrace, sleep 1/10 second, and then read the trace
+ * buffer to see if all is in order.
+ */
+int
+trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+ int save_ftrace_enabled = ftrace_enabled;
+ int save_tracer_enabled = tracer_enabled;
+
+ /* make sure msleep has been recorded */
+ msleep(1);
+
+ /* force the recorded functions to be traced */
+ ret = ftrace_force_update();
+ if (ret) {
+ printk(KERN_CONT ".. ftraced failed .. ");
+ return ret;
+ }
+
+ /* start the tracing */
+ ftrace_enabled = 1;
+ tracer_enabled = 1;
+
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ ftrace_enabled = 0;
+
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ ret = trace_selftest_startup_dynamic_tracing(trace, tr,
+ DYN_FTRACE_TEST_NAME);
+
+ out:
+ ftrace_enabled = save_ftrace_enabled;
+ tracer_enabled = save_tracer_enabled;
+
+ /* kill ftrace totally if we failed */
+ if (ret)
+ ftrace_kill();
+
+ return ret;
+}
+#endif /* CONFIG_FTRACE */
+
+#ifdef CONFIG_IRQSOFF_TRACER
+int
+trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* reset the max latency */
+ tracing_max_latency = 0;
+ /* disable interrupts for a bit */
+ local_irq_disable();
+ udelay(100);
+ local_irq_enable();
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (!ret)
+ ret = trace_test_buffer(&max_tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ tracing_max_latency = save_max;
+
+ return ret;
+}
+#endif /* CONFIG_IRQSOFF_TRACER */
+
+#ifdef CONFIG_PREEMPT_TRACER
+int
+trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* reset the max latency */
+ tracing_max_latency = 0;
+ /* disable preemption for a bit */
+ preempt_disable();
+ udelay(100);
+ preempt_enable();
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (!ret)
+ ret = trace_test_buffer(&max_tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ tracing_max_latency = save_max;
+
+ return ret;
+}
+#endif /* CONFIG_PREEMPT_TRACER */
+
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
+int
+trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+
+ /* reset the max latency */
+ tracing_max_latency = 0;
+
+ /* disable preemption and interrupts for a bit */
+ preempt_disable();
+ local_irq_disable();
+ udelay(100);
+ preempt_enable();
+ /* reverse the order of preempt vs irqs */
+ local_irq_enable();
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (ret)
+ goto out;
+
+ ret = trace_test_buffer(&max_tr, &count);
+ if (ret)
+ goto out;
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ /* do the test by disabling interrupts first this time */
+ tracing_max_latency = 0;
+ tr->ctrl = 1;
+ trace->ctrl_update(tr);
+ preempt_disable();
+ local_irq_disable();
+ udelay(100);
+ preempt_enable();
+ /* reverse the order of preempt vs irqs */
+ local_irq_enable();
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (ret)
+ goto out;
+
+ ret = trace_test_buffer(&max_tr, &count);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ out:
+ trace->reset(tr);
+ tracing_max_latency = save_max;
+
+ return ret;
+}
+#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
+
+#ifdef CONFIG_SCHED_TRACER
+static int trace_wakeup_test_thread(void *data)
+{
+ /* Make this a RT thread, doesn't need to be too high */
+ struct sched_param param = { .sched_priority = 5 };
+ struct completion *x = data;
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+
+ /* Make it know we have a new prio */
+ complete(x);
+
+ /* now go to sleep and let the test wake us up */
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+
+ /* we are awake, now wait to disappear */
+ while (!kthread_should_stop()) {
+ /*
+ * This is an RT task, do short sleeps to let
+ * others run.
+ */
+ msleep(100);
+ }
+
+ return 0;
+}
+
+int
+trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long save_max = tracing_max_latency;
+ struct task_struct *p;
+ struct completion isrt;
+ unsigned long count;
+ int ret;
+
+ init_completion(&isrt);
+
+ /* create a high prio thread */
+ p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
+ if (IS_ERR(p)) {
+ printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
+ return -1;
+ }
+
+ /* make sure the thread is running at an RT prio */
+ wait_for_completion(&isrt);
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* reset the max latency */
+ tracing_max_latency = 0;
+
+ /* sleep to let the RT thread sleep too */
+ msleep(100);
+
+ /*
+ * Yes this is slightly racy. It is possible that for some
+ * strange reason that the RT thread we created, did not
+ * call schedule for 100ms after doing the completion,
+ * and we do a wakeup on a task that already is awake.
+ * But that is extremely unlikely, and the worst thing that
+ * happens in such a case, is that we disable tracing.
+ * Honestly, if this race does happen something is horrible
+ * wrong with the system.
+ */
+
+ wake_up_process(p);
+
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check both trace buffers */
+ ret = trace_test_buffer(tr, NULL);
+ if (!ret)
+ ret = trace_test_buffer(&max_tr, &count);
+
+
+ trace->reset(tr);
+
+ tracing_max_latency = save_max;
+
+ /* kill the thread */
+ kthread_stop(p);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_SCHED_TRACER */
+
+#ifdef CONFIG_CONTEXT_SWITCH_TRACER
+int
+trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ tr->ctrl = 1;
+ trace->init(tr);
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+ /* stop the tracing. */
+ tr->ctrl = 0;
+ trace->ctrl_update(tr);
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 000000000000..54dd77cce5bf
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
+#include "trace.h"
+
+int DYN_FTRACE_TEST_NAME(void)
+{
+ /* used to call mcount */
+ return 0;
+}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..d8b6279a9b42 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -634,6 +634,8 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
+source kernel/trace/Kconfig
+
config PROVIDE_OHCI1394_DMA_INIT
bool "Remote debugging over FireWire early on boot"
depends on PCI && X86
diff --git a/lib/Makefile b/lib/Makefile
index 74b0cfb1fcc3..4b836a53c08f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,6 +8,15 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
sha1.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o prio_heap.o ratelimit.o
+ifdef CONFIG_FTRACE
+# Do not profile string.o, since it may be used in early boot or vdso
+CFLAGS_REMOVE_string.o = -pg
+# Also do not profile any debug utilities
+CFLAGS_REMOVE_spinlock_debug.o = -pg
+CFLAGS_REMOVE_list_debug.o = -pg
+CFLAGS_REMOVE_debugobjects.o = -pg
+endif
+
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 6c90fb90e19c..3b4dc098181e 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,7 @@
#include <linux/kallsyms.h>
#include <linux/sched.h>
-unsigned int debug_smp_processor_id(void)
+notrace unsigned int debug_smp_processor_id(void)
{
unsigned long preempt_count = preempt_count();
int this_cpu = raw_smp_processor_id();
@@ -37,7 +37,7 @@ unsigned int debug_smp_processor_id(void)
/*
* Avoid recursion:
*/
- preempt_disable();
+ preempt_disable_notrace();
if (!printk_ratelimit())
goto out_enable;
@@ -49,7 +49,7 @@ unsigned int debug_smp_processor_id(void)
dump_stack();
out_enable:
- preempt_enable_no_resched();
+ preempt_enable_no_resched_notrace();
out:
return this_cpu;
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 789b6adbef37..b38f700825fc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages);
static struct prop_descriptor vm_completions;
static struct prop_descriptor vm_dirties;
-static unsigned long determine_dirtyable_memory(void);
-
/*
* couple the period to the dirty_ratio:
*
@@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
#endif
}
-static unsigned long determine_dirtyable_memory(void)
+/**
+ * determine_dirtyable_memory - amount of memory that may be used
+ *
+ * Returns the numebr of pages that can currently be freed and used
+ * by the kernel for direct mappings.
+ */
+unsigned long determine_dirtyable_memory(void)
{
unsigned long x;
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 8e440233c27d..ea48b82a3707 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -96,7 +96,8 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
modname_flags = $(if $(filter 1,$(words $(modname))),\
-D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
-_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
+orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
+_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
_a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o)
_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F))