summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2008-05-12 21:20:42 +0200
committerThomas Gleixner <tglx@linutronix.de>2008-05-23 20:33:09 +0200
commit3d0833953e1b98b79ddf491dd49229eef9baeac1 (patch)
tree3520cda824bdb58e47ce3e9f43d68249d5cc1a12 /arch/x86/kernel
parentftrace: trace preempt off critical timings (diff)
downloadlinux-3d0833953e1b98b79ddf491dd49229eef9baeac1.tar.xz
linux-3d0833953e1b98b79ddf491dd49229eef9baeac1.zip
ftrace: dynamic enabling/disabling of function calls
This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations of that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMI's so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/ftrace.c237
2 files changed, 238 insertions, 0 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..e142091524b0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..5dd58136ef02
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,237 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#define CALL_BACK 5
+
+#define JMPFWD 0x03eb
+
+static unsigned short ftrace_jmp = JMPFWD;
+
+struct ftrace_record {
+ struct dyn_ftrace rec;
+ int failed;
+} __attribute__((packed));
+
+struct ftrace_page {
+ struct ftrace_page *next;
+ int index;
+ struct ftrace_record records[];
+} __attribute__((packed));
+
+#define ENTRIES_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record))
+
+/* estimate from running different kernels */
+#define NR_TO_INIT 10000
+
+#define MCOUNT_ADDR ((long)(&mcount))
+
+union ftrace_code_union {
+ char code[5];
+ struct {
+ char e8;
+ int offset;
+ } __attribute__((packed));
+};
+
+static struct ftrace_page *ftrace_pages_start;
+static struct ftrace_page *ftrace_pages;
+
+notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip)
+{
+ struct ftrace_record *rec;
+ unsigned short save;
+
+ ip -= CALL_BACK;
+ save = *(short *)ip;
+
+ /* If this was already converted, skip it */
+ if (save == JMPFWD)
+ return NULL;
+
+ if (ftrace_pages->index == ENTRIES_PER_PAGE) {
+ if (!ftrace_pages->next)
+ return NULL;
+ ftrace_pages = ftrace_pages->next;
+ }
+
+ rec = &ftrace_pages->records[ftrace_pages->index++];
+
+ return &rec->rec;
+}
+
+static int notrace
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned short old = *(unsigned short *)old_code;
+ unsigned short new = *(unsigned short *)new_code;
+ unsigned short replaced;
+ int faulted = 0;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine.
+ */
+ asm volatile (
+ "1: lock\n"
+ " cmpxchg %w3, (%2)\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ " movl $1, %0\n"
+ "3: jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : "=r"(faulted), "=a"(replaced)
+ : "r"(ip), "r"(new), "0"(faulted), "a"(old)
+ : "memory");
+ sync_core();
+
+ if (replaced != old)
+ faulted = 2;
+
+ return faulted;
+}
+
+static int notrace ftrace_calc_offset(long ip)
+{
+ return (int)(MCOUNT_ADDR - ip);
+}
+
+notrace void ftrace_code_disable(struct dyn_ftrace *rec)
+{
+ unsigned long ip;
+ union ftrace_code_union save;
+ struct ftrace_record *r =
+ container_of(rec, struct ftrace_record, rec);
+
+ ip = rec->ip;
+
+ save.e8 = 0xe8;
+ save.offset = ftrace_calc_offset(ip);
+
+ /* move the IP back to the start of the call */
+ ip -= CALL_BACK;
+
+ r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp);
+}
+
+static void notrace ftrace_replace_code(int saved)
+{
+ unsigned char *new = NULL, *old = NULL;
+ struct ftrace_record *rec;
+ struct ftrace_page *pg;
+ unsigned long ip;
+ int i;
+
+ if (saved)
+ old = (char *)&ftrace_jmp;
+ else
+ new = (char *)&ftrace_jmp;
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ for (i = 0; i < pg->index; i++) {
+ union ftrace_code_union calc;
+ rec = &pg->records[i];
+
+ /* don't modify code that has already faulted */
+ if (rec->failed)
+ continue;
+
+ ip = rec->rec.ip;
+
+ calc.e8 = 0xe8;
+ calc.offset = ftrace_calc_offset(ip);
+
+ if (saved)
+ new = calc.code;
+ else
+ old = calc.code;
+
+ ip -= CALL_BACK;
+
+ rec->failed = ftrace_modify_code(ip, old, new);
+ }
+ }
+
+}
+
+notrace void ftrace_startup_code(void)
+{
+ ftrace_replace_code(1);
+}
+
+notrace void ftrace_shutdown_code(void)
+{
+ ftrace_replace_code(0);
+}
+
+notrace void ftrace_shutdown_replenish(void)
+{
+ if (ftrace_pages->next)
+ return;
+
+ /* allocate another page */
+ ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
+}
+
+notrace int ftrace_shutdown_arch_init(void)
+{
+ struct ftrace_page *pg;
+ int cnt;
+ int i;
+
+ /* allocate a few pages */
+ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!ftrace_pages_start)
+ return -1;
+
+ /*
+ * Allocate a few more pages.
+ *
+ * TODO: have some parser search vmlinux before
+ * final linking to find all calls to ftrace.
+ * Then we can:
+ * a) know how many pages to allocate.
+ * and/or
+ * b) set up the table then.
+ *
+ * The dynamic code is still necessary for
+ * modules.
+ */
+
+ pg = ftrace_pages = ftrace_pages_start;
+
+ cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+
+ for (i = 0; i < cnt; i++) {
+ pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+
+ /* If we fail, we'll try later anyway */
+ if (!pg->next)
+ break;
+
+ pg = pg->next;
+ }
+
+ return 0;
+}