summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries/hvCall.S
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2009-10-26 19:50:29 +0100
committerPaul Mackerras <paulus@samba.org>2009-10-28 06:13:04 +0100
commitc8cd093a6e9f96ea6b871576fd4e46d7c818bb89 (patch)
tree2bad2c3a2cc68a35fb93d986a49bf543efcd0156 /arch/powerpc/platforms/pseries/hvCall.S
parentpowerpc: tracing: Add powerpc tracepoints for timer entry and exit (diff)
downloadlinux-c8cd093a6e9f96ea6b871576fd4e46d7c818bb89.tar.xz
linux-c8cd093a6e9f96ea6b871576fd4e46d7c818bb89.zip
powerpc: tracing: Add hypervisor call tracepoints
Add hcall_entry and hcall_exit tracepoints. This replaces the inline assembly HCALL_STATS code and converts it to use the new tracepoints. To keep the disabled case as quick as possible, we embed a status word in the TOC so we can get at it with a single load. By doing so we keep the overhead at a minimum. Time taken for a null hcall: No tracepoint code: 135.79 cycles Disabled tracepoints: 137.95 cycles For reference, before this patch enabling HCALL_STATS resulted in a null hcall of 201.44 cycles! Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/platforms/pseries/hvCall.S')
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S101
1 files changed, 58 insertions, 43 deletions
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..01e95ab18d35 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,20 +14,54 @@
#define STK_PARM(i) (48 + ((i)-3)*8)
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+ .section ".toc","aw"
+
+ .globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+ .llong 0
+
+ .section ".text"
+
/*
* precall must preserve all registers. use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
*/
#define HCALL_INST_PRECALL \
- std r3,STK_PARM(r3)(r1); /* save opcode */ \
- mftb r0; /* get timebase and */ \
- std r0,STK_PARM(r5)(r1); /* save for later */ \
BEGIN_FTR_SECTION; \
- mfspr r0,SPRN_PURR; /* get PURR and */ \
- std r0,STK_PARM(r6)(r1); /* save for later */ \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-
+ b 1f; \
+END_FTR_SECTION(0, 1); \
+ ld r12,hcall_tracepoint_refcount@toc(r2); \
+ cmpdi r12,0; \
+ beq+ 1f; \
+ mflr r0; \
+ std r3,STK_PARM(r3)(r1); \
+ std r4,STK_PARM(r4)(r1); \
+ std r5,STK_PARM(r5)(r1); \
+ std r6,STK_PARM(r6)(r1); \
+ std r7,STK_PARM(r7)(r1); \
+ std r8,STK_PARM(r8)(r1); \
+ std r9,STK_PARM(r9)(r1); \
+ std r10,STK_PARM(r10)(r1); \
+ std r0,16(r1); \
+ stdu r1,-STACK_FRAME_OVERHEAD(r1); \
+ bl .__trace_hcall_entry; \
+ addi r1,r1,STACK_FRAME_OVERHEAD; \
+ ld r0,16(r1); \
+ ld r3,STK_PARM(r3)(r1); \
+ ld r4,STK_PARM(r4)(r1); \
+ ld r5,STK_PARM(r5)(r1); \
+ ld r6,STK_PARM(r6)(r1); \
+ ld r7,STK_PARM(r7)(r1); \
+ ld r8,STK_PARM(r8)(r1); \
+ ld r9,STK_PARM(r9)(r1); \
+ ld r10,STK_PARM(r10)(r1); \
+ mtlr r0; \
+1:
+
/*
* postcall is performed immediately before function return which
* allows liberal use of volatile registers. We branch around this
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
- ld r4,STK_PARM(r3)(r1); /* validate opcode */ \
- cmpldi cr7,r4,MAX_HCALL_OPCODE; \
- bgt- cr7,1f; \
- \
- /* get time and PURR snapshots after hcall */ \
- mftb r7; /* timebase after */ \
-BEGIN_FTR_SECTION; \
- mfspr r8,SPRN_PURR; /* PURR after */ \
- ld r6,STK_PARM(r6)(r1); /* PURR before */ \
- subf r6,r6,r8; /* delta */ \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
- ld r5,STK_PARM(r5)(r1); /* timebase before */ \
- subf r5,r5,r7; /* time delta */ \
- \
- /* calculate address of stat structure r4 = opcode */ \
- srdi r4,r4,2; /* index into array */ \
- mulli r4,r4,HCALL_STAT_SIZE; \
- LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \
- add r4,r4,r7; \
- ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \
- add r4,r4,r7; \
- \
- /* update stats */ \
- ld r7,HCALL_STAT_CALLS(r4); /* count */ \
- addi r7,r7,1; \
- std r7,HCALL_STAT_CALLS(r4); \
- ld r7,HCALL_STAT_TB(r4); /* timebase */ \
- add r7,r7,r5; \
- std r7,HCALL_STAT_TB(r4); \
-BEGIN_FTR_SECTION; \
- ld r7,HCALL_STAT_PURR(r4); /* PURR */ \
- add r7,r7,r6; \
- std r7,HCALL_STAT_PURR(r4); \
-END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
+ ld r12,hcall_tracepoint_refcount@toc(r2); \
+ cmpdi r12,0; \
+ beq+ 1f; \
+ mflr r0; \
+ ld r6,STK_PARM(r3)(r1); \
+ std r3,STK_PARM(r3)(r1); \
+ mr r4,r3; \
+ mr r3,r6; \
+ std r0,16(r1); \
+ stdu r1,-STACK_FRAME_OVERHEAD(r1); \
+ bl .__trace_hcall_exit; \
+ addi r1,r1,STACK_FRAME_OVERHEAD; \
+ ld r0,16(r1); \
+ ld r3,STK_PARM(r3)(r1); \
+ mtlr r0; \
1:
#else
#define HCALL_INST_PRECALL