summaryrefslogtreecommitdiffstats
path: root/arch/s390/kernel
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2008-12-31 15:11:42 +0100
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2008-12-31 15:11:49 +0100
commitc742b31c03f37c5c499178f09f57381aa6c70131 (patch)
treedb0e95b8299d997fcb1264126bc8efe10d0ddd51 /arch/s390/kernel
parent[PATCH] improve idle cputime accounting (diff)
downloadlinux-c742b31c03f37c5c499178f09f57381aa6c70131.tar.xz
linux-c742b31c03f37c5c499178f09f57381aa6c70131.zip
[PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID
The extract cpu time instruction (ectg) instruction allows the user process to get the current thread cputime without calling into the kernel. The code that uses the instruction needs to switch to the access registers mode to get access to the per-cpu info page that contains the two base values that are needed to calculate the current cputime from the CPU timer with the ectg instruction. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/asm-offsets.c5
-rw-r--r--arch/s390/kernel/entry64.S45
-rw-r--r--arch/s390/kernel/head64.S2
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/smp.c9
-rw-r--r--arch/s390/kernel/vdso.c123
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S5
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S39
8 files changed, 201 insertions, 29 deletions
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60bac99..67a60016babb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+ DEFINE(__VDSO_ECTG_BASE,
+ offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+ DEFINE(__VDSO_ECTG_USER,
+ offsetof(struct vdso_per_cpu_data, ectg_user_time));
/* constants used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index ae83c195171c..c6fbde13971a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
.if !\sync
ni \psworg+1,0xfd # clear wait state bit
.endif
- lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user
stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user
lpswe \psworg # back to caller
.endm
@@ -980,23 +983,23 @@ cleanup_sysc_return:
cleanup_sysc_leave:
clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
- je 2f
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ je 3f
clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
- je 2f
- mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
+ jhe 0f
+ mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
cghi %r12,__LC_MCK_OLD_PSW
- jne 0f
+ jne 1f
mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
- j 1f
-0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
-1: lmg %r0,%r11,SP_R0(%r15)
+ j 2f
+1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
+2: lmg %r0,%r11,SP_R0(%r15)
lg %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+3: la %r12,__LC_RETURN_PSW
br %r14
cleanup_sysc_leave_insn:
.quad sysc_done - 4
- .quad sysc_done - 8
+ .quad sysc_done - 16
cleanup_io_return:
mvc __LC_RETURN_PSW(8),0(%r12)
@@ -1006,23 +1009,23 @@ cleanup_io_return:
cleanup_io_leave:
clc 8(8,%r12),BASED(cleanup_io_leave_insn)
- je 2f
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ je 3f
clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
- je 2f
- mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
+ jhe 0f
+ mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
cghi %r12,__LC_MCK_OLD_PSW
- jne 0f
+ jne 1f
mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
- j 1f
-0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
-1: lmg %r0,%r11,SP_R0(%r15)
+ j 2f
+1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
+2: lmg %r0,%r11,SP_R0(%r15)
lg %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+3: la %r12,__LC_RETURN_PSW
br %r14
cleanup_io_leave_insn:
.quad io_done - 4
- .quad io_done - 8
+ .quad io_done - 16
/*
* Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b24b8f..f9f70aa15244 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+ lghi %r0,__LC_PASTE
+ stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd5522c..d825f4950e4e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
/* enable extended save area */
__ctl_set_bit(14, 29);
}
+#else
+ lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
set_prefix((u32)(unsigned long) lc);
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3979a6fc0882..b3461e8f1705 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/cpu.h>
+#include <asm/vdso.h>
#include "entry.h"
/*
@@ -506,6 +507,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
goto out;
lowcore->extended_save_area_addr = (u32) save_area;
}
+#else
+ if (vdso_alloc_per_cpu(cpu, lowcore))
+ goto out;
#endif
lowcore_ptr[cpu] = lowcore;
return 0;
@@ -528,6 +532,8 @@ static void smp_free_lowcore(int cpu)
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
free_page((unsigned long) lowcore->extended_save_area_addr);
+#else
+ vdso_free_per_cpu(cpu, lowcore);
#endif
free_page(lowcore->panic_stack - PAGE_SIZE);
free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -670,6 +676,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
panic_stack = __get_free_page(GFP_KERNEL);
async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ BUG_ON(!lowcore || !panic_stack || !async_stack);
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
save_area = get_zeroed_page(GFP_KERNEL);
@@ -683,6 +690,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE)
lowcore->extended_save_area_addr = (u32) save_area;
+#else
+ BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
#endif
set_prefix((u32)(unsigned long) lowcore);
local_mcck_enable();
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6ccef4412..25a6a82f1c02 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
#include <asm/sections.h>
#include <asm/vdso.h>
-/* Max supported size for symbol names */
-#define MAX_SYMNAME 64
-
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
struct vdso_data *vdso_data = &vdso_data_store.data;
/*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+ unsigned int facility_list;
+
+ facility_list = stfl();
+ vd->ectg_available = switch_amode && (facility_list & 1);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Setup per cpu vdso data page.
+ */
+static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
+{
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#ifdef CONFIG_64BIT
+#define SEGMENT_ORDER 2
+#else
+#define SEGMENT_ORDER 1
+#endif
+
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+ int i;
+
+ lowcore->vdso_per_cpu_data = __LC_PASTE;
+
+ if (!switch_amode || !vdso_enabled)
+ return 0;
+
+ segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+ page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ page_frame = get_zeroed_page(GFP_KERNEL);
+ if (!segment_table || !page_table || !page_frame)
+ goto out;
+
+ clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+ PAGE_SIZE << SEGMENT_ORDER);
+ clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+ 256*sizeof(unsigned long));
+
+ *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+ *(unsigned long *) page_table = _PAGE_RO + page_frame;
+
+ psal = (u32 *) (page_table + 256*sizeof(unsigned long));
+ aste = psal + 32;
+
+ for (i = 4; i < 32; i += 4)
+ psal[i] = 0x80000000;
+
+ lowcore->paste[4] = (u32)(addr_t) psal;
+ psal[0] = 0x20000000;
+ psal[2] = (u32)(addr_t) aste;
+ *(unsigned long *) (aste + 2) = segment_table +
+ _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+ aste[4] = (u32)(addr_t) psal;
+ lowcore->vdso_per_cpu_data = page_frame;
+
+ vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
+ return 0;
+
+out:
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+
+ if (!switch_amode || !vdso_enabled)
+ return;
+
+ psal = (u32 *)(addr_t) lowcore->paste[4];
+ aste = (u32 *)(addr_t) psal[2];
+ segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+ page_table = *(unsigned long *) segment_table;
+ page_frame = *(unsigned long *) page_table;
+
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void __vdso_init_cr5(void *dummy)
+{
+ unsigned long cr5;
+
+ cr5 = offsetof(struct _lowcore, paste);
+ __ctl_load(cr5, 5, 5);
+}
+
+static void vdso_init_cr5(void)
+{
+ if (switch_amode && vdso_enabled)
+ on_each_cpu(__vdso_init_cr5, NULL, 1);
+}
+#endif /* CONFIG_64BIT */
+
+/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
{
int i;
+ if (!vdso_enabled)
+ return 0;
+ vdso_init_data(vdso_data);
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
/* Calculate the size of the 32 bit vDSO */
vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
}
vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
vdso64_pagelist[vdso64_pages] = NULL;
+#ifndef CONFIG_SMP
+ BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore));
+#endif
+ vdso_init_cr5();
#endif /* CONFIG_64BIT */
get_page(virt_to_page(vdso_data));
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a3c0e7..9ce8caafdb4e 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
cghi %r2,CLOCK_REALTIME
je 0f
cghi %r2,CLOCK_MONOTONIC
+ je 0f
+ cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
jne 2f
+ larl %r5,_vdso_data
+ icm %r0,15,__LC_ECTG_OK(%r5)
+ jz 2f
0: ltgr %r3,%r3
jz 1f /* res == NULL */
larl %r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410b7eb2..79dbfee831ec 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
larl %r5,_vdso_data
cghi %r2,CLOCK_REALTIME
je 4f
+ cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ je 9f
cghi %r2,CLOCK_MONOTONIC
- jne 9f
+ jne 12f
/* CLOCK_MONOTONIC */
ltgr %r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
alg %r0,__VDSO_WTOM_SEC(%r5)
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 0b
- larl %r5,10f
+ larl %r5,13f
1: clg %r1,0(%r5)
jl 2f
slg %r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
lg %r0,__VDSO_XTIME_SEC(%r5)
clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
jne 5b
- larl %r5,10f
+ larl %r5,13f
6: clg %r1,0(%r5)
jl 7f
slg %r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
8: lghi %r2,0
br %r14
+ /* CLOCK_THREAD_CPUTIME_ID for this thread */
+9: icm %r0,15,__VDSO_ECTG_OK(%r5)
+ jz 12f
+ ear %r2,%a4
+ llilh %r4,0x0100
+ sar %a4,%r4
+ lghi %r4,0
+ sacf 512 /* Magic ectg instruction */
+ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+ sacf 0
+ sar %a4,%r2
+ algr %r1,%r0 /* r1 = cputime as TOD value */
+ mghi %r1,1000 /* convert to nanoseconds */
+ srlg %r1,%r1,12 /* r1 = cputime in nanosec */
+ lgr %r4,%r1
+ larl %r5,13f
+ srlg %r1,%r1,9 /* divide by 1000000000 */
+ mlg %r0,8(%r5)
+ srlg %r0,%r0,11 /* r0 = tv_sec */
+ stg %r0,0(%r3)
+ msg %r0,0(%r5) /* calculate tv_nsec */
+ slgr %r4,%r0 /* r4 = tv_nsec */
+ stg %r4,8(%r3)
+ lghi %r2,0
+ br %r14
+
/* Fallback to system call */
-9: lghi %r1,__NR_clock_gettime
+12: lghi %r1,__NR_clock_gettime
svc 0
br %r14
-10: .quad 1000000000
+13: .quad 1000000000
+14: .quad 19342813113834067
.cfi_endproc
.size __kernel_clock_gettime,.-__kernel_clock_gettime