summaryrefslogtreecommitdiffstats
path: root/arch/x86/vdso
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/vdso')
-rw-r--r--arch/x86/vdso/Makefile1
-rw-r--r--arch/x86/vdso/vclock_gettime.c103
-rw-r--r--arch/x86/vdso/vdso.S16
-rw-r--r--arch/x86/vdso/vdso32/sysenter.S2
-rw-r--r--arch/x86/vdso/vma.c67
5 files changed, 129 insertions, 60 deletions
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index bef0bc962400..5d179502a52c 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -26,6 +26,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
export CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+ -Wl,--no-undefined \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index a724905fdae7..6bc0e723b6e8 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -6,7 +6,6 @@
*
* The code should have no internal unresolved relocations.
* Check with readelf after changing.
- * Also alternative() doesn't work.
*/
/* Disable profiling for userspace code: */
@@ -17,6 +16,7 @@
#include <linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
+#include <asm/fixmap.h>
#include <asm/vgtod.h>
#include <asm/timex.h>
#include <asm/hpet.h>
@@ -25,6 +25,43 @@
#define gtod (&VVAR(vsyscall_gtod_data))
+notrace static cycle_t vread_tsc(void)
+{
+ cycle_t ret;
+ u64 last;
+
+ /*
+ * Empirically, a fence (of type that depends on the CPU)
+ * before rdtsc is enough to ensure that rdtsc is ordered
+ * with respect to loads. The various CPU manuals are unclear
+ * as to whether rdtsc can be reordered with later loads,
+ * but no one has ever seen it happen.
+ */
+ rdtsc_barrier();
+ ret = (cycle_t)vget_cycles();
+
+ last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ /*
+ * GCC likes to generate cmov here, but this branch is extremely
+ * predictable (it's just a funciton of time and the likely is
+ * very likely) and there's a data dependence, so force GCC
+ * to generate a branch instead. I don't barrier() because
+ * we don't actually need a barrier, and if this function
+ * ever gets inlined it will generate worse code.
+ */
+ asm volatile ("");
+ return last;
+}
+
+static notrace cycle_t vread_hpet(void)
+{
+ return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+}
+
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
@@ -36,9 +73,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
notrace static inline long vgetns(void)
{
long v;
- cycles_t (*vread)(void);
- vread = gtod->clock.vread;
- v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
+ cycles_t cycles;
+ if (gtod->clock.vclock_mode == VCLOCK_TSC)
+ cycles = vread_tsc();
+ else
+ cycles = vread_hpet();
+ v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
@@ -116,21 +156,21 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts)
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
- if (likely(gtod->sysctl_enabled))
- switch (clock) {
- case CLOCK_REALTIME:
- if (likely(gtod->clock.vread))
- return do_realtime(ts);
- break;
- case CLOCK_MONOTONIC:
- if (likely(gtod->clock.vread))
- return do_monotonic(ts);
- break;
- case CLOCK_REALTIME_COARSE:
- return do_realtime_coarse(ts);
- case CLOCK_MONOTONIC_COARSE:
- return do_monotonic_coarse(ts);
- }
+ switch (clock) {
+ case CLOCK_REALTIME:
+ if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
+ return do_realtime(ts);
+ break;
+ case CLOCK_MONOTONIC:
+ if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
+ return do_monotonic(ts);
+ break;
+ case CLOCK_REALTIME_COARSE:
+ return do_realtime_coarse(ts);
+ case CLOCK_MONOTONIC_COARSE:
+ return do_monotonic_coarse(ts);
+ }
+
return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
@@ -139,7 +179,7 @@ int clock_gettime(clockid_t, struct timespec *)
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
- if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
+ if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
offsetof(struct timespec, tv_nsec) ||
@@ -161,27 +201,14 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
int gettimeofday(struct timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));
-/* This will break when the xtime seconds get inaccurate, but that is
- * unlikely */
-
-static __always_inline long time_syscall(long *t)
-{
- long secs;
- asm volatile("syscall"
- : "=a" (secs)
- : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
- return secs;
-}
-
+/*
+ * This will break when the xtime seconds get inaccurate, but that is
+ * unlikely
+ */
notrace time_t __vdso_time(time_t *t)
{
- time_t result;
-
- if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
- return time_syscall(t);
-
/* This is atomic on x86_64 so we don't need any locks. */
- result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+ time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
if (t)
*t = result;
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1d3aa6b87181..01f5e3b4613c 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,10 +1,22 @@
+#include <asm/page_types.h>
+#include <linux/linkage.h>
#include <linux/init.h>
-__INITDATA
+__PAGE_ALIGNED_DATA
.globl vdso_start, vdso_end
+ .align PAGE_SIZE
vdso_start:
.incbin "arch/x86/vdso/vdso.so"
vdso_end:
+ .align PAGE_SIZE /* extra data here leaks to userspace. */
-__FINIT
+.previous
+
+ .globl vdso_pages
+ .bss
+ .align 8
+ .type vdso_pages, @object
+vdso_pages:
+ .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
+ .size vdso_pages, .-vdso_pages
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index e2800affa754..e354bceee0e0 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -43,7 +43,7 @@ __kernel_vsyscall:
.space 7,0x90
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
- jmp .Lenter_kernel
+ int $0x80
/* 16: System call normal return point is here! */
VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
pop %ebp
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7abd2be0f9b9..153407c35b75 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -14,41 +14,61 @@
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
+#include <asm/page.h>
unsigned int __read_mostly vdso_enabled = 1;
extern char vdso_start[], vdso_end[];
extern unsigned short vdso_sync_cpuid;
-static struct page **vdso_pages;
+extern struct page *vdso_pages[];
static unsigned vdso_size;
-static int __init init_vdso_vars(void)
+static void __init patch_vdso(void *vdso, size_t len)
+{
+ Elf64_Ehdr *hdr = vdso;
+ Elf64_Shdr *sechdrs, *alt_sec = 0;
+ char *secstrings;
+ void *alt_data;
+ int i;
+
+ BUG_ON(len < sizeof(Elf64_Ehdr));
+ BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
+
+ sechdrs = (void *)hdr + hdr->e_shoff;
+ secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (i = 1; i < hdr->e_shnum; i++) {
+ Elf64_Shdr *shdr = &sechdrs[i];
+ if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
+ alt_sec = shdr;
+ goto found;
+ }
+ }
+
+ /* If we get here, it's probably a bug. */
+ pr_warning("patch_vdso: .altinstructions not found\n");
+ return; /* nothing to patch */
+
+found:
+ alt_data = (void *)hdr + alt_sec->sh_offset;
+ apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
+}
+
+static int __init init_vdso(void)
{
int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
int i;
+ patch_vdso(vdso_start, vdso_end - vdso_start);
+
vdso_size = npages << PAGE_SHIFT;
- vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
- if (!vdso_pages)
- goto oom;
- for (i = 0; i < npages; i++) {
- struct page *p;
- p = alloc_page(GFP_KERNEL);
- if (!p)
- goto oom;
- vdso_pages[i] = p;
- copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
- }
+ for (i = 0; i < npages; i++)
+ vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
return 0;
-
- oom:
- printk("Cannot allocate vdso\n");
- vdso_enabled = 0;
- return -ENOMEM;
}
-subsys_initcall(init_vdso_vars);
+subsys_initcall(init_vdso);
struct linux_binprm;
@@ -69,6 +89,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
addr = start + (offset << PAGE_SHIFT);
if (addr >= end)
addr = end;
+
+ /*
+ * page-align it here so that get_unmapped_area doesn't
+ * align it wrongfully again to the next page. addr can come in 4K
+ * unaligned here as a result of stack start randomization.
+ */
+ addr = PAGE_ALIGN(addr);
+ addr = align_addr(addr, NULL, ALIGN_VDSO);
+
return addr;
}