From bd83e65bfaedafef1ba21ce19c1ea7913da01bec Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:37 -0500 Subject: metag: Replace __get_cpu_var uses for address calculation Replace __get_cpu_var uses for address calculation with this_cpu_ptr(). Acked-by: James Hogan Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/metag/kernel/perf/perf_event.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 02c08737f6aa..2478ec6d23c9 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -258,7 +258,7 @@ int metag_pmu_event_set_period(struct perf_event *event, static void metag_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -306,7 +306,7 @@ static void metag_pmu_stop(struct perf_event *event, int flags) static int metag_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = 0, ret = 0; @@ -348,7 +348,7 @@ out: static void metag_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -597,7 +597,7 @@ static int _hw_perf_event_init(struct perf_event *event) static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) { - struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events); unsigned int config = event->config; unsigned int tmp = config & 0xf0; unsigned long flags; @@ -670,7 +670,7 @@ unlock: static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx) { - struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events); unsigned int tmp = 0; unsigned long flags; @@ -718,7 +718,7 @@ out: static void metag_pmu_write_counter(int idx, u32 val) { - struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events); u32 tmp = 0; unsigned long flags; @@ -751,7 +751,7 @@ static int metag_pmu_event_map(int idx) static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev) { int idx = (int)dev; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event = cpuhw->events[idx]; struct hw_perf_event *hwc = &event->hw; struct pt_regs *regs = get_irq_regs(); -- cgit v1.2.3 From 89cbc76768c2fa4ed95545bf961f3a14ddfeed21 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:40 -0500 Subject: x86: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Thomas Gleixner Cc: x86@kernel.org Acked-by: H. Peter Anvin Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/debugreg.h | 4 +-- arch/x86/include/asm/uv/uv_hub.h | 2 +- arch/x86/kernel/apb_timer.c | 4 +-- arch/x86/kernel/apic/apic.c | 4 +-- arch/x86/kernel/cpu/common.c | 6 ++-- arch/x86/kernel/cpu/mcheck/mce-inject.c | 6 ++-- arch/x86/kernel/cpu/mcheck/mce.c | 46 ++++++++++++++--------------- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel.c | 22 +++++++------- arch/x86/kernel/cpu/perf_event.c | 22 +++++++------- arch/x86/kernel/cpu/perf_event_amd.c | 4 +-- arch/x86/kernel/cpu/perf_event_intel.c | 18 +++++------ arch/x86/kernel/cpu/perf_event_intel_ds.c | 20 ++++++------- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 12 ++++---- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 12 ++++---- arch/x86/kernel/cpu/perf_event_knc.c | 2 +- arch/x86/kernel/cpu/perf_event_p4.c | 6 ++-- arch/x86/kernel/hw_breakpoint.c | 8 ++--- arch/x86/kernel/irq_64.c | 6 ++-- arch/x86/kernel/kvm.c | 22 +++++++------- arch/x86/kvm/svm.c | 6 ++-- arch/x86/kvm/vmx.c | 10 +++---- arch/x86/kvm/x86.c | 2 +- arch/x86/mm/kmemcheck/kmemcheck.c | 14 ++++----- arch/x86/oprofile/nmi_int.c | 8 ++--- arch/x86/platform/uv/uv_time.c | 2 +- arch/x86/xen/enlighten.c | 4 +-- arch/x86/xen/multicalls.c | 8 ++--- arch/x86/xen/spinlock.c | 2 +- arch/x86/xen/time.c | 10 +++---- 30 files changed, 147 insertions(+), 147 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4b528a970bd4..61fd18b83b6c 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -97,11 +97,11 @@ extern void hw_breakpoint_restore(void); DECLARE_PER_CPU(int, debug_stack_usage); static inline void debug_stack_usage_inc(void) { - __get_cpu_var(debug_stack_usage)++; + __this_cpu_inc(debug_stack_usage); } static inline void debug_stack_usage_dec(void) { - __get_cpu_var(debug_stack_usage)--; + __this_cpu_dec(debug_stack_usage); } int is_debug_stack(unsigned long addr); void debug_stack_set_zero(void); diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index c63e925fd6b7..bb84cfd5a1a1 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -164,7 +164,7 @@ struct uv_hub_info_s { }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info this_cpu_ptr(&__uv_hub_info) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index af5b08ab3b71..5972b108f15a 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -146,7 +146,7 @@ static inline int is_apbt_capable(void) static int __init apbt_clockevent_register(void) { struct sfi_timer_table_entry *mtmr; - struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); + struct apbt_dev *adev = this_cpu_ptr(&cpu_apbt_dev); mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); if (mtmr == NULL) { @@ -200,7 +200,7 @@ void apbt_setup_secondary_clock(void) if (!cpu) return; - adev = &__get_cpu_var(cpu_apbt_dev); + adev = this_cpu_ptr(&cpu_apbt_dev); if (!adev->timer) { adev->timer = dw_apb_clockevent_init(cpu, adev->name, APBT_CLOCKEVENT_RATING, adev_virt_addr(adev), diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 67760275544b..00853b254ab0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -561,7 +561,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); */ static void setup_APIC_timer(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); if (this_cpu_has(X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; @@ -696,7 +696,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init calibrate_APIC_clock(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; long delta, deltatsc; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e4ab2b42bd6f..5666eb9568fc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1198,9 +1198,9 @@ DEFINE_PER_CPU(int, debug_stack_usage); int is_debug_stack(unsigned long addr) { - return __get_cpu_var(debug_stack_usage) || - (addr <= __get_cpu_var(debug_stack_addr) && - addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); + return __this_cpu_read(debug_stack_usage) || + (addr <= __this_cpu_read(debug_stack_addr) && + addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); } NOKPROBE_SYMBOL(is_debug_stack); diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 5ac2d1fb28bc..4cfba4371a71 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -83,7 +83,7 @@ static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); @@ -97,7 +97,7 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) static void mce_irq_ipi(void *info) { int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); if (cpumask_test_cpu(cpu, mce_inject_cpumask) && m->inject_flags & MCJ_EXCEPTION) { @@ -109,7 +109,7 @@ static void mce_irq_ipi(void *info) /* Inject mce on current CPU */ static int raise_local(void) { - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); int context = MCJ_CTX(m->inject_flags); int ret = 0; int cpu = m->extcpu; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bd9ccda8087f..61a9668cebfd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -400,7 +400,7 @@ static u64 mce_rdmsrl(u32 msr) if (offset < 0) return 0; - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); + return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset); } if (rdmsrl_safe(msr, &v)) { @@ -422,7 +422,7 @@ static void mce_wrmsrl(u32 msr, u64 v) int offset = msr_to_offset(msr); if (offset >= 0) - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; + *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v; return; } wrmsrl(msr, v); @@ -478,7 +478,7 @@ static DEFINE_PER_CPU(struct mce_ring, mce_ring); /* Runs with CPU affinity in workqueue */ static int mce_ring_empty(void) { - struct mce_ring *r = &__get_cpu_var(mce_ring); + struct mce_ring *r = this_cpu_ptr(&mce_ring); return r->start == r->end; } @@ -490,7 +490,7 @@ static int mce_ring_get(unsigned long *pfn) *pfn = 0; get_cpu(); - r = &__get_cpu_var(mce_ring); + r = this_cpu_ptr(&mce_ring); if (r->start == r->end) goto out; *pfn = r->ring[r->start]; @@ -504,7 +504,7 @@ out: /* Always runs in MCE context with preempt off */ static int mce_ring_add(unsigned long pfn) { - struct mce_ring *r = &__get_cpu_var(mce_ring); + struct mce_ring *r = this_cpu_ptr(&mce_ring); unsigned next; next = (r->end + 1) % MCE_RING_SIZE; @@ -526,7 +526,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { if (!mce_ring_empty()) - schedule_work(&__get_cpu_var(mce_work)); + schedule_work(this_cpu_ptr(&mce_work)); } DEFINE_PER_CPU(struct irq_work, mce_irq_work); @@ -551,7 +551,7 @@ static void mce_report_event(struct pt_regs *regs) return; } - irq_work_queue(&__get_cpu_var(mce_irq_work)); + irq_work_queue(this_cpu_ptr(&mce_irq_work)); } /* @@ -1045,7 +1045,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_gather_info(&m, regs); - final = &__get_cpu_var(mces_seen); + final = this_cpu_ptr(&mces_seen); *final = m; memset(valid_banks, 0, sizeof(valid_banks)); @@ -1278,22 +1278,22 @@ static unsigned long (*mce_adjust_timer)(unsigned long interval) = static int cmc_error_seen(void) { - unsigned long *v = &__get_cpu_var(mce_polled_error); + unsigned long *v = this_cpu_ptr(&mce_polled_error); return test_and_clear_bit(0, v); } static void mce_timer_fn(unsigned long data) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long iv; int notify; WARN_ON(smp_processor_id() != data); - if (mce_available(__this_cpu_ptr(&cpu_info))) { + if (mce_available(this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + this_cpu_ptr(&mce_poll_banks)); mce_intel_cmci_poll(); } @@ -1323,7 +1323,7 @@ static void mce_timer_fn(unsigned long data) */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long when = jiffies + interval; unsigned long iv = __this_cpu_read(mce_next_interval); @@ -1659,7 +1659,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); setup_timer(t, mce_timer_fn, cpu); @@ -1702,8 +1702,8 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); - INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); + INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work); + init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb); } /* @@ -1955,7 +1955,7 @@ static struct miscdevice mce_chrdev_device = { static void __mce_disable_bank(void *arg) { int bank = *((int *)arg); - __clear_bit(bank, __get_cpu_var(mce_poll_banks)); + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); cmci_disable_bank(bank); } @@ -2065,7 +2065,7 @@ static void mce_syscore_shutdown(void) static void mce_syscore_resume(void) { __mcheck_cpu_init_generic(); - __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); + __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); } static struct syscore_ops mce_syscore_ops = { @@ -2080,7 +2080,7 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); __mcheck_cpu_init_timer(); @@ -2096,14 +2096,14 @@ static void mce_restart(void) /* Toggle features for corrected errors */ static void mce_disable_cmci(void *data) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; cmci_clear(); } static void mce_enable_ce(void *all) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; cmci_reenable(); cmci_recheck(); @@ -2336,7 +2336,7 @@ static void mce_disable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) @@ -2354,7 +2354,7 @@ static void mce_reenable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1e49f8f41276..5d4999f95aec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -310,7 +310,7 @@ static void amd_threshold_interrupt(void) * event. */ machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + this_cpu_ptr(&mce_poll_banks)); if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 3bdb95ae8c43..b3c97bafc123 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -86,7 +86,7 @@ void mce_intel_cmci_poll(void) { if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) return; - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); } void mce_intel_hcpu_update(unsigned long cpu) @@ -145,7 +145,7 @@ static void cmci_storm_disable_banks(void) u64 val; raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = __get_cpu_var(mce_banks_owned); + owned = this_cpu_ptr(mce_banks_owned); for_each_set_bit(bank, owned, MAX_NR_BANKS) { rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; @@ -195,7 +195,7 @@ static void intel_threshold_interrupt(void) { if (cmci_storm_detect()) return; - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); mce_notify_irq(); } @@ -206,7 +206,7 @@ static void intel_threshold_interrupt(void) */ static void cmci_discover(int banks) { - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); + unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); unsigned long flags; int i; int bios_wrong_thresh = 0; @@ -228,7 +228,7 @@ static void cmci_discover(int banks) /* Already owned by someone else? */ if (val & MCI_CTL2_CMCI_EN) { clear_bit(i, owned); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); + __clear_bit(i, this_cpu_ptr(mce_poll_banks)); continue; } @@ -252,7 +252,7 @@ static void cmci_discover(int banks) /* Did the enable bit stick? -- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { set_bit(i, owned); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); + __clear_bit(i, this_cpu_ptr(mce_poll_banks)); /* * We are able to set thresholds for some banks that * had a threshold of 0. This means the BIOS has not @@ -263,7 +263,7 @@ static void cmci_discover(int banks) (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) bios_wrong_thresh = 1; } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); + WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks))); } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); @@ -284,10 +284,10 @@ void cmci_recheck(void) unsigned long flags; int banks; - if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) + if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) return; local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); local_irq_restore(flags); } @@ -296,12 +296,12 @@ static void __cmci_disable_bank(int bank) { u64 val; - if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) + if (!test_bit(bank, this_cpu_ptr(mce_banks_owned))) return; rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - __clear_bit(bank, __get_cpu_var(mce_banks_owned)); + __clear_bit(bank, this_cpu_ptr(mce_banks_owned)); } /* diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2879ecdaac43..5cd2b7967370 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -487,7 +487,7 @@ static int __x86_pmu_event_init(struct perf_event *event) void x86_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -505,7 +505,7 @@ void x86_pmu_disable_all(void) static void x86_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu_initialized()) return; @@ -522,7 +522,7 @@ static void x86_pmu_disable(struct pmu *pmu) void x86_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -869,7 +869,7 @@ static void x86_pmu_start(struct perf_event *event, int flags); static void x86_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; int i, added = cpuc->n_added; @@ -1020,7 +1020,7 @@ void x86_pmu_enable_event(struct perf_event *event) */ static int x86_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc; int assign[X86_PMC_IDX_MAX]; int n, n0, ret; @@ -1071,7 +1071,7 @@ out: static void x86_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = event->hw.idx; if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -1150,7 +1150,7 @@ void perf_event_print_debug(void) void x86_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { @@ -1172,7 +1172,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) static void x86_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; /* @@ -1227,7 +1227,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* * Some chipsets need to unmask the LVTPC in a particular spot @@ -1636,7 +1636,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ static int x86_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int assign[X86_PMC_IDX_MAX]; int n, ret; @@ -1995,7 +1995,7 @@ static unsigned long get_segment_base(unsigned int segment) if (idx > GDT_ENTRIES) return 0; - desc = __this_cpu_ptr(&gdt_page.gdt[0]); + desc = raw_cpu_ptr(gdt_page.gdt); } return get_desc_base(desc + idx); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index beeb7cc07044..28926311aac1 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -699,7 +699,7 @@ __init int amd_pmu_init(void) void amd_pmu_enable_virt(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); cpuc->perf_ctr_virt_mask = 0; @@ -711,7 +711,7 @@ EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); void amd_pmu_disable_virt(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); /* * We only mask out the Host-only bit so that host-only counting works diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2502d0d9d246..6f80accf137d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1045,7 +1045,7 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) static void intel_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -1058,7 +1058,7 @@ static void intel_pmu_disable_all(void) static void intel_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); @@ -1092,7 +1092,7 @@ static void intel_pmu_enable_all(int added) */ static void intel_pmu_nhm_workaround(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); static const unsigned long nhm_magic[4] = { 0x4300B5, 0x4300D2, @@ -1191,7 +1191,7 @@ static inline bool event_is_checkpointed(struct perf_event *event) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -1255,7 +1255,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -1349,7 +1349,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 status; int handled; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* * No known reason to not always do late ACK, @@ -1781,7 +1781,7 @@ EXPORT_SYMBOL_GPL(perf_guest_get_msrs); static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; @@ -1802,7 +1802,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; int idx; @@ -1836,7 +1836,7 @@ static void core_pmu_enable_event(struct perf_event *event) static void core_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 696ade311ded..7b786b369789 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -475,7 +475,7 @@ void intel_pmu_enable_bts(u64 config) void intel_pmu_disable_bts(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long debugctlmsr; if (!cpuc->ds) @@ -492,7 +492,7 @@ void intel_pmu_disable_bts(void) int intel_pmu_drain_bts_buffer(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct bts_record { u64 from; @@ -712,7 +712,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) void intel_pmu_pebs_enable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; @@ -727,7 +727,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) void intel_pmu_pebs_disable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; cpuc->pebs_enabled &= ~(1ULL << hwc->idx); @@ -745,7 +745,7 @@ void intel_pmu_pebs_disable(struct perf_event *event) void intel_pmu_pebs_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -753,7 +753,7 @@ void intel_pmu_pebs_enable_all(void) void intel_pmu_pebs_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); @@ -761,7 +761,7 @@ void intel_pmu_pebs_disable_all(void) static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long from = cpuc->lbr_entries[0].from; unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; @@ -868,7 +868,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. */ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_hsw *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; @@ -957,7 +957,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = cpuc->events[0]; /* PMC0 only */ struct pebs_record_core *at, *top; @@ -998,7 +998,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = NULL; void *at, *top; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 9dd2459a4c73..ebb0d3144551 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -133,7 +133,7 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); static void __intel_pmu_lbr_enable(void) { u64 debugctl; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_sel) wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); @@ -183,7 +183,7 @@ void intel_pmu_lbr_reset(void) void intel_pmu_lbr_enable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; @@ -203,7 +203,7 @@ void intel_pmu_lbr_enable(struct perf_event *event) void intel_pmu_lbr_disable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; @@ -220,7 +220,7 @@ void intel_pmu_lbr_disable(struct perf_event *event) void intel_pmu_lbr_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) __intel_pmu_lbr_enable(); @@ -228,7 +228,7 @@ void intel_pmu_lbr_enable_all(void) void intel_pmu_lbr_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) __intel_pmu_lbr_disable(); @@ -332,7 +332,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) void intel_pmu_lbr_read(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!cpuc->lbr_users) return; diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 619f7699487a..d64f275fe274 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -135,7 +135,7 @@ static inline u64 rapl_scale(u64 v) * or use ldexp(count, -32). * Watts = Joules/Time delta */ - return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); + return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); } static u64 rapl_event_update(struct perf_event *event) @@ -187,7 +187,7 @@ static void rapl_stop_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct perf_event *event; unsigned long flags; @@ -234,7 +234,7 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); unsigned long flags; spin_lock_irqsave(&pmu->lock, flags); @@ -244,7 +244,7 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode) static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct hw_perf_event *hwc = &event->hw; unsigned long flags; @@ -278,7 +278,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct hw_perf_event *hwc = &event->hw; unsigned long flags; @@ -696,7 +696,7 @@ static int __init rapl_pmu_init(void) return -1; } - pmu = __get_cpu_var(rapl_pmu); + pmu = __this_cpu_read(rapl_pmu); pr_info("RAPL PMU detected, hw unit 2^-%d Joules," " API unit is 2^-32 Joules," diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c index 838fa8772c62..5b0c232d1ee6 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -217,7 +217,7 @@ static int knc_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 status; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); knc_pmu_disable_all(); diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 5d466b7d8609..f2e56783af3d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -915,7 +915,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) static void p4_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -984,7 +984,7 @@ static void p4_pmu_enable_event(struct perf_event *event) static void p4_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -1004,7 +1004,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { int overflow; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 5f9cf20cdb68..3d5fb509bdeb 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -108,7 +108,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) int i; for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); if (!*slot) { *slot = bp; @@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) set_debugreg(info->address, i); __this_cpu_write(cpu_debugreg[i], info->address); - dr7 = &__get_cpu_var(cpu_dr7); + dr7 = this_cpu_ptr(&cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -146,7 +146,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) int i; for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); if (*slot == bp) { *slot = NULL; @@ -157,7 +157,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = &__get_cpu_var(cpu_dr7); + dr7 = this_cpu_ptr(&cpu_dr7); *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4d1c746892eb..e4b503d5558c 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -52,13 +52,13 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + + irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + STACK_TOP_MARGIN; - irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); + irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) return; - oist = &__get_cpu_var(orig_ist); + oist = this_cpu_ptr(&orig_ist); estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; if (regs->sp >= estack_top && regs->sp <= estack_bottom) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 3dd8e2c4d74a..2b68102dbbeb 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -243,9 +243,9 @@ u32 kvm_read_and_reset_pf_reason(void) { u32 reason = 0; - if (__get_cpu_var(apf_reason).enabled) { - reason = __get_cpu_var(apf_reason).reason; - __get_cpu_var(apf_reason).reason = 0; + if (__this_cpu_read(apf_reason.enabled)) { + reason = __this_cpu_read(apf_reason.reason); + __this_cpu_write(apf_reason.reason, 0); } return reason; @@ -318,7 +318,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) * there's no need for lock or memory barriers. * An optimization barrier is implied in apic write. */ - if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) + if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi))) return; apic_write(APIC_EOI, APIC_EOI_ACK); } @@ -329,13 +329,13 @@ void kvm_guest_cpu_init(void) return; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); + u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); #ifdef CONFIG_PREEMPT pa |= KVM_ASYNC_PF_SEND_ALWAYS; #endif wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); - __get_cpu_var(apf_reason).enabled = 1; + __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); } @@ -344,8 +344,8 @@ void kvm_guest_cpu_init(void) unsigned long pa; /* Size alignment is implied but just to make it explicit. */ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); - __get_cpu_var(kvm_apic_eoi) = 0; - pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) + __this_cpu_write(kvm_apic_eoi, 0); + pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi)) | KVM_MSR_ENABLED; wrmsrl(MSR_KVM_PV_EOI_EN, pa); } @@ -356,11 +356,11 @@ void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__get_cpu_var(apf_reason).enabled) + if (!__this_cpu_read(apf_reason.enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __get_cpu_var(apf_reason).enabled = 0; + __this_cpu_write(apf_reason.enabled, 0); printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", smp_processor_id()); @@ -716,7 +716,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) if (in_nmi()) return; - w = &__get_cpu_var(klock_waiting); + w = this_cpu_ptr(&klock_waiting); cpu = smp_processor_id(); start = spin_time_start(); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ddf742768ecf..1b0e90658d8d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -670,7 +670,7 @@ static int svm_hardware_enable(void *garbage) if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); - __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); } @@ -1312,8 +1312,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && - svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { - __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) { + __this_cpu_write(current_tsc_ratio, svm->tsc_ratio); wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); } } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe11cf124a1..36cf28a910b8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1601,7 +1601,7 @@ static void reload_tss(void) /* * VT restores TR but not its size. Useless. */ - struct desc_ptr *gdt = &__get_cpu_var(host_gdt); + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); struct desc_struct *descs; descs = (void *)gdt->address; @@ -1647,7 +1647,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) static unsigned long segment_base(u16 selector) { - struct desc_ptr *gdt = &__get_cpu_var(host_gdt); + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); struct desc_struct *d; unsigned long table_base; unsigned long v; @@ -1777,7 +1777,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) */ if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) stts(); - load_gdt(&__get_cpu_var(host_gdt)); + load_gdt(this_cpu_ptr(&host_gdt)); } static void vmx_load_host_state(struct vcpu_vmx *vmx) @@ -1807,7 +1807,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } if (vmx->loaded_vmcs->cpu != cpu) { - struct desc_ptr *gdt = &__get_cpu_var(host_gdt); + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); unsigned long sysenter_esp; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); @@ -2744,7 +2744,7 @@ static int hardware_enable(void *garbage) ept_sync_global(); } - native_store_gdt(&__get_cpu_var(host_gdt)); + native_store_gdt(this_cpu_ptr(&host_gdt)); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f1e22d3b286..c84ee536f9a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1556,7 +1556,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - this_tsc_khz = __get_cpu_var(cpu_tsc_khz); + this_tsc_khz = __this_cpu_read(cpu_tsc_khz); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index dd89a13f1051..b4f2e7e9e907 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -140,7 +140,7 @@ static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); bool kmemcheck_active(struct pt_regs *regs) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); return data->balance > 0; } @@ -148,7 +148,7 @@ bool kmemcheck_active(struct pt_regs *regs) /* Save an address that needs to be shown/hidden */ static void kmemcheck_save_addr(unsigned long addr) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); data->addr[data->n_addrs++] = addr; @@ -156,7 +156,7 @@ static void kmemcheck_save_addr(unsigned long addr) static unsigned int kmemcheck_show_all(void) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); unsigned int i; unsigned int n; @@ -169,7 +169,7 @@ static unsigned int kmemcheck_show_all(void) static unsigned int kmemcheck_hide_all(void) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); unsigned int i; unsigned int n; @@ -185,7 +185,7 @@ static unsigned int kmemcheck_hide_all(void) */ void kmemcheck_show(struct pt_regs *regs) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); BUG_ON(!irqs_disabled()); @@ -226,7 +226,7 @@ void kmemcheck_show(struct pt_regs *regs) */ void kmemcheck_hide(struct pt_regs *regs) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); int n; BUG_ON(!irqs_disabled()); @@ -528,7 +528,7 @@ static void kmemcheck_access(struct pt_regs *regs, const uint8_t *insn_primary; unsigned int size; - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); /* Recursive fault -- ouch. */ if (data->busy) { diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 379e8bd0deea..1d2e6392f5fa 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -64,11 +64,11 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) { if (ctr_running) - model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); + model->check_ctrs(regs, this_cpu_ptr(&cpu_msrs)); else if (!nmi_enabled) return NMI_DONE; else - model->stop(&__get_cpu_var(cpu_msrs)); + model->stop(this_cpu_ptr(&cpu_msrs)); return NMI_HANDLED; } @@ -91,7 +91,7 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) static void nmi_cpu_start(void *dummy) { - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs); if (!msrs->controls) WARN_ON_ONCE(1); else @@ -111,7 +111,7 @@ static int nmi_start(void) static void nmi_cpu_stop(void *dummy) { - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs); if (!msrs->controls) WARN_ON_ONCE(1); else diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 5c86786bbfd2..a244237f3cfa 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -365,7 +365,7 @@ __setup("uvrtcevt", uv_enable_evt_rtc); static __init void uv_rtc_register_clockevents(struct work_struct *dummy) { - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); + struct clock_event_device *ced = this_cpu_ptr(&cpu_ced); *ced = clock_event_device_uv; ced->cpumask = cpumask_of(smp_processor_id()); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0cb11fb5008..2628ee556756 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -821,7 +821,7 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, void xen_copy_trap_info(struct trap_info *traps) { - const struct desc_ptr *desc = &__get_cpu_var(idt_desc); + const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); xen_convert_trap_info(desc, traps); } @@ -838,7 +838,7 @@ static void xen_load_idt(const struct desc_ptr *desc) spin_lock(&lock); - __get_cpu_var(idt_desc) = *desc; + memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); xen_convert_trap_info(desc, traps); diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 0d82003e76ad..ea54a08d8301 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,7 +54,7 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); void xen_mc_flush(void) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct multicall_entry *mc; int ret = 0; unsigned long flags; @@ -131,7 +131,7 @@ void xen_mc_flush(void) struct multicall_space __xen_mc_entry(size_t args) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct multicall_space ret; unsigned argidx = roundup(b->argidx, sizeof(u64)); @@ -162,7 +162,7 @@ struct multicall_space __xen_mc_entry(size_t args) struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct multicall_space ret = { NULL, NULL }; BUG_ON(preemptible()); @@ -192,7 +192,7 @@ out: void xen_mc_callback(void (*fn)(void *), void *data) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct callback *cb; if (b->cbidx == MC_BATCH) { diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 0ba5f3b967f0..23b45eb9a89c 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -109,7 +109,7 @@ static bool xen_pvspin = true; __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) { int irq = __this_cpu_read(lock_kicker_irq); - struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); + struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); int cpu = smp_processor_id(); u64 start; unsigned long flags; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5718b0b58b60..a1d430b112b3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -80,7 +80,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) BUG_ON(preemptible()); - state = &__get_cpu_var(xen_runstate); + state = this_cpu_ptr(&xen_runstate); /* * The runstate info is always updated by the hypervisor on @@ -123,7 +123,7 @@ static void do_stolen_accounting(void) WARN_ON(state.state != RUNSTATE_running); - snap = &__get_cpu_var(xen_runstate_snapshot); + snap = this_cpu_ptr(&xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; @@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void) cycle_t ret; preempt_disable_notrace(); - src = &__get_cpu_var(xen_vcpu)->time; + src = this_cpu_ptr(&xen_vcpu->time); ret = pvclock_clocksource_read(src); preempt_enable_notrace(); return ret; @@ -397,7 +397,7 @@ static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt. static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt; + struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt); irqreturn_t ret; ret = IRQ_NONE; @@ -460,7 +460,7 @@ void xen_setup_cpu_clockevents(void) { BUG_ON(preemptible()); - clockevents_register_device(&__get_cpu_var(xen_clock_events).evt); + clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt)); } void xen_timer_resume(void) -- cgit v1.2.3 From e16321709c8270f9803bbfdb51e5e02235078c7f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:41 -0500 Subject: uv: Replace __get_cpu_var Use __this_cpu_read instead. Cc: Hedi Berriche Cc: Mike Travis Cc: Dimitri Sivanich Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/uv/uv_hub.h | 10 +++++----- arch/x86/platform/uv/uv_nmi.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index bb84cfd5a1a1..a00ad8f2a657 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -601,16 +601,16 @@ struct uv_hub_nmi_s { struct uv_cpu_nmi_s { struct uv_hub_nmi_s *hub; - atomic_t state; - atomic_t pinging; + int state; + int pinging; int queries; int pings; }; -DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); -#define uv_cpu_nmi (__get_cpu_var(__uv_cpu_nmi)) +DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); + #define uv_hub_nmi (uv_cpu_nmi.hub) -#define uv_cpu_nmi_per(cpu) (per_cpu(__uv_cpu_nmi, cpu)) +#define uv_cpu_nmi_per(cpu) (per_cpu(uv_cpu_nmi, cpu)) #define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub) /* uv_cpu_nmi_states */ diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c89c93320c12..c6b146e67116 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -63,8 +63,8 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list; -DEFINE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); -EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_nmi); +DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); +EXPORT_PER_CPU_SYMBOL_GPL(uv_cpu_nmi); static unsigned long nmi_mmr; static unsigned long nmi_mmr_clear; @@ -215,7 +215,7 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) int nmi = 0; local64_inc(&uv_nmi_count); - uv_cpu_nmi.queries++; + this_cpu_inc(uv_cpu_nmi.queries); do { nmi = atomic_read(&hub_nmi->in_nmi); @@ -293,7 +293,7 @@ static void uv_nmi_nr_cpus_ping(void) int cpu; for_each_cpu(cpu, uv_nmi_cpu_mask) - atomic_set(&uv_cpu_nmi_per(cpu).pinging, 1); + uv_cpu_nmi_per(cpu).pinging = 1; apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI); } @@ -304,8 +304,8 @@ static void uv_nmi_cleanup_mask(void) int cpu; for_each_cpu(cpu, uv_nmi_cpu_mask) { - atomic_set(&uv_cpu_nmi_per(cpu).pinging, 0); - atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_OUT); + uv_cpu_nmi_per(cpu).pinging = 0; + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT; cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); } } @@ -328,7 +328,7 @@ static int uv_nmi_wait_cpus(int first) int loop_delay = uv_nmi_loop_delay; for_each_cpu(j, uv_nmi_cpu_mask) { - if (atomic_read(&uv_cpu_nmi_per(j).state)) { + if (uv_cpu_nmi_per(j).state) { cpumask_clear_cpu(j, uv_nmi_cpu_mask); if (++k >= n) break; @@ -359,7 +359,7 @@ static int uv_nmi_wait_cpus(int first) static void uv_nmi_wait(int master) { /* indicate this cpu is in */ - atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_IN); + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN); /* if not the first cpu in (the master), then we are a slave cpu */ if (!master) @@ -419,7 +419,7 @@ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) "UV:%sNMI process trace for CPU %d\n", dots, cpu); show_regs(regs); } - atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); } /* Trigger a slave cpu to dump it's state */ @@ -427,20 +427,20 @@ static void uv_nmi_trigger_dump(int cpu) { int retry = uv_nmi_trigger_delay; - if (atomic_read(&uv_cpu_nmi_per(cpu).state) != UV_NMI_STATE_IN) + if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN) return; - atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP); + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP; do { cpu_relax(); udelay(10); - if (atomic_read(&uv_cpu_nmi_per(cpu).state) + if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_DUMP) return; } while (--retry > 0); pr_crit("UV: CPU %d stuck in process dump function\n", cpu); - atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP_DONE); + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE; } /* Wait until all cpus ready to exit */ @@ -488,7 +488,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) } else { while (!atomic_read(&uv_nmi_slave_continue)) cpu_relax(); - while (atomic_read(&uv_cpu_nmi.state) != UV_NMI_STATE_DUMP) + while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP) cpu_relax(); uv_nmi_dump_state_cpu(cpu, regs); } @@ -615,7 +615,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) local_irq_save(flags); /* If not a UV System NMI, ignore */ - if (!atomic_read(&uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) { + if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) { local_irq_restore(flags); return NMI_DONE; } @@ -639,7 +639,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) uv_call_kgdb_kdb(cpu, regs, master); /* Clear per_cpu "in nmi" flag */ - atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT); /* Clear MMR NMI flag on each hub */ uv_clear_nmi(cpu); @@ -666,16 +666,16 @@ static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) { int ret; - uv_cpu_nmi.queries++; - if (!atomic_read(&uv_cpu_nmi.pinging)) { + this_cpu_inc(uv_cpu_nmi.queries); + if (!this_cpu_read(uv_cpu_nmi.pinging)) { local64_inc(&uv_nmi_ping_misses); return NMI_DONE; } - uv_cpu_nmi.pings++; + this_cpu_inc(uv_cpu_nmi.pings); local64_inc(&uv_nmi_ping_count); ret = uv_handle_nmi(reason, regs); - atomic_set(&uv_cpu_nmi.pinging, 0); + this_cpu_write(uv_cpu_nmi.pinging, 0); return ret; } -- cgit v1.2.3 From 06b96c8beb940619ddc818e2e00915fbc524f807 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:42 -0500 Subject: arm: Replace __this_cpu_ptr with raw_cpu_ptr __this_cpu_ptr is being phased out. So replace with raw_cpu_ptr. Cc: Russell King Cc: Catalin Marinas Acked-by: Will Deacon Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/arm/kernel/smp_twd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index dfc32130bc44..93090213c71c 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -92,7 +92,7 @@ static int twd_timer_ack(void) static void twd_timer_stop(void) { - struct clock_event_device *clk = __this_cpu_ptr(twd_evt); + struct clock_event_device *clk = raw_cpu_ptr(twd_evt); twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk); disable_percpu_irq(clk->irq); @@ -108,7 +108,7 @@ static void twd_update_frequency(void *new_rate) { twd_timer_rate = *((unsigned long *) new_rate); - clockevents_update_freq(__this_cpu_ptr(twd_evt), twd_timer_rate); + clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate); } static int twd_rate_change(struct notifier_block *nb, @@ -134,7 +134,7 @@ static struct notifier_block twd_clk_nb = { static int twd_clk_init(void) { - if (twd_evt && __this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) return clk_notifier_register(twd_clk, &twd_clk_nb); return 0; @@ -153,7 +153,7 @@ static void twd_update_frequency(void *data) { twd_timer_rate = clk_get_rate(twd_clk); - clockevents_update_freq(__this_cpu_ptr(twd_evt), twd_timer_rate); + clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate); } static int twd_cpufreq_transition(struct notifier_block *nb, @@ -179,7 +179,7 @@ static struct notifier_block twd_cpufreq_nb = { static int twd_cpufreq_init(void) { - if (twd_evt && __this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) return cpufreq_register_notifier(&twd_cpufreq_nb, CPUFREQ_TRANSITION_NOTIFIER); @@ -269,7 +269,7 @@ static void twd_get_clock(struct device_node *np) */ static void twd_timer_setup(void) { - struct clock_event_device *clk = __this_cpu_ptr(twd_evt); + struct clock_event_device *clk = raw_cpu_ptr(twd_evt); int cpu = smp_processor_id(); /* -- cgit v1.2.3 From d1cd39ad583e36f3a945ba043a0a2bfae83fe859 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:43 -0500 Subject: MIPS: Replace __get_cpu_var uses in FPU emulator. The use of __this_cpu_inc() requires a fundamental integer type, so change the type of all the counters to unsigned long, which is the same width they were before, but not wrapped in local_t. Signed-off-by: David Daney Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/mips/include/asm/fpu_emulator.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 0195745b4b1b..3ee347713307 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -33,17 +33,17 @@ #ifdef CONFIG_DEBUG_FS struct mips_fpu_emulator_stats { - local_t emulated; - local_t loads; - local_t stores; - local_t cp1ops; - local_t cp1xops; - local_t errors; - local_t ieee754_inexact; - local_t ieee754_underflow; - local_t ieee754_overflow; - local_t ieee754_zerodiv; - local_t ieee754_invalidop; + unsigned long emulated; + unsigned long loads; + unsigned long stores; + unsigned long cp1ops; + unsigned long cp1xops; + unsigned long errors; + unsigned long ieee754_inexact; + unsigned long ieee754_underflow; + unsigned long ieee754_overflow; + unsigned long ieee754_zerodiv; + unsigned long ieee754_invalidop; }; DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); @@ -51,7 +51,7 @@ DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); #define MIPS_FPU_EMU_INC_STATS(M) \ do { \ preempt_disable(); \ - __local_inc(&__get_cpu_var(fpuemustats).M); \ + __this_cpu_inc(fpuemustats.M); \ preempt_enable(); \ } while (0) -- cgit v1.2.3 From 35898716b4d3382791d219be317faace580b6a41 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:44 -0500 Subject: mips: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Ralf Baechle Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/mips/cavium-octeon/octeon-irq.c | 30 +++++++++++++++--------------- arch/mips/kernel/kprobes.c | 6 +++--- arch/mips/kernel/perf_event_mipsxx.c | 14 +++++++------- arch/mips/kernel/smp-bmips.c | 2 +- arch/mips/loongson/loongson-3/smp.c | 6 +++--- 5 files changed, 29 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 1b82ac6921e0..741734049675 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -264,13 +264,13 @@ static void octeon_irq_ciu_enable_local(struct irq_data *data) unsigned long *pen; unsigned long flags; union octeon_ciu_chip_data cd; - raw_spinlock_t *lock = &__get_cpu_var(octeon_irq_ciu_spinlock); + raw_spinlock_t *lock = this_cpu_ptr(&octeon_irq_ciu_spinlock); cd.p = irq_data_get_irq_chip_data(data); raw_spin_lock_irqsave(lock, flags); if (cd.s.line == 0) { - pen = &__get_cpu_var(octeon_irq_ciu0_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu0_en_mirror); __set_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -279,7 +279,7 @@ static void octeon_irq_ciu_enable_local(struct irq_data *data) wmb(); cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen); } else { - pen = &__get_cpu_var(octeon_irq_ciu1_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu1_en_mirror); __set_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -296,13 +296,13 @@ static void octeon_irq_ciu_disable_local(struct irq_data *data) unsigned long *pen; unsigned long flags; union octeon_ciu_chip_data cd; - raw_spinlock_t *lock = &__get_cpu_var(octeon_irq_ciu_spinlock); + raw_spinlock_t *lock = this_cpu_ptr(&octeon_irq_ciu_spinlock); cd.p = irq_data_get_irq_chip_data(data); raw_spin_lock_irqsave(lock, flags); if (cd.s.line == 0) { - pen = &__get_cpu_var(octeon_irq_ciu0_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu0_en_mirror); __clear_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -311,7 +311,7 @@ static void octeon_irq_ciu_disable_local(struct irq_data *data) wmb(); cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen); } else { - pen = &__get_cpu_var(octeon_irq_ciu1_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu1_en_mirror); __clear_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -431,11 +431,11 @@ static void octeon_irq_ciu_enable_local_v2(struct irq_data *data) if (cd.s.line == 0) { int index = cvmx_get_core_num() * 2; - set_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu0_en_mirror)); + set_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask); } else { int index = cvmx_get_core_num() * 2 + 1; - set_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu1_en_mirror)); + set_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask); } } @@ -450,11 +450,11 @@ static void octeon_irq_ciu_disable_local_v2(struct irq_data *data) if (cd.s.line == 0) { int index = cvmx_get_core_num() * 2; - clear_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu0_en_mirror)); + clear_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask); } else { int index = cvmx_get_core_num() * 2 + 1; - clear_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu1_en_mirror)); + clear_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask); } } @@ -1063,7 +1063,7 @@ static void octeon_irq_ip2_ciu(void) const unsigned long core_id = cvmx_get_core_num(); u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INTX_SUM0(core_id * 2)); - ciu_sum &= __get_cpu_var(octeon_irq_ciu0_en_mirror); + ciu_sum &= __this_cpu_read(octeon_irq_ciu0_en_mirror); if (likely(ciu_sum)) { int bit = fls64(ciu_sum) - 1; int irq = octeon_irq_ciu_to_irq[0][bit]; @@ -1080,7 +1080,7 @@ static void octeon_irq_ip3_ciu(void) { u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INT_SUM1); - ciu_sum &= __get_cpu_var(octeon_irq_ciu1_en_mirror); + ciu_sum &= __this_cpu_read(octeon_irq_ciu1_en_mirror); if (likely(ciu_sum)) { int bit = fls64(ciu_sum) - 1; int irq = octeon_irq_ciu_to_irq[1][bit]; @@ -1129,10 +1129,10 @@ static void octeon_irq_init_ciu_percpu(void) int coreid = cvmx_get_core_num(); - __get_cpu_var(octeon_irq_ciu0_en_mirror) = 0; - __get_cpu_var(octeon_irq_ciu1_en_mirror) = 0; + __this_cpu_write(octeon_irq_ciu0_en_mirror, 0); + __this_cpu_write(octeon_irq_ciu1_en_mirror, 0); wmb(); - raw_spin_lock_init(&__get_cpu_var(octeon_irq_ciu_spinlock)); + raw_spin_lock_init(this_cpu_ptr(&octeon_irq_ciu_spinlock)); /* * Disable All CIU Interrupts. The ones we need will be * enabled later. Read the SUM register so we know the write diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 1f8187ab0997..212f46f2014e 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -224,7 +224,7 @@ static void save_previous_kprobe(struct kprobe_ctlblk *kcb) static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_old_SR = kcb->prev_kprobe.old_SR; kcb->kprobe_saved_SR = kcb->prev_kprobe.saved_SR; @@ -234,7 +234,7 @@ static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_SR = kcb->kprobe_old_SR = (regs->cp0_status & ST0_IE); kcb->kprobe_saved_epc = regs->cp0_epc; } @@ -385,7 +385,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) goto ss_probe; } diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 14bf74b0f51c..abb209fa28c6 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -340,7 +340,7 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc, static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -360,7 +360,7 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) static void mipsxx_pmu_disable_event(int idx) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long flags; WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -460,7 +460,7 @@ static void mipspmu_stop(struct perf_event *event, int flags) static int mipspmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -496,7 +496,7 @@ out: static void mipspmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -1275,7 +1275,7 @@ static int __hw_perf_event_init(struct perf_event *event) static void pause_local_counters(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int ctr = mipspmu.num_counters; unsigned long flags; @@ -1291,7 +1291,7 @@ static void pause_local_counters(void) static void resume_local_counters(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int ctr = mipspmu.num_counters; do { @@ -1302,7 +1302,7 @@ static void resume_local_counters(void) static int mipsxx_pmu_handle_shared_irq(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_sample_data data; unsigned int counters = mipspmu.num_counters; u64 counter; diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index df9e2bd9b2c2..06bb5ed6d80a 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -346,7 +346,7 @@ static irqreturn_t bmips43xx_ipi_interrupt(int irq, void *dev_id) int action, cpu = irq - IPI0_IRQ; spin_lock_irqsave(&ipi_lock, flags); - action = __get_cpu_var(ipi_action_mask); + action = __this_cpu_read(ipi_action_mask); per_cpu(ipi_action_mask, cpu) = 0; clear_c0_cause(cpu ? C_SW1 : C_SW0); spin_unlock_irqrestore(&ipi_lock, flags); diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c index 74e827b4ec8f..d8c63af6c7cc 100644 --- a/arch/mips/loongson/loongson-3/smp.c +++ b/arch/mips/loongson/loongson-3/smp.c @@ -299,16 +299,16 @@ static void loongson3_init_secondary(void) per_cpu(cpu_state, cpu) = CPU_ONLINE; i = 0; - __get_cpu_var(core0_c0count) = 0; + __this_cpu_write(core0_c0count, 0); loongson3_send_ipi_single(0, SMP_ASK_C0COUNT); - while (!__get_cpu_var(core0_c0count)) { + while (!__this_cpu_read(core0_c0count)) { i++; cpu_relax(); } if (i > MAX_LOOPS) i = MAX_LOOPS; - initcount = __get_cpu_var(core0_c0count) + i; + initcount = __this_cpu_read(core0_c0count) + i; write_c0_count(initcount); } -- cgit v1.2.3 From eb7e7d766326f70859046bfdb6277068c2461fe2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:45 -0500 Subject: s390: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to this_cpu_inc(y) Cc: Martin Schwidefsky CC: linux390@de.ibm.com Acked-by: Heiko Carstens Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/s390/include/asm/cputime.h | 2 +- arch/s390/include/asm/irq.h | 2 +- arch/s390/include/asm/percpu.h | 16 ++++++++-------- arch/s390/kernel/irq.c | 2 +- arch/s390/kernel/kprobes.c | 8 ++++---- arch/s390/kernel/nmi.c | 10 +++++++--- arch/s390/kernel/perf_cpum_cf.c | 22 +++++++++++----------- arch/s390/kernel/perf_cpum_sf.c | 16 ++++++++-------- arch/s390/kernel/processor.c | 4 ++-- arch/s390/kernel/time.c | 6 +++--- arch/s390/kernel/vtime.c | 2 +- arch/s390/oprofile/hwsampler.c | 2 +- 12 files changed, 48 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index f65bd3634519..692d310dc32d 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -184,7 +184,7 @@ cputime64_t s390_get_idle_time(int cpu); static inline int s390_nohz_delay(int cpu) { - return __get_cpu_var(s390_idle).nohz_delay != 0; + return __this_cpu_read(s390_idle.nohz_delay) != 0; } #define arch_needs_cpu(cpu) s390_nohz_delay(cpu) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index c4dd400a2791..713d325afbfe 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); static __always_inline void inc_irq_stat(enum interruption_class irq) { - __get_cpu_var(irq_stat).irqs[irq]++; + __this_cpu_inc(irq_stat.irqs[irq]); } struct ext_code { diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index fa91e0097458..933355e0d091 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ old__, new__, prev__; \ pcp_op_T__ *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ prev__ = *ptr__; \ do { \ old__ = prev__; \ @@ -70,7 +70,7 @@ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ if (__builtin_constant_p(val__) && \ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ asm volatile( \ @@ -97,7 +97,7 @@ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ @@ -116,7 +116,7 @@ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ @@ -138,7 +138,7 @@ pcp_op_T__ ret__; \ pcp_op_T__ *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = cmpxchg(ptr__, oval, nval); \ preempt_enable(); \ ret__; \ @@ -154,7 +154,7 @@ typeof(pcp) *ptr__; \ typeof(pcp) ret__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = xchg(ptr__, nval); \ preempt_enable(); \ ret__; \ @@ -173,8 +173,8 @@ typeof(pcp2) *p2__; \ int ret__; \ preempt_disable(); \ - p1__ = __this_cpu_ptr(&(pcp1)); \ - p2__ = __this_cpu_ptr(&(pcp2)); \ + p1__ = raw_cpu_ptr(&(pcp1)); \ + p2__ = raw_cpu_ptr(&(pcp2)); \ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ preempt_enable(); \ ret__; \ diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8eb82443cfbd..891c183211ce 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -258,7 +258,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) - __get_cpu_var(s390_idle).nohz_delay = 1; + __this_cpu_write(s390_idle.nohz_delay, 1); index = ext_hash(ext_code.code); rcu_read_lock(); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index bc71a7b95af5..131ed342ed10 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -366,9 +366,9 @@ static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, */ static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) { - kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe); kcb->prev_kprobe.status = kcb->kprobe_status; - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } /* @@ -378,7 +378,7 @@ static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) */ static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; } @@ -459,7 +459,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); return 1; } else if (kprobe_running()) { - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { /* * Continuation after the jprobe completed and diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 210e1285f75a..d75c42f4147d 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -53,8 +53,12 @@ void s390_handle_mcck(void) */ local_irq_save(flags); local_mcck_disable(); - mcck = __get_cpu_var(cpu_mcck); - memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + /* + * Ummm... Does this make sense at all? Copying the percpu struct + * and then zapping it one statement later? + */ + memcpy(&mcck, this_cpu_ptr(&cpu_mcck), sizeof(mcck)); + memset(&mcck, 0, sizeof(struct mcck_struct)); clear_cpu_flag(CIF_MCCK_PENDING); local_mcck_enable(); local_irq_restore(flags); @@ -253,7 +257,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) nmi_enter(); inc_irq_stat(NMI_NMI); mci = (struct mci *) &S390_lowcore.mcck_interruption_code; - mcck = &__get_cpu_var(cpu_mcck); + mcck = this_cpu_ptr(&cpu_mcck); umode = user_mode(regs); if (mci->sd) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index d3194de7ae1e..56fdad479115 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -173,7 +173,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) */ static void cpumf_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); int err; if (cpuhw->flags & PMU_F_ENABLED) @@ -196,7 +196,7 @@ static void cpumf_pmu_enable(struct pmu *pmu) */ static void cpumf_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); int err; u64 inactive; @@ -230,7 +230,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_CMC); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); /* Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. */ @@ -250,7 +250,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, #define PMC_RELEASE 1 static void setup_pmc_cpu(void *flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); switch (*((int *) flags)) { case PMC_INIT: @@ -475,7 +475,7 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) @@ -506,7 +506,7 @@ static void cpumf_pmu_start(struct perf_event *event, int flags) static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (!(hwc->state & PERF_HES_STOPPED)) { @@ -527,7 +527,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); /* Check authorization for the counter set to which this * counter belongs. @@ -551,7 +551,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -575,7 +575,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) */ static void cpumf_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->flags |= PERF_EVENT_TXN; @@ -589,7 +589,7 @@ static void cpumf_pmu_start_txn(struct pmu *pmu) */ static void cpumf_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); WARN_ON(cpuhw->tx_state != cpuhw->state); @@ -604,7 +604,7 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu) */ static int cpumf_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); u64 state; /* check if the updated state can be scheduled */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ea0c7b2ef030..08e761318c17 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -562,7 +562,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static void setup_pmc_cpu(void *flags) { int err; - struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); err = 0; switch (*((int *) flags)) { @@ -849,7 +849,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) static void cpumsf_pmu_enable(struct pmu *pmu) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); struct hw_perf_event *hwc; int err; @@ -898,7 +898,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) static void cpumsf_pmu_disable(struct pmu *pmu) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); struct hws_lsctl_request_block inactive; struct hws_qsi_info_block si; int err; @@ -1306,7 +1306,7 @@ static void cpumsf_pmu_read(struct perf_event *event) */ static void cpumsf_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) return; @@ -1327,7 +1327,7 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags) */ static void cpumsf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); if (event->hw.state & PERF_HES_STOPPED) return; @@ -1346,7 +1346,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) static int cpumsf_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); int err; if (cpuhw->flags & PMU_F_IN_USE) @@ -1397,7 +1397,7 @@ out: static void cpumsf_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); perf_pmu_disable(event->pmu); cpumsf_pmu_stop(event, PERF_EF_UPDATE); @@ -1470,7 +1470,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, if (!(alert & CPU_MF_INT_SF_MASK)) return; inc_irq_stat(IRQEXT_CMS); - cpuhw = &__get_cpu_var(cpu_hw_sf); + cpuhw = this_cpu_ptr(&cpu_hw_sf); /* Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. */ diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 24612029f450..f0305b1189aa 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -23,8 +23,8 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); */ void cpu_init(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct cpuid *id = &__get_cpu_var(cpu_id); + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + struct cpuid *id = this_cpu_ptr(&cpu_id); get_cpu_id(id); atomic_inc(&init_mm.mm_count); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4cef607f3711..4e5a6d881c62 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -92,7 +92,7 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - cd = &__get_cpu_var(comparators); + cd = this_cpu_ptr(&comparators); cd->event_handler(cd); } @@ -360,7 +360,7 @@ EXPORT_SYMBOL(get_sync_clock); */ static void disable_sync_clock(void *dummy) { - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); /* * Clear the in-sync bit 2^31. All get_sync_clock calls will * fail until the sync bit is turned back on. In addition @@ -377,7 +377,7 @@ static void disable_sync_clock(void *dummy) */ static void enable_sync_clock(void) { - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); atomic_set_mask(0x80000000, sw_ptr); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8c34363d6f1e..f400745dedc0 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system); void __kprobes vtime_stop_cpu(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); unsigned long long idle_time; unsigned long psw_mask; diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index e53c6f268807..ff9b4eb34589 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -178,7 +178,7 @@ static int smp_ctl_qsi(int cpu) static void hws_ext_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { - struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); if (!(param32 & CPU_MF_INT_SF_MASK)) return; -- cgit v1.2.3 From 6065a244a039a23d933e4b803a4e052da2849208 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:47 -0500 Subject: ia64: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/ia64/include/asm/hw_irq.h | 2 +- arch/ia64/include/asm/sn/arch.h | 4 ++-- arch/ia64/include/asm/sn/nodepda.h | 2 +- arch/ia64/include/asm/switch_to.h | 2 +- arch/ia64/include/asm/uv/uv_hub.h | 2 +- arch/ia64/kernel/irq.c | 2 +- arch/ia64/kernel/irq_ia64.c | 4 ++-- arch/ia64/kernel/kprobes.c | 6 +++--- arch/ia64/kernel/mca.c | 16 ++++++++-------- arch/ia64/kernel/process.c | 6 +++--- arch/ia64/kernel/traps.c | 2 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 28 ++++++++++++++-------------- 12 files changed, 38 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h index 029bab36cd91..668786e84af8 100644 --- a/arch/ia64/include/asm/hw_irq.h +++ b/arch/ia64/include/asm/hw_irq.h @@ -159,7 +159,7 @@ static inline ia64_vector __ia64_irq_to_vector(int irq) static inline unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return __get_cpu_var(vector_irq)[vec]; + return __this_cpu_read(vector_irq[vec]); } #endif diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h index 7caa1f44cd95..31eb784866f8 100644 --- a/arch/ia64/include/asm/sn/arch.h +++ b/arch/ia64/include/asm/sn/arch.h @@ -57,7 +57,7 @@ struct sn_hub_info_s { u16 nasid_bitmask; }; DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); -#define sn_hub_info (&__get_cpu_var(__sn_hub_info)) +#define sn_hub_info this_cpu_ptr(&__sn_hub_info) #define is_shub2() (sn_hub_info->shub2) #define is_shub1() (sn_hub_info->shub2 == 0) @@ -72,7 +72,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); * cpu. */ DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); -#define sn_cnodeid_to_nasid (&__get_cpu_var(__sn_cnodeid_to_nasid[0])) +#define sn_cnodeid_to_nasid this_cpu_ptr(&__sn_cnodeid_to_nasid[0]) extern u8 sn_partition_id; diff --git a/arch/ia64/include/asm/sn/nodepda.h b/arch/ia64/include/asm/sn/nodepda.h index ee118b901de4..7c8b4710f071 100644 --- a/arch/ia64/include/asm/sn/nodepda.h +++ b/arch/ia64/include/asm/sn/nodepda.h @@ -70,7 +70,7 @@ typedef struct nodepda_s nodepda_t; */ DECLARE_PER_CPU(struct nodepda_s *, __sn_nodepda); -#define sn_nodepda (__get_cpu_var(__sn_nodepda)) +#define sn_nodepda __this_cpu_read(__sn_nodepda) #define NODEPDA(cnodeid) (sn_nodepda->pernode_pdaindr[cnodeid]) /* diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h index d38c7ea5eea5..e8f3585e7e7a 100644 --- a/arch/ia64/include/asm/switch_to.h +++ b/arch/ia64/include/asm/switch_to.h @@ -32,7 +32,7 @@ extern void ia64_load_extra (struct task_struct *task); #ifdef CONFIG_PERFMON DECLARE_PER_CPU(unsigned long, pfm_syst_info); -# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) +# define PERFMON_IS_SYSWIDE() (__this_cpu_read(pfm_syst_info) & 0x1) #else # define PERFMON_IS_SYSWIDE() (0) #endif diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h index 53e9dfacd073..2a88c7204e52 100644 --- a/arch/ia64/include/asm/uv/uv_hub.h +++ b/arch/ia64/include/asm/uv/uv_hub.h @@ -108,7 +108,7 @@ struct uv_hub_info_s { unsigned char n_val; }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info this_cpu_ptr(&__uv_hub_info) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index f2c418281130..812a1e6b3179 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -42,7 +42,7 @@ ia64_vector __ia64_irq_to_vector(int irq) unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return __get_cpu_var(vector_irq)[vec]; + return __this_cpu_read(vector_irq[vec]); } #endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 03ea78ed64a9..698d8fefde6c 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -330,7 +330,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) int irq; struct irq_desc *desc; struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); if (irq < 0) continue; @@ -344,7 +344,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) goto unlock; spin_lock_irqsave(&vector_lock, flags); - __get_cpu_var(vector_irq)[vector] = -1; + __this_cpu_write(vector_irq[vector], -1); cpu_clear(me, vector_table[vector]); spin_unlock_irqrestore(&vector_lock, flags); cfg->move_cleanup_count--; diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 074fde49c9e6..c7c51445c3be 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -396,7 +396,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { unsigned int i; i = atomic_read(&kcb->prev_kprobe_index); - __get_cpu_var(current_kprobe) = kcb->prev_kprobe[i-1].kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe[i-1].kp); kcb->kprobe_status = kcb->prev_kprobe[i-1].status; atomic_sub(1, &kcb->prev_kprobe_index); } @@ -404,7 +404,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } static void kretprobe_trampoline(void) @@ -823,7 +823,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) /* * jprobe instrumented function just completed */ - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index db7b36bb068b..8bfd36af46f8 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1341,7 +1341,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mlogbuf_finish(1); } - if (__get_cpu_var(ia64_mca_tr_reload)) { + if (__this_cpu_read(ia64_mca_tr_reload)) { mca_insert_tr(0x1); /*Reload dynamic itrs*/ mca_insert_tr(0x2); /*Reload dynamic itrs*/ } @@ -1868,14 +1868,14 @@ ia64_mca_cpu_init(void *cpu_data) "MCA", cpu); format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack), "INIT", cpu); - __get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data); + __this_cpu_write(ia64_mca_data, (__per_cpu_mca[cpu] = __pa(data))); /* * Stash away a copy of the PTE needed to map the per-CPU page. * We may need it during MCA recovery. */ - __get_cpu_var(ia64_mca_per_cpu_pte) = - pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); + __this_cpu_write(ia64_mca_per_cpu_pte, + pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL))); /* * Also, stash away a copy of the PAL address and the PTE @@ -1884,10 +1884,10 @@ ia64_mca_cpu_init(void *cpu_data) pal_vaddr = efi_get_pal_addr(); if (!pal_vaddr) return; - __get_cpu_var(ia64_mca_pal_base) = - GRANULEROUNDDOWN((unsigned long) pal_vaddr); - __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr), - PAGE_KERNEL)); + __this_cpu_write(ia64_mca_pal_base, + GRANULEROUNDDOWN((unsigned long) pal_vaddr)); + __this_cpu_write(ia64_mca_pal_pte, pte_val(mk_pte_phys(__pa(pal_vaddr), + PAGE_KERNEL))); } static void ia64_mca_cmc_vector_adjust(void *dummy) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index deed6fa96bb0..b51514957620 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -215,7 +215,7 @@ static inline void play_dead(void) unsigned int this_cpu = smp_processor_id(); /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); max_xtp(); local_irq_disable(); @@ -273,7 +273,7 @@ ia64_save_extra (struct task_struct *task) if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) pfm_save_regs(task); - info = __get_cpu_var(pfm_syst_info); + info = __this_cpu_read(pfm_syst_info); if (info & PFM_CPUINFO_SYST_WIDE) pfm_syst_wide_update_task(task, info, 0); #endif @@ -293,7 +293,7 @@ ia64_load_extra (struct task_struct *task) if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) pfm_load_regs(task); - info = __get_cpu_var(pfm_syst_info); + info = __this_cpu_read(pfm_syst_info); if (info & PFM_CPUINFO_SYST_WIDE) pfm_syst_wide_update_task(task, info, 1); #endif diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index d3636e67a98e..6f7d4a4dcf24 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -299,7 +299,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { unsigned long count, current_jiffies = jiffies; - struct fpu_swa_msg *cp = &__get_cpu_var(cpulast); + struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast); if (unlikely(current_jiffies > cp->time)) cp->count = 0; diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 68c845411624..f9c8d9fc5939 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -134,8 +134,8 @@ sn2_ipi_flush_all_tlb(struct mm_struct *mm) itc = ia64_get_itc(); smp_flush_tlb_cpumask(*mm_cpumask(mm)); itc = ia64_get_itc() - itc; - __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; - __get_cpu_var(ptcstats).shub_ipi_flushes++; + __this_cpu_add(ptcstats.shub_ipi_flushes_itc_clocks, itc); + __this_cpu_inc(ptcstats.shub_ipi_flushes); } /** @@ -199,14 +199,14 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, start += (1UL << nbits); } while (start < end); ia64_srlz_i(); - __get_cpu_var(ptcstats).ptc_l++; + __this_cpu_inc(ptcstats.ptc_l); preempt_enable(); return; } if (atomic_read(&mm->mm_users) == 1 && mymm) { flush_tlb_mm(mm); - __get_cpu_var(ptcstats).change_rid++; + __this_cpu_inc(ptcstats.change_rid); preempt_enable(); return; } @@ -250,11 +250,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, spin_lock_irqsave(PTC_LOCK(shub1), flags); itc2 = ia64_get_itc(); - __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; - __get_cpu_var(ptcstats).shub_ptc_flushes++; - __get_cpu_var(ptcstats).nodes_flushed += nix; + __this_cpu_add(ptcstats.lock_itc_clocks, itc2 - itc); + __this_cpu_inc(ptcstats.shub_ptc_flushes); + __this_cpu_add(ptcstats.nodes_flushed, nix); if (!mymm) - __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++; + __this_cpu_inc(ptcstats.shub_ptc_flushes_not_my_mm); if (use_cpu_ptcga && !mymm) { old_rr = ia64_get_rr(start); @@ -299,9 +299,9 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, done: itc2 = ia64_get_itc() - itc2; - __get_cpu_var(ptcstats).shub_itc_clocks += itc2; - if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) - __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + __this_cpu_add(ptcstats.shub_itc_clocks, itc2); + if (itc2 > __this_cpu_read(ptcstats.shub_itc_clocks_max)) + __this_cpu_write(ptcstats.shub_itc_clocks_max, itc2); if (old_rr) { ia64_set_rr(start, old_rr); @@ -311,7 +311,7 @@ done: spin_unlock_irqrestore(PTC_LOCK(shub1), flags); if (flush_opt == 1 && deadlock) { - __get_cpu_var(ptcstats).deadlocks++; + __this_cpu_inc(ptcstats.deadlocks); sn2_ipi_flush_all_tlb(mm); } @@ -334,7 +334,7 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, short nasid, i; unsigned long *piows, zeroval, n; - __get_cpu_var(ptcstats).deadlocks++; + __this_cpu_inc(ptcstats.deadlocks); piows = (unsigned long *) pda->pio_write_status_addr; zeroval = pda->pio_write_status_val; @@ -349,7 +349,7 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, ptc1 = CHANGE_NASID(nasid, ptc1); n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); - __get_cpu_var(ptcstats).deadlocks2 += n; + __this_cpu_add(ptcstats.deadlocks2, n); } } -- cgit v1.2.3 From 2999a4b354c24985268f9310bc9522ff358453a8 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:48 -0500 Subject: alpha: Replace __get_cpu_var __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) CC: Ivan Kokshaysky Cc: Matt Turner Acked-by: Richard Henderson Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/alpha/kernel/perf_event.c | 16 ++++++++-------- arch/alpha/kernel/time.c | 6 +++--- arch/powerpc/include/asm/cputime.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index c52e7f0ee5f6..5c218aa3f3df 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -431,7 +431,7 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) */ static int alpha_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int n0; int ret; @@ -483,7 +483,7 @@ static int alpha_pmu_add(struct perf_event *event, int flags) */ static void alpha_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; unsigned long irq_flags; int j; @@ -531,7 +531,7 @@ static void alpha_pmu_read(struct perf_event *event) static void alpha_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!(hwc->state & PERF_HES_STOPPED)) { cpuc->idx_mask &= ~(1UL<idx); @@ -551,7 +551,7 @@ static void alpha_pmu_stop(struct perf_event *event, int flags) static void alpha_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -724,7 +724,7 @@ static int alpha_pmu_event_init(struct perf_event *event) */ static void alpha_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->enabled) return; @@ -750,7 +750,7 @@ static void alpha_pmu_enable(struct pmu *pmu) static void alpha_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!cpuc->enabled) return; @@ -814,8 +814,8 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, struct hw_perf_event *hwc; int idx, j; - __get_cpu_var(irq_pmi_count)++; - cpuc = &__get_cpu_var(cpu_hw_events); + __this_cpu_inc(irq_pmi_count); + cpuc = this_cpu_ptr(&cpu_hw_events); /* Completely counting through the PMC's period to trigger a new PMC * overflow interrupt while in this interrupt routine is utterly diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index ee39cee8064c..643a9dcdf093 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -56,9 +56,9 @@ unsigned long est_cycle_freq; DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) void arch_irq_work_raise(void) { diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 607559ab271f..e5d10ab8463b 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -56,10 +56,10 @@ static inline unsigned long cputime_to_jiffies(const cputime_t ct) static inline cputime_t cputime_to_scaled(const cputime_t ct) { if (cpu_has_feature(CPU_FTR_SPURR) && - __get_cpu_var(cputime_last_delta)) + __this_cpu_read(cputime_last_delta)) return (__force u64) ct * - __get_cpu_var(cputime_scaled_last_delta) / - __get_cpu_var(cputime_last_delta); + __this_cpu_read(cputime_scaled_last_delta) / + __this_cpu_read(cputime_last_delta); return ct; } -- cgit v1.2.3 From 5828f666c069af74e00db21559f1535103c9f79a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:49 -0500 Subject: powerpc: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) tj: Folded a fix patch. http://lkml.kernel.org/g/alpine.DEB.2.11.1408172143020.9652@gentwo.org Cc: Benjamin Herrenschmidt CC: Paul Mackerras Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/powerpc/include/asm/hardirq.h | 4 +++- arch/powerpc/include/asm/tlbflush.h | 4 ++-- arch/powerpc/include/asm/xics.h | 8 ++++---- arch/powerpc/kernel/dbell.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 6 +++--- arch/powerpc/kernel/iommu.c | 2 +- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 6 +++--- arch/powerpc/kernel/mce.c | 24 +++++++++++------------ arch/powerpc/kernel/process.c | 10 +++++----- arch/powerpc/kernel/smp.c | 6 +++--- arch/powerpc/kernel/sysfs.c | 4 ++-- arch/powerpc/kernel/time.c | 22 ++++++++++----------- arch/powerpc/kernel/traps.c | 8 ++++---- arch/powerpc/kvm/e500.c | 14 ++++++------- arch/powerpc/kvm/e500mc.c | 4 ++-- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 +++--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/perf/core-book3s.c | 22 ++++++++++----------- arch/powerpc/perf/core-fsl-emb.c | 6 +++--- arch/powerpc/platforms/cell/interrupt.c | 6 +++--- arch/powerpc/platforms/powernv/opal-tracepoints.c | 4 ++-- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 4 ++-- arch/powerpc/platforms/pseries/iommu.c | 8 ++++---- arch/powerpc/platforms/pseries/lpar.c | 6 +++--- arch/powerpc/platforms/pseries/ras.c | 4 ++-- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 32 files changed, 105 insertions(+), 103 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 1bbb3013d6aa..8d907ba4fd05 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,7 +21,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending +#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) +#define set_softirq_pending(x) __this_cpu_write(irq_stat._softirq_pending, (x)) +#define or_softirq_pending(x) __this_cpu_or(irq_stat._softirq_pending, (x)) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 2def01ed0cb2..cd7c2719d3ef 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 282d43a0c855..5007ad0448ce 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -97,7 +97,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -110,7 +110,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -120,7 +120,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -132,7 +132,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index d55c76c571f3..f4217819cc31 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __get_cpu_var(irq_stat).doorbell_irqs++; + __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0bb5918faaaf..b62f90eaf19e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __get_cpu_var(bp_per_reg); + bp = __this_cpu_read(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a10642a0d861..71e60bfb89e2 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4c5891de162e..74d40c6855b8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __get_cpu_var(irq_stat).spurious_irqs++; + __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 8504657379f1..e77c3ccf8dcf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - &__get_cpu_var(kgdb_thread_info); + this_cpu_ptr(&kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2f72af82513c..7c053f281406 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a7fd4cb78b78..15c99b649b04 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __get_cpu_var(mce_nest_count)++; - struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + int index = __this_cpu_inc_return(mce_nest_count); + struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __get_cpu_var(mce_nest_count) - 1; + int index = __this_cpu_read(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = &__get_cpu_var(mce_event[index]); + mc_evt = this_cpu_ptr(&mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __get_cpu_var(mce_nest_count)--; + __this_cpu_dec(mce_nest_count); return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __get_cpu_var(mce_queue_count)++; + index = __this_cpu_inc_return(mce_queue_count); /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __get_cpu_var(mce_queue_count)--; + __this_cpu_dec(mce_queue_count); return; } - __get_cpu_var(mce_event_queue[index]) = evt; + memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__get_cpu_var(mce_queue_count) > 0) { - index = __get_cpu_var(mce_queue_count) - 1; + while (__this_cpu_read(mce_queue_count) > 0) { + index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - &__get_cpu_var(mce_event_queue[index])); - __get_cpu_var(mce_queue_count)--; + this_cpu_ptr(&mce_event_queue[index])); + __this_cpu_dec(mce_queue_count); } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bf44ae962ab8..2df2f2956520 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -498,7 +498,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __get_cpu_var(current_brk) = *brk; + __this_cpu_write(current_brk, *brk); if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -841,7 +841,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -855,7 +855,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -865,7 +865,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -888,7 +888,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a0738af4aba6..60391a51467a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -242,7 +242,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = &__get_cpu_var(ipi_message); + struct cpu_messages *info = this_cpu_ptr(&ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -438,9 +438,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); smp_wmb(); - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 67fd2fd2620a..fa1fd8a0c867 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) + if (__this_cpu_read(pmcs_enabled)) return; - __get_cpu_var(pmcs_enabled) = 1; + __this_cpu_write(pmcs_enabled, 1); if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 368ab374d33c..4769e5b7f905 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __get_cpu_var(irq_stat).timer_irqs_event++; + __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __get_cpu_var(irq_stat).timer_irqs_others++; + __this_cpu_inc(irq_stat.timer_irqs_others); } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; + __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0dc43f9932cf..e6595b72269b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __get_cpu_var(irq_stat).hmi_exceptions++; + __this_cpu_inc(irq_stat.hmi_exceptions); if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __get_cpu_var(irq_stat).pmu_irqs++; + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); } diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 2e02ed849f36..16095841afe1 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = ++(__get_cpu_var(pcpu_last_used_sid)); + sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; + __this_cpu_write(pcpu_sids)entry[sid], entry); entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && + entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); + __this_cpu_write(pcpu_last_used_sid, 0); + memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 164bad2a19bf..6ef54e523f33 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { + __this_cpu_read(last_vcpu_of_lpid[vcpu->kvm->arch.lpid]) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; + __this_cpu_write(last_vcpu_of_lpid[vcpu->kvm->arch.lpid], vcpu); } kvmppc_load_guest_fp(vcpu); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index afc0a8295f84..504a16f1a1a0 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -625,7 +625,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index daee7f4e5a14..060d51fda35e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1314,7 +1314,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee2573903..ba47aaf33a4b 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..8aa04f03fd31 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b7cd00b0171e..690f9c7bf3c8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * and check whether the total set is still feasible. */ - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ void power_pmu_start_txn(struct pmu *pmu) */ void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index d35ae52c69dc..4acaea01fe03 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 8a106b4172e0..4c11421847be 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = &__get_cpu_var(cpu_iic); + struct iic *iic = this_cpu_ptr(&cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = &__get_cpu_var(cpu_iic); + iic = this_cpu_ptr(&cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); + out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index d8a000a9988b..9527e2a7c541 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 5f3b23220b8e..a6c42f34303a 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = &__get_cpu_var(ps3_private); + struct ps3_private *pd = this_cpu_ptr(&ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 1062f71f5a85..39049e4884fb 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 4575f0c9e521..f02ec3ab428c 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4642d6a4d356..8c355ed4291e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -200,7 +200,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -213,7 +213,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -399,7 +399,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -407,7 +407,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 34e64237fff9..56df72da59fe 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -507,7 +507,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -697,7 +697,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; @@ -722,7 +722,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index dff05b9eb946..179a69fd5568 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { - memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); + memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index fe0cca477164..365249cd346b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* * we have to reset the cppr index to 0 because we're -- cgit v1.2.3 From b4f501916ce2ae80c28017814d71d1bf83679271 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:50 -0500 Subject: tile: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Acked-by: Chris Metcalf Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/tile/include/asm/irqflags.h | 4 ++-- arch/tile/include/asm/mmu_context.h | 6 +++--- arch/tile/kernel/irq.c | 14 +++++++------- arch/tile/kernel/messaging.c | 4 ++-- arch/tile/kernel/process.c | 2 +- arch/tile/kernel/setup.c | 3 ++- arch/tile/kernel/single_step.c | 4 ++-- arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/smpboot.c | 6 +++--- arch/tile/kernel/time.c | 8 ++++---- arch/tile/mm/highmem.c | 2 +- arch/tile/mm/init.c | 4 ++-- 12 files changed, 30 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 71af5747874d..60d62a292fce 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -140,12 +140,12 @@ extern unsigned int debug_smp_processor_id(void); /* * Read the set of maskable interrupts. - * We avoid the preemption warning here via __this_cpu_ptr since even + * We avoid the preemption warning here via raw_cpu_ptr since even * if irqs are already enabled, it's harmless to read the wrong cpu's * enabled mask. */ #define arch_local_irqs_enabled() \ - (*__this_cpu_ptr(&interrupts_enabled_mask)) + (*raw_cpu_ptr(&interrupts_enabled_mask)) /* Re-enable all maskable interrupts. */ #define arch_local_irq_enable() \ diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index 4734215e2ad4..f67753db1f78 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -84,7 +84,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t) * clear any pending DMA interrupts. */ if (current->thread.tile_dma_state.enabled) - install_page_table(mm->pgd, __get_cpu_var(current_asid)); + install_page_table(mm->pgd, __this_cpu_read(current_asid)); #endif } @@ -96,12 +96,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, int cpu = smp_processor_id(); /* Pick new ASID. */ - int asid = __get_cpu_var(current_asid) + 1; + int asid = __this_cpu_read(current_asid) + 1; if (asid > max_asid) { asid = min_asid; local_flush_tlb(); } - __get_cpu_var(current_asid) = asid; + __this_cpu_write(current_asid, asid); /* Clear cpu from the old mm, and set it in the new one. */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 637f2ffaa5f5..ba85765e1436 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -73,7 +73,7 @@ static DEFINE_PER_CPU(int, irq_depth); */ void tile_dev_intr(struct pt_regs *regs, int intnum) { - int depth = __get_cpu_var(irq_depth)++; + int depth = __this_cpu_inc_return(irq_depth); unsigned long original_irqs; unsigned long remaining_irqs; struct pt_regs *old_regs; @@ -120,7 +120,7 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) /* Count device irqs; Linux IPIs are counted elsewhere. */ if (irq != IRQ_RESCHEDULE) - __get_cpu_var(irq_stat).irq_dev_intr_count++; + __this_cpu_inc(irq_stat.irq_dev_intr_count); generic_handle_irq(irq); } @@ -130,10 +130,10 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) * including any that were reenabled during interrupt * handling. */ - if (depth == 0) - unmask_irqs(~__get_cpu_var(irq_disable_mask)); + if (depth == 1) + unmask_irqs(~__this_cpu_read(irq_disable_mask)); - __get_cpu_var(irq_depth)--; + __this_cpu_dec(irq_depth); /* * Track time spent against the current process again and @@ -151,7 +151,7 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) static void tile_irq_chip_enable(struct irq_data *d) { get_cpu_var(irq_disable_mask) &= ~(1UL << d->irq); - if (__get_cpu_var(irq_depth) == 0) + if (__this_cpu_read(irq_depth) == 0) unmask_irqs(1UL << d->irq); put_cpu_var(irq_disable_mask); } @@ -197,7 +197,7 @@ static void tile_irq_chip_ack(struct irq_data *d) */ static void tile_irq_chip_eoi(struct irq_data *d) { - if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) + if (!(__this_cpu_read(irq_disable_mask) & (1UL << d->irq))) unmask_irqs(1UL << d->irq); } diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 7867266f9716..ac950be1318e 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(HV_MsgState, msg_state); void init_messaging(void) { /* Allocate storage for messages in kernel space */ - HV_MsgState *state = &__get_cpu_var(msg_state); + HV_MsgState *state = this_cpu_ptr(&msg_state); int rc = hv_register_message_state(state); if (rc != HV_OK) panic("hv_register_message_state: error %d", rc); @@ -96,7 +96,7 @@ void hv_message_intr(struct pt_regs *regs, int intnum) struct hv_driver_cb *cb = (struct hv_driver_cb *)him->intarg; cb->callback(cb, him->intdata); - __get_cpu_var(irq_stat).irq_hv_msg_count++; + __this_cpu_inc(irq_stat.irq_hv_msg_count); } } diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 16ed58948757..0050cbc1d9de 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -64,7 +64,7 @@ early_param("idle", idle_setup); void arch_cpu_idle(void) { - __get_cpu_var(irq_stat).idle_timestamp = jiffies; + __this_cpu_write(irq_stat.idle_timestamp, jiffies); _cpu_idle(); } diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 112ababa9e55..b9736ded06f2 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1218,7 +1218,8 @@ static void __init validate_hv(void) * various asid variables to their appropriate initial states. */ asid_range = hv_inquire_asid(0); - __get_cpu_var(current_asid) = min_asid = asid_range.start; + min_asid = asid_range.start; + __this_cpu_write(current_asid, min_asid); max_asid = asid_range.start + asid_range.size - 1; if (hv_confstr(HV_CONFSTR_CHIP_MODEL, (HV_VirtAddr)chip_model, diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index de07fa7d1315..6cb2ce31b5a2 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -740,7 +740,7 @@ static DEFINE_PER_CPU(unsigned long, ss_saved_pc); void gx_singlestep_handle(struct pt_regs *regs, int fault_num) { - unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long *ss_pc = this_cpu_ptr(&ss_saved_pc); struct thread_info *info = (void *)current_thread_info(); int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); @@ -766,7 +766,7 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num) void single_step_once(struct pt_regs *regs) { - unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long *ss_pc = this_cpu_ptr(&ss_saved_pc); unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); *ss_pc = regs->pc; diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 01e8ab29f43a..3dbedb0174b3 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -188,7 +188,7 @@ void flush_icache_range(unsigned long start, unsigned long end) /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { - __get_cpu_var(irq_stat).irq_resched_count++; + __this_cpu_inc(irq_stat.irq_resched_count); scheduler_ipi(); return IRQ_HANDLED; diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 732e9d138661..0d59a1b60c74 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -41,7 +41,7 @@ void __init smp_prepare_boot_cpu(void) int cpu = smp_processor_id(); set_cpu_online(cpu, 1); set_cpu_present(cpu, 1); - __get_cpu_var(cpu_state) = CPU_ONLINE; + __this_cpu_write(cpu_state, CPU_ONLINE); init_messaging(); } @@ -158,7 +158,7 @@ static void start_secondary(void) /* printk(KERN_DEBUG "Initializing CPU#%d\n", cpuid); */ /* Initialize the current asid for our first page table. */ - __get_cpu_var(current_asid) = min_asid; + __this_cpu_write(current_asid, min_asid); /* Set up this thread as another owner of the init_mm */ atomic_inc(&init_mm.mm_count); @@ -201,7 +201,7 @@ void online_secondary(void) notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), 1); - __get_cpu_var(cpu_state) = CPU_ONLINE; + __this_cpu_write(cpu_state, CPU_ONLINE); /* Set up tile-specific state for this cpu. */ setup_cpu(0); diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index d8fbc289e680..ab1c9fe2aa7f 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -162,7 +162,7 @@ static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = { void setup_tile_timer(void) { - struct clock_event_device *evt = &__get_cpu_var(tile_timer); + struct clock_event_device *evt = this_cpu_ptr(&tile_timer); /* Fill in fields that are speed-specific. */ clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC); @@ -182,7 +182,7 @@ void setup_tile_timer(void) void do_timer_interrupt(struct pt_regs *regs, int fault_num) { struct pt_regs *old_regs = set_irq_regs(regs); - struct clock_event_device *evt = &__get_cpu_var(tile_timer); + struct clock_event_device *evt = this_cpu_ptr(&tile_timer); /* * Mask the timer interrupt here, since we are a oneshot timer @@ -194,7 +194,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) irq_enter(); /* Track interrupt count. */ - __get_cpu_var(irq_stat).irq_timer_count++; + __this_cpu_inc(irq_stat.irq_timer_count); /* Call the generic timer handler */ evt->event_handler(evt); @@ -235,7 +235,7 @@ cycles_t ns2cycles(unsigned long nsecs) * We do not have to disable preemption here as each core has the same * clock frequency. */ - struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); + struct clock_event_device *dev = raw_cpu_ptr(&tile_timer); /* * as in clocksource.h and x86's timer.h, we split the calculation diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 0dc218294770..6aa2f2625447 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -103,7 +103,7 @@ static void kmap_atomic_register(struct page *page, int type, spin_lock(&_lock); /* With interrupts disabled, now fill in the per-cpu info. */ - amp = &__get_cpu_var(amps).per_type[type]; + amp = this_cpu_ptr(&s.per_type[type]); amp->page = page; amp->cpu = smp_processor_id(); amp->va = va; diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index bfb3127b4df9..f46a152b09e6 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -593,14 +593,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) interrupt_mask_set_mask(-1ULL); rc = flush_and_install_context(__pa(pgtables), init_pgprot((unsigned long)pgtables), - __get_cpu_var(current_asid), + __this_cpu_read(current_asid), cpumask_bits(my_cpu_mask)); interrupt_mask_restore_mask(irqmask); BUG_ON(rc != 0); /* Copy the page table back to the normal swapper_pg_dir. */ memcpy(pgd_base, pgtables, sizeof(pgtables)); - __install_page_table(pgd_base, __get_cpu_var(current_asid), + __install_page_table(pgd_base, __this_cpu_read(current_asid), swapper_pgprot); /* -- cgit v1.2.3 From 81829a96869c8bad74a582705617e75758c4152d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:51 -0500 Subject: tile: Use this_cpu_ptr() for hardware counters Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/tile/kernel/perf_event.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c index 2bf6c9c135c1..bb509cee3b59 100644 --- a/arch/tile/kernel/perf_event.c +++ b/arch/tile/kernel/perf_event.c @@ -590,7 +590,7 @@ static int tile_event_set_period(struct perf_event *event) */ static void tile_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -616,7 +616,7 @@ static void tile_pmu_stop(struct perf_event *event, int flags) */ static void tile_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = event->hw.idx; if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -650,7 +650,7 @@ static void tile_pmu_start(struct perf_event *event, int flags) */ static int tile_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc; unsigned long mask; int b, max_cnt; @@ -706,7 +706,7 @@ static int tile_pmu_add(struct perf_event *event, int flags) */ static void tile_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; /* @@ -880,14 +880,14 @@ static struct pmu tilera_pmu = { int tile_pmu_handle_irq(struct pt_regs *regs, int fault) { struct perf_sample_data data; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; u64 val; unsigned long status; int bit; - __get_cpu_var(perf_irqs)++; + __this_cpu_inc(perf_irqs); if (!atomic_read(&tile_active_events)) return 0; -- cgit v1.2.3 From 7e788ab11d73fbb617973c12a9b3f84f93721e67 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:52 -0500 Subject: blackfin: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) CC: Mike Frysinger Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/blackfin/include/asm/ipipe.h | 2 +- arch/blackfin/kernel/perf_event.c | 10 +++++----- arch/blackfin/mach-common/ints-priority.c | 8 ++++---- arch/blackfin/mach-common/smp.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/include/asm/ipipe.h b/arch/blackfin/include/asm/ipipe.h index 17b5e92e3bc6..fe1160fbff91 100644 --- a/arch/blackfin/include/asm/ipipe.h +++ b/arch/blackfin/include/asm/ipipe.h @@ -157,7 +157,7 @@ static inline unsigned long __ipipe_ffnz(unsigned long ul) } #define __ipipe_do_root_xirq(ipd, irq) \ - ((ipd)->irqs[irq].handler(irq, &__raw_get_cpu_var(__ipipe_tick_regs))) + ((ipd)->irqs[irq].handler(irq, raw_cpu_ptr(&__ipipe_tick_regs))) #define __ipipe_run_irqtail(irq) /* Must be a macro */ \ do { \ diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c index ea2032013cc2..1e9c8b0bf486 100644 --- a/arch/blackfin/kernel/perf_event.c +++ b/arch/blackfin/kernel/perf_event.c @@ -300,7 +300,7 @@ again: static void bfin_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -318,7 +318,7 @@ static void bfin_pmu_stop(struct perf_event *event, int flags) static void bfin_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -335,7 +335,7 @@ static void bfin_pmu_start(struct perf_event *event, int flags) static void bfin_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); bfin_pmu_stop(event, PERF_EF_UPDATE); __clear_bit(event->hw.idx, cpuc->used_mask); @@ -345,7 +345,7 @@ static void bfin_pmu_del(struct perf_event *event, int flags) static int bfin_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; int ret = -EAGAIN; @@ -421,7 +421,7 @@ static int bfin_pmu_event_init(struct perf_event *event) static void bfin_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; int i; diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 1f94784eab6d..e5910e9bc4ac 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -1309,12 +1309,12 @@ asmlinkage int __ipipe_grab_irq(int vec, struct pt_regs *regs) bfin_write_TIMER_STATUS(1); /* Latch TIMIL0 */ #endif /* This is basically what we need from the register frame. */ - __raw_get_cpu_var(__ipipe_tick_regs).ipend = regs->ipend; - __raw_get_cpu_var(__ipipe_tick_regs).pc = regs->pc; + __this_cpu_write(__ipipe_tick_regs.ipend, regs->ipend); + __this_cpu_write(__ipipe_tick_regs.pc, regs->pc); if (this_domain != ipipe_root_domain) - __raw_get_cpu_var(__ipipe_tick_regs).ipend &= ~0x10; + __this_cpu_and(__ipipe_tick_regs.ipend, ~0x10); else - __raw_get_cpu_var(__ipipe_tick_regs).ipend |= 0x10; + __this_cpu_or(__ipipe_tick_regs.ipend, 0x10); } /* diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index ba6c30d8534d..8ad3e90cc8fc 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -146,7 +146,7 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) platform_clear_ipi(cpu, IRQ_SUPPLE_1); smp_rmb(); - bfin_ipi_data = &__get_cpu_var(bfin_ipi); + bfin_ipi_data = this_cpu_ptr(&bfin_ipi); while ((pending = atomic_xchg(&bfin_ipi_data->bits, 0)) != 0) { msg = 0; do { -- cgit v1.2.3 From 8c23af6122e56ad30e3de259731c31a998ff5b11 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:53 -0500 Subject: avr32: Replace __get_cpu_var with __this_cpu_write Replace the single use of __get_cpu_var in avr32 with __this_cpu_write. Cc: Haavard Skinnemoen Acked-by: Hans-Christian Egtvedt Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/avr32/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c index f820e9f25520..a94ece4a72c8 100644 --- a/arch/avr32/kernel/kprobes.c +++ b/arch/avr32/kernel/kprobes.c @@ -104,7 +104,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) static void __kprobes set_current_kprobe(struct kprobe *p) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } static int __kprobes kprobe_handler(struct pt_regs *regs) -- cgit v1.2.3 From 494fc42170bf0747ac28e12ef13a7d388d5ff2c7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:54 -0500 Subject: sparc: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: sparclinux@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/sparc/include/asm/cpudata_32.h | 2 +- arch/sparc/include/asm/cpudata_64.h | 2 +- arch/sparc/kernel/kprobes.c | 6 +++--- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/nmi.c | 16 ++++++++-------- arch/sparc/kernel/pci_sun4v.c | 8 ++++---- arch/sparc/kernel/perf_event.c | 26 +++++++++++++------------- arch/sparc/kernel/sun4d_smp.c | 2 +- arch/sparc/kernel/time_64.c | 2 +- arch/sparc/mm/tlb.c | 4 ++-- 10 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h index 0300d94c25b3..05f366379f53 100644 --- a/arch/sparc/include/asm/cpudata_32.h +++ b/arch/sparc/include/asm/cpudata_32.h @@ -26,6 +26,6 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) -#define local_cpu_data() __get_cpu_var(__cpu_data) +#define local_cpu_data() (*this_cpu_ptr(&__cpu_data)) #endif /* _SPARC_CPUDATA_H */ diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 0e594076912c..a6e424d185d0 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -30,7 +30,7 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) -#define local_cpu_data() __get_cpu_var(__cpu_data) +#define local_cpu_data() (*this_cpu_ptr(&__cpu_data)) #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index 98d712843413..cd83be527586 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -83,7 +83,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc; kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil; @@ -92,7 +92,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_orig_tnpc = regs->tnpc; kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); } @@ -155,7 +155,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) goto ss_probe; } diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 018ef11f57df..ea2bad306f93 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -343,7 +343,7 @@ static void leon_ipi_resched(int cpu) void leonsmp_ipi_interrupt(void) { - struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work); + struct leon_ipi_work *work = this_cpu_ptr(&leon_ipi_work); if (work->single) { work->single = 0; diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 5b1151dcba13..a9973bb4a1b2 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -100,20 +100,20 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); sum = local_cpu_data().irq0_irqs; - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; + if (__this_cpu_read(nmi_touch)) { + __this_cpu_write(nmi_touch, 0); touched = 1; } - if (!touched && __get_cpu_var(last_irq_sum) == sum) { + if (!touched && __this_cpu_read(last_irq_sum) == sum) { __this_cpu_inc(alert_counter); if (__this_cpu_read(alert_counter) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected LOCKUP", regs, panic_on_timeout); } else { - __get_cpu_var(last_irq_sum) = sum; + __this_cpu_write(last_irq_sum, sum); __this_cpu_write(alert_counter, 0); } - if (__get_cpu_var(wd_enabled)) { + if (__this_cpu_read(wd_enabled)) { pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); } @@ -154,7 +154,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); - __get_cpu_var(wd_enabled) = 0; + __this_cpu_write(wd_enabled, 0); atomic_dec(&nmi_active); } @@ -207,7 +207,7 @@ error: void start_nmi_watchdog(void *unused) { - __get_cpu_var(wd_enabled) = 1; + __this_cpu_write(wd_enabled, 1); atomic_inc(&nmi_active); pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); @@ -218,7 +218,7 @@ void start_nmi_watchdog(void *unused) static void nmi_adjust_hz_one(void *unused) { - if (!__get_cpu_var(wd_enabled)) + if (!__this_cpu_read(wd_enabled)) return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d07f6b29aed8..49d33b178793 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -48,7 +48,7 @@ static int iommu_batch_initialized; /* Interrupts must be disabled. */ static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); p->dev = dev; p->prot = prot; @@ -94,7 +94,7 @@ static long iommu_batch_flush(struct iommu_batch *p) static inline void iommu_batch_new_entry(unsigned long entry) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); if (p->entry + p->npages == entry) return; @@ -106,7 +106,7 @@ static inline void iommu_batch_new_entry(unsigned long entry) /* Interrupts must be disabled. */ static inline long iommu_batch_add(u64 phys_page) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); BUG_ON(p->npages >= PGLIST_NENTS); @@ -120,7 +120,7 @@ static inline long iommu_batch_add(u64 phys_page) /* Interrupts must be disabled. */ static inline long iommu_batch_end(void) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); BUG_ON(p->npages >= PGLIST_NENTS); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d35c490a91cb..264049a6cb74 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1013,7 +1013,7 @@ static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) static void sparc_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; if (cpuc->enabled) @@ -1031,7 +1031,7 @@ static void sparc_pmu_enable(struct pmu *pmu) static void sparc_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; if (!cpuc->enabled) @@ -1065,7 +1065,7 @@ static int active_event_index(struct cpu_hw_events *cpuc, static void sparc_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = active_event_index(cpuc, event); if (flags & PERF_EF_RELOAD) { @@ -1080,7 +1080,7 @@ static void sparc_pmu_start(struct perf_event *event, int flags) static void sparc_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = active_event_index(cpuc, event); if (!(event->hw.state & PERF_HES_STOPPED)) { @@ -1096,7 +1096,7 @@ static void sparc_pmu_stop(struct perf_event *event, int flags) static void sparc_pmu_del(struct perf_event *event, int _flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long flags; int i; @@ -1133,7 +1133,7 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) static void sparc_pmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = active_event_index(cpuc, event); struct hw_perf_event *hwc = &event->hw; @@ -1145,7 +1145,7 @@ static DEFINE_MUTEX(pmc_grab_mutex); static void perf_stop_nmi_watchdog(void *unused) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; stop_nmi_watchdog(NULL); @@ -1356,7 +1356,7 @@ static int collect_events(struct perf_event *group, int max_count, static int sparc_pmu_add(struct perf_event *event, int ef_flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int n0, ret = -EAGAIN; unsigned long flags; @@ -1498,7 +1498,7 @@ static int sparc_pmu_event_init(struct perf_event *event) */ static void sparc_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1511,7 +1511,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) */ static void sparc_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1524,13 +1524,13 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) */ static int sparc_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int n; if (!sparc_pmu) return -EINVAL; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); n = cpuc->n_events; if (check_excludes(cpuc->event, 0, n)) return -EINVAL; @@ -1601,7 +1601,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* If the PMU has the TOE IRQ enable bits, we need to do a * dummy write to the %pcr to clear the overflow bits and thus diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index d5c319553fd0..9d98e5002a09 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -204,7 +204,7 @@ static void __init smp4d_ipi_init(void) void sun4d_ipi_interrupt(void) { - struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work); + struct sun4d_ipi_work *work = this_cpu_ptr(&sun4d_ipi_work); if (work->single) { work->single = 0; diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3fddf64c7fc6..59da0c3ea788 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -765,7 +765,7 @@ void setup_sparc64_timer(void) : /* no outputs */ : "r" (pstate)); - sevt = &__get_cpu_var(sparc64_events); + sevt = this_cpu_ptr(&sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); sevt->cpumask = cpumask_of(smp_processor_id()); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index b89aba217e3b..9df2190c097e 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -52,14 +52,14 @@ out: void arch_enter_lazy_mmu_mode(void) { - struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); tb->active = 1; } void arch_leave_lazy_mmu_mode(void) { - struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); if (tb->tlb_nr) flush_tlb_pending(); -- cgit v1.2.3 From 23f66e2d661b4d3226d16e25910a9e9472ce2410 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Aug 2014 11:18:29 -0400 Subject: Revert "powerpc: Replace __get_cpu_var uses" This reverts commit 5828f666c069af74e00db21559f1535103c9f79a due to build failure after merging with pending powerpc changes. Link: http://lkml.kernel.org/g/20140827142243.6277eaff@canb.auug.org.au Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Christoph Lameter Cc: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hardirq.h | 4 +--- arch/powerpc/include/asm/tlbflush.h | 4 ++-- arch/powerpc/include/asm/xics.h | 8 ++++---- arch/powerpc/kernel/dbell.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 6 +++--- arch/powerpc/kernel/iommu.c | 2 +- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 6 +++--- arch/powerpc/kernel/mce.c | 24 +++++++++++------------ arch/powerpc/kernel/process.c | 10 +++++----- arch/powerpc/kernel/smp.c | 6 +++--- arch/powerpc/kernel/sysfs.c | 4 ++-- arch/powerpc/kernel/time.c | 22 ++++++++++----------- arch/powerpc/kernel/traps.c | 8 ++++---- arch/powerpc/kvm/e500.c | 14 ++++++------- arch/powerpc/kvm/e500mc.c | 4 ++-- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 +++--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/perf/core-book3s.c | 22 ++++++++++----------- arch/powerpc/perf/core-fsl-emb.c | 6 +++--- arch/powerpc/platforms/cell/interrupt.c | 6 +++--- arch/powerpc/platforms/powernv/opal-tracepoints.c | 4 ++-- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 4 ++-- arch/powerpc/platforms/pseries/iommu.c | 8 ++++---- arch/powerpc/platforms/pseries/lpar.c | 6 +++--- arch/powerpc/platforms/pseries/ras.c | 4 ++-- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 32 files changed, 103 insertions(+), 105 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 8d907ba4fd05..1bbb3013d6aa 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,9 +21,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) -#define set_softirq_pending(x) __this_cpu_write(irq_stat._softirq_pending, (x)) -#define or_softirq_pending(x) __this_cpu_or(irq_stat._softirq_pending, (x)) +#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index cd7c2719d3ef..2def01ed0cb2 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 5007ad0448ce..282d43a0c855 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -97,7 +97,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -110,7 +110,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -120,7 +120,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -132,7 +132,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f4217819cc31..d55c76c571f3 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __this_cpu_inc(irq_stat.doorbell_irqs); + __get_cpu_var(irq_stat).doorbell_irqs++; smp_ipi_demux(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index b62f90eaf19e..0bb5918faaaf 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot = &__get_cpu_var(bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot = &__get_cpu_var(bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __this_cpu_read(bp_per_reg); + bp = __get_cpu_var(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 71e60bfb89e2..a10642a0d861 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 74d40c6855b8..4c5891de162e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __this_cpu_inc(irq_stat.spurious_irqs); + __get_cpu_var(irq_stat).spurious_irqs++; else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e77c3ccf8dcf..8504657379f1 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - this_cpu_ptr(&kgdb_thread_info); + &__get_cpu_var(kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7c053f281406..2f72af82513c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __this_cpu_write(current_kprobe, p); + __get_cpu_var(current_kprobe) = p; kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __this_cpu_read(current_kprobe); + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15c99b649b04..a7fd4cb78b78 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __this_cpu_inc_return(mce_nest_count); - struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); + int index = __get_cpu_var(mce_nest_count)++; + struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __this_cpu_read(mce_nest_count) - 1; + int index = __get_cpu_var(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = this_cpu_ptr(&mce_event[index]); + mc_evt = &__get_cpu_var(mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __this_cpu_dec(mce_nest_count); + __get_cpu_var(mce_nest_count)--; return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count); + index = __get_cpu_var(mce_queue_count)++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_queue_count); + __get_cpu_var(mce_queue_count)--; return; } - memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); + __get_cpu_var(mce_event_queue[index]) = evt; /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__this_cpu_read(mce_queue_count) > 0) { - index = __this_cpu_read(mce_queue_count) - 1; + while (__get_cpu_var(mce_queue_count) > 0) { + index = __get_cpu_var(mce_queue_count) - 1; machine_check_print_event_info( - this_cpu_ptr(&mce_event_queue[index])); - __this_cpu_dec(mce_queue_count); + &__get_cpu_var(mce_event_queue[index])); + __get_cpu_var(mce_queue_count)--; } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2df2f2956520..bf44ae962ab8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -498,7 +498,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __this_cpu_write(current_brk, *brk); + __get_cpu_var(current_brk) = *brk; if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -841,7 +841,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -855,7 +855,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -865,7 +865,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = this_cpu_ptr(&ppc64_tlb_batch); + batch = &__get_cpu_var(ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -888,7 +888,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = this_cpu_ptr(&ppc64_tlb_batch); + batch = &__get_cpu_var(ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 60391a51467a..a0738af4aba6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -242,7 +242,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = this_cpu_ptr(&ipi_message); + struct cpu_messages *info = &__get_cpu_var(ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -438,9 +438,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __this_cpu_write(cpu_state, CPU_DEAD); + __get_cpu_var(cpu_state) = CPU_DEAD; smp_wmb(); - while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index fa1fd8a0c867..67fd2fd2620a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__this_cpu_read(pmcs_enabled)) + if (__get_cpu_var(pmcs_enabled)) return; - __this_cpu_write(pmcs_enabled, 1); + __get_cpu_var(pmcs_enabled) = 1; if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4769e5b7f905..368ab374d33c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) -#define test_irq_work_pending() __this_cpu_read(irq_work_pending) -#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - struct clock_event_device *evt = this_cpu_ptr(&decrementers); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __this_cpu_inc(irq_stat.timer_irqs_event); + __get_cpu_var(irq_stat).timer_irqs_event++; } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __this_cpu_inc(irq_stat.timer_irqs_others); + __get_cpu_var(irq_stat).timer_irqs_others++; } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e6595b72269b..0dc43f9932cf 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __this_cpu_inc(irq_stat.mce_exceptions); + __get_cpu_var(irq_stat).mce_exceptions++; if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __this_cpu_inc(irq_stat.hmi_exceptions); + __get_cpu_var(irq_stat).hmi_exceptions++; if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __this_cpu_inc(irq_stat.mce_exceptions); + __get_cpu_var(irq_stat).mce_exceptions++; /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __this_cpu_inc(irq_stat.pmu_irqs); + __get_cpu_var(irq_stat).pmu_irqs++; perf_irq(regs); } diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 16095841afe1..2e02ed849f36 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = __this_cpu_inc_return(pcpu_last_used_sid); + sid = ++(__get_cpu_var(pcpu_last_used_sid)); if (sid < NUM_TIDS) { - __this_cpu_write(pcpu_sids)entry[sid], entry); + __get_cpu_var(pcpu_sids).entry[sid] = entry; entry->val = sid; - entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); + entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && - entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) + __get_cpu_var(pcpu_sids).entry[entry->val] == entry && + entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __this_cpu_write(pcpu_last_used_sid, 0); - memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); + __get_cpu_var(pcpu_last_used_sid) = 0; + memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 6ef54e523f33..164bad2a19bf 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __this_cpu_read(last_vcpu_of_lpid[vcpu->kvm->arch.lpid]) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __this_cpu_write(last_vcpu_of_lpid[vcpu->kvm->arch.lpid], vcpu); + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } kvmppc_load_guest_fp(vcpu); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 504a16f1a1a0..afc0a8295f84 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -625,7 +625,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 060d51fda35e..daee7f4e5a14 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1314,7 +1314,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - this_cpu_ptr(&ppc64_tlb_batch); + &__get_cpu_var(ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index ba47aaf33a4b..5e4ee2573903 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = this_cpu_read(next_tlbcam_idx); + index = __get_cpu_var(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __this_cpu_write(next_tlbcam_idx, tlbcam_index); + __get_cpu_var(next_tlbcam_idx) = tlbcam_index; else - __this_cpu_inc(next_tlbcam_idx); + __get_cpu_var(next_tlbcam_idx)++; return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8aa04f03fd31..7e70ae968e5f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = this_cpu_ptr(&hugepd_freelist_cur); + batchp = &get_cpu_var(hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 690f9c7bf3c8..b7cd00b0171e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * and check whether the total set is still feasible. */ - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ void power_pmu_start_txn(struct pmu *pmu) */ void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); struct perf_event *event; unsigned long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 4acaea01fe03..d35ae52c69dc 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 4c11421847be..8a106b4172e0 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = this_cpu_ptr(&cpu_iic); + struct iic *iic = &__get_cpu_var(cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = this_cpu_ptr(&cpu_iic); + iic = &__get_cpu_var(cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); + out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index 9527e2a7c541..d8a000a9988b 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = this_cpu_ptr(&opal_trace_depth); + depth = &__get_cpu_var(opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = this_cpu_ptr(&opal_trace_depth); + depth = &__get_cpu_var(opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index a6c42f34303a..5f3b23220b8e 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = this_cpu_ptr(&ps3_private); + struct ps3_private *pd = &__get_cpu_var(ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 39049e4884fb..1062f71f5a85 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); + struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index f02ec3ab428c..4575f0c9e521 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h = &__get_cpu_var(hcall_stats)[opcode / 4]; h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h = &__get_cpu_var(hcall_stats)[opcode / 4]; h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 8c355ed4291e..4642d6a4d356 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -200,7 +200,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __this_cpu_read(tce_page); + tcep = __get_cpu_var(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -213,7 +213,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __this_cpu_write(tce_page, tcep); + __get_cpu_var(tce_page) = tcep; } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -399,7 +399,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __this_cpu_read(tce_page); + tcep = __get_cpu_var(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -407,7 +407,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __this_cpu_write(tce_page, tcep); + __get_cpu_var(tce_page) = tcep; } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 56df72da59fe..34e64237fff9 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -507,7 +507,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -697,7 +697,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = this_cpu_ptr(&hcall_trace_depth); + depth = &__get_cpu_var(hcall_trace_depth); if (*depth) goto out; @@ -722,7 +722,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = this_cpu_ptr(&hcall_trace_depth); + depth = &__get_cpu_var(hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 179a69fd5568..dff05b9eb946 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { - memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); + memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 365249cd346b..fe0cca477164 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); /* * we have to reset the cppr index to 0 because we're -- cgit v1.2.3 From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 26 Aug 2014 19:12:21 -0500 Subject: percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t __get_cpu_var can paper over differences in the definitions of cpumask_var_t and either use the address of the cpumask variable directly or perform a fetch of the address of the struct cpumask allocated elsewhere. This is important particularly when using per cpu cpumask_var_t declarations because in one case we have an offset into a per cpu area to handle and in the other case we need to fetch a pointer from the offset. This patch introduces a new macro this_cpu_cpumask_var_ptr() that is defined where cpumask_var_t is defined and performs the proper actions. All use cases where __get_cpu_var is used with cpumask_var_t are converted to the use of this_cpu_cpumask_var_ptr(). Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/perf_event_p4.h | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/oprofile/op_model_p4.c | 2 +- include/linux/cpumask.h | 11 +++++++++++ kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce600f9bc78..1f5d5f2ffae6 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); /* * The idea is to send one IPI per cluster. diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 98ab13058f89..ad1d91f475ab 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -372,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2997af6d2ccd..0a9a6da21e74 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -666,10 +666,19 @@ static inline size_t cpumask_size(void) * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differently influences what this_cpu_* operation needs to be + * used. Please use this_cpu_cpumask_var_t in those cases. The direct use + * of this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) + bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); @@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; +#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) + static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 255ce138b652..4a608cfaecbd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); static int find_later_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); + struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); int this_cpu = smp_processor_id(); int best_cpu, cpu = task_cpu(task); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfa3c86d0d68..197d659c144c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; struct rq *busiest; unsigned long flags; - struct cpumask *cpus = __get_cpu_var(load_balance_mask); + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { .sd = sd, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5f6edca4fafd..a4c50fce9b90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); -- cgit v1.2.3 From f318f7db00f4968d934b4ccb8e3fc0b21f419046 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 1 Sep 2014 16:15:52 -0500 Subject: ia64: sn_nodepda cannot be assigned to after this_cpu conversion. Use __this_cpu_write. There must be an explit statement to modify the percpu variable after the conversion of the sn_nodpda macro to use this_cpu_read. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo Compile-tested-by: Guenter Roeck --- arch/ia64/sn/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 36182c84363c..5f6b6b48c1d5 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -629,7 +629,7 @@ void sn_cpu_init(void) cnode = nasid_to_cnodeid(nasid); - sn_nodepda = nodepdaindr[cnode]; + __this_cpu_write(__sn_nodepda, nodepdaindr[cnode]); pda->led_address = (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); -- cgit v1.2.3 From 59f6e2073c72d36c814a4417320bfa4874faa228 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 8 Sep 2014 23:06:54 +0100 Subject: percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t -fix A commit in linux-next was causing boot to fail and bisection identified the patch 4ba2968420fa ("percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_"). One of the changes in that patch looks very suspicious. Reverting the full patch fixes boot as does this fixlet. Signed-off-by: Mel Gorman Signed-off-by: Tejun Heo Cc: Christoph Lameter --- arch/x86/kernel/apic/x2apic_cluster.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 1f5d5f2ffae6..e658f21681c8 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -43,6 +43,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * and be sure it's manipulated with irq off. */ ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(ipi_mask_ptr, mask); /* * The idea is to send one IPI per cluster. -- cgit v1.2.3 From b7d5006de1afab266175288f41e5dc70e69cce33 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 27 Aug 2014 07:35:05 +0200 Subject: s390: remove unused MACHINE_FLAG_RRBM Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 3 --- arch/s390/kernel/early.c | 2 -- 2 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 089a49814c50..dbde7c236577 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -55,7 +55,6 @@ extern void detect_memory_memblock(void); #define MACHINE_FLAG_LPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) -#define MACHINE_FLAG_RRBM (1UL << 16) #define MACHINE_FLAG_TLB_LC (1UL << 17) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) @@ -78,7 +77,6 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (0) #define MACHINE_HAS_TOPOLOGY (0) #define MACHINE_HAS_TE (0) -#define MACHINE_HAS_RRBM (0) #define MACHINE_HAS_TLB_LC (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) @@ -91,7 +89,6 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0dff972a169c..f6c66b5d0cfa 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,8 +390,6 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; if (test_facility(50) && test_facility(73)) S390_lowcore.machine_flags |= MACHINE_FLAG_TE; - if (test_facility(66)) - S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; #endif -- cgit v1.2.3 From 070b7be633dc33c0899e8c934b4d5fad046b06e8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 29 Aug 2014 12:44:40 +0200 Subject: s390/vdso: replace stck with stcke If gettimeofday / clock_gettime are called multiple times in a row the STCK instruction will stall until a difference in the result is visible. This unnecessarily slows down the vdso calls, use stcke instead of stck to get rid of the stall. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso32/clock_gettime.S | 8 ++++---- arch/s390/kernel/vdso32/gettimeofday.S | 4 ++-- arch/s390/kernel/vdso64/clock_gettime.S | 8 ++++---- arch/s390/kernel/vdso64/gettimeofday.S | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 7cf18f8d4cb4..4e20a9365bb7 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -30,8 +30,8 @@ __kernel_clock_gettime: 1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,2f @@ -72,8 +72,8 @@ __kernel_clock_gettime: 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 11b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,12f diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index fd621a950f7c..60def5f562db 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -29,8 +29,8 @@ __kernel_gettimeofday: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,3f diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 3f34e09db5f4..4add40b78ed5 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -33,10 +33,10 @@ __kernel_clock_gettime: 0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ lg %r0,__VDSO_WTOM_SEC(%r5) - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_WTOM_NSEC(%r5) @@ -58,9 +58,9 @@ __kernel_clock_gettime: 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 5b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index d0860d1d0ccc..7a344995a97f 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -28,8 +28,8 @@ __kernel_gettimeofday: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) + stcke 48(%r15) /* Store TOD clock */ + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ -- cgit v1.2.3 From b7eacb59cd7fb5e98852186e485c0c865f862645 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 29 Aug 2014 12:31:45 +0200 Subject: s390/vdso: add vdso support for coarse clocks Add CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE optimization to the 64-bit and 31-bit vdso. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/vdso.h | 18 +++++++++++------- arch/s390/kernel/asm-offsets.c | 7 +++++++ arch/s390/kernel/time.c | 13 +++++++++++++ arch/s390/kernel/vdso32/clock_getres.S | 11 +++++++++-- arch/s390/kernel/vdso32/clock_gettime.S | 24 ++++++++++++++++++++++++ arch/s390/kernel/vdso64/clock_getres.S | 8 +++++++- arch/s390/kernel/vdso64/clock_gettime.S | 24 ++++++++++++++++++++++++ 7 files changed, 95 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index bc9746a7d47c..a62526d09201 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -22,13 +22,17 @@ struct vdso_data { __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ __u64 xtime_clock_sec; /* Kernel time 0x10 */ __u64 xtime_clock_nsec; /* 0x18 */ - __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ - __u64 wtom_clock_nsec; /* 0x28 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ - __u32 tz_dsttime; /* Type of dst correction 0x34 */ - __u32 ectg_available; /* ECTG instruction present 0x38 */ - __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */ - __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */ + __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */ + __u64 xtime_coarse_nsec; /* 0x28 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */ + __u64 wtom_clock_nsec; /* 0x38 */ + __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */ + __u64 wtom_coarse_nsec; /* 0x48 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */ + __u32 tz_dsttime; /* Type of dst correction 0x54 */ + __u32 ectg_available; /* ECTG instruction present 0x58 */ + __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */ + __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index afe1715a4eb7..3e9e479d9b49 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -62,8 +62,12 @@ int main(void) DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); + DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec)); + DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); @@ -73,8 +77,11 @@ int main(void) /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4cef607f3711..69e980de0f62 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -232,6 +232,19 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } + + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = + (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + vdso_data->wtom_coarse_sec = + vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_coarse_nsec = + vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec; + while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) { + vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC; + vdso_data->wtom_coarse_sec++; + } + vdso_data->tk_mult = tk->tkr.mult; vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index 36aaa25d05da..eca3f001f081 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -19,14 +19,20 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + basr %r1,0 + la %r1,4f-.(%r1) chi %r2,__CLOCK_REALTIME je 0f chi %r2,__CLOCK_MONOTONIC + je 0f + la %r1,5f-4f(%r1) + chi %r2,__CLOCK_REALTIME_COARSE + je 0f + chi %r2,__CLOCK_MONOTONIC_COARSE jne 3f 0: ltr %r3,%r3 jz 2f /* res == NULL */ - basr %r1,0 -1: l %r0,4f-1b(%r1) +1: l %r0,0(%r1) xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ st %r0,4(%r3) /* store tp->tv_usec */ 2: lhi %r2,0 @@ -35,5 +41,6 @@ __kernel_clock_getres: svc 0 br %r14 4: .long __CLOCK_REALTIME_RES +5: .long __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 4e20a9365bb7..48c2206a3956 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -21,8 +21,12 @@ __kernel_clock_gettime: .cfi_startproc basr %r5,0 0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME_COARSE + je 10f chi %r2,__CLOCK_REALTIME je 11f + chi %r2,__CLOCK_MONOTONIC_COARSE + je 9f chi %r2,__CLOCK_MONOTONIC jne 19f @@ -68,6 +72,26 @@ __kernel_clock_gettime: lhi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 9b + l %r2,__VDSO_WTOM_CRS_SEC+4(%r5) + l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 9b + j 8b + + /* CLOCK_REALTIME_COARSE */ +10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 10b + l %r2,__VDSO_XTIME_CRS_SEC+4(%r5) + l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 10b + j 17f + /* CLOCK_REALTIME */ 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 34deba7c7ed1..c8513deb8c66 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -19,6 +19,12 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + larl %r1,4f + cghi %r2,__CLOCK_REALTIME_COARSE + je 0f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 0f + larl %r1,3f cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -32,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - larl %r1,3f lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ @@ -42,5 +47,6 @@ __kernel_clock_getres: svc 0 br %r14 3: .quad __CLOCK_REALTIME_RES +4: .quad __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 4add40b78ed5..9d9761f8e110 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -20,12 +20,16 @@ __kernel_clock_gettime: .cfi_startproc larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME_COARSE + je 4f cghi %r2,__CLOCK_REALTIME je 5f cghi %r2,__CLOCK_THREAD_CPUTIME_ID je 9f cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ je 9f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 3f cghi %r2,__CLOCK_MONOTONIC jne 12f @@ -54,6 +58,26 @@ __kernel_clock_gettime: lghi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 3b + lg %r0,__VDSO_WTOM_CRS_SEC(%r5) + lg %r1,__VDSO_WTOM_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 3b + j 2b + + /* CLOCK_REALTIME_COARSE */ +4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 4b + lg %r0,__VDSO_XTIME_CRS_SEC(%r5) + lg %r1,__VDSO_XTIME_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 4b + j 7f + /* CLOCK_REALTIME */ 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ -- cgit v1.2.3 From 0f1b1ff54b386926ef1a524e60ef89ae7738bbd5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Aug 2014 16:06:02 +0200 Subject: s390: pass march flag to assembly files as well Currently the march flag gets only passed to C files, but not to assembler files. This means that we can't add new instructions like e.g. aghik to asm files, since the assembler doesn't know of the new instructions if the appropriate march flag isn't specified. So also pass the march flag when compiling assembler files as well. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 874e6d6e9c5f..878e67973151 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -35,13 +35,16 @@ endif export LD_BFD -cflags-$(CONFIG_MARCH_G5) += -march=g5 -cflags-$(CONFIG_MARCH_Z900) += -march=z900 -cflags-$(CONFIG_MARCH_Z990) += -march=z990 -cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109 -cflags-$(CONFIG_MARCH_Z10) += -march=z10 -cflags-$(CONFIG_MARCH_Z196) += -march=z196 -cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12 +mflags-$(CONFIG_MARCH_G5) := -march=g5 +mflags-$(CONFIG_MARCH_Z900) := -march=z900 +mflags-$(CONFIG_MARCH_Z990) := -march=z990 +mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 +mflags-$(CONFIG_MARCH_Z10) := -march=z10 +mflags-$(CONFIG_MARCH_Z196) := -march=z196 +mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 + +aflags-y += $(mflags-y) +cflags-y += $(mflags-y) cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5 cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 -- cgit v1.2.3 From 2481a87b0250bbf429fc8cdc78331efbc44a0221 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Aug 2014 12:33:46 +0200 Subject: s390/ftrace: optimize function graph caller code When the function graph tracer is disabled we can skip three additional instructions. So let's just do this. So if function tracing is enabled but function graph tracing is runtime disabled, we get away with a single unconditional branch. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 1 + arch/s390/kernel/ftrace.c | 24 ++++++++++++++++++++++++ arch/s390/kernel/mcount64.S | 15 +++++++++------ 3 files changed, 34 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index bf246dae1367..7b8e456d76c9 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); +extern char ftrace_graph_caller_end; struct dyn_arch_ftrace { }; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 54d6493c4a56..de55efa5b64e 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -170,6 +170,29 @@ out: * directly after the instructions. To enable the call we calculate * the original offset to prepare_ftrace_return and put it back. */ + +#ifdef CONFIG_64BIT + +int ftrace_enable_ftrace_graph_caller(void) +{ + static unsigned short offset = 0x0002; + + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned short offset; + + offset = ((void *) &ftrace_graph_caller_end - + (void *) ftrace_graph_caller) / 2; + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +#else /* CONFIG_64BIT */ + int ftrace_enable_ftrace_graph_caller(void) { unsigned short offset; @@ -188,5 +211,6 @@ int ftrace_disable_ftrace_graph_caller(void) &offset, sizeof(offset)); } +#endif /* CONFIG_64BIT */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index c67a8bf0fd9a..5b33c83adde9 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -32,14 +32,17 @@ ENTRY(ftrace_caller) lg %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER +# The j instruction gets runtime patched to a nop instruction. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# j .+4 +ENTRY(ftrace_graph_caller) + j ftrace_graph_caller_end lg %r2,168(%r15) lg %r3,272(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: stg %r2,168(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,168(%r15) +ftrace_graph_caller_end: + .globl ftrace_graph_caller_end #endif aghi %r15,160 lmg %r2,%r5,32(%r15) -- cgit v1.2.3 From 10dec7dbd50ab0be96dda085d625d54ce800e426 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Aug 2014 13:01:46 +0200 Subject: s390/ftrace: add HAVE_DYNAMIC_FTRACE_WITH_REGS support This code is based on a patch from Vojtech Pavlik. http://marc.info/?l=linux-s390&m=140438885114413&w=2 The actual implementation now differs significantly: Instead of adding a second function "ftrace_regs_caller" which would be nearly identical to the existing ftrace_caller function, the current ftrace_caller function is now an alias to ftrace_regs_caller and always passes the needed pt_regs structure and function_trace_op parameters unconditionally. Besides that also use asm offsets to correctly allocate and access the new struct pt_regs on the stack. While at it we can make use of new instruction to get rid of some indirect loads if compiled for new machines. The passed struct pt_regs can be changed by the called function and it's new contents will replace the current contents. Note: to change the return address the embedded psw member of the pt_regs structure must be changed. The psw member is right now incomplete, since the mask part is missing. For all current use cases this should be sufficent. Providing and restoring a sane mask would mean we need to add an epsw/lpswe pair to the mcount code. Only these two instruction would cost us ~120 cycles which currently seems not necessary. Cc: Vojtech Pavlik Cc: Jiri Kosina Cc: Jiri Slaby Cc: Steven Rostedt Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/ftrace.h | 4 ++++ arch/s390/kernel/ftrace.c | 7 +++++++ arch/s390/kernel/mcount64.S | 43 ++++++++++++++++++++++++++++++------------ 4 files changed, 43 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 05c78bb5f570..3f845fc02ac4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -114,6 +114,7 @@ config S390 select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 7b8e456d76c9..1759d73fb95b 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -24,4 +24,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #define MCOUNT_INSN_SIZE 22 #endif +#ifdef CONFIG_64BIT +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index de55efa5b64e..14b61954d5a8 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -107,6 +107,13 @@ asm( #endif /* CONFIG_64BIT */ +#ifdef CONFIG_64BIT +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return 0; +} +#endif int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 5b33c83adde9..4a65dabae851 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -8,28 +8,47 @@ #include #include #include +#include .section .kprobes.text, "ax" ENTRY(ftrace_stub) br %r14 +#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PARENT_IP (STACK_FRAME_SIZE + 8) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + ENTRY(_mcount) #ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) + .globl ftrace_regs_caller + .set ftrace_regs_caller,ftrace_caller #endif - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) lgr %r1,%r15 - aghi %r15,-160 + aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stmg %r0,%r13,STACK_PTREGS_GPRS(%r15) + stg %r14,(STACK_PTREGS_PSW+8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + aghik %r2,%r14,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r14,ftrace_trace_function +#else lgr %r2,%r14 - lg %r3,168(%r15) aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) larl %r14,ftrace_trace_function lg %r14,0(%r14) +#endif + lg %r3,STACK_PARENT_IP(%r15) + la %r5,STACK_PTREGS(%r15) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. @@ -37,16 +56,16 @@ ENTRY(ftrace_caller) # j .+4 ENTRY(ftrace_graph_caller) j ftrace_graph_caller_end - lg %r2,168(%r15) - lg %r3,272(%r15) + lg %r2,STACK_PARENT_IP(%r15) + lg %r3,(STACK_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) + stg %r2,STACK_PARENT_IP(%r15) ftrace_graph_caller_end: .globl ftrace_graph_caller_end #endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) + lmg %r0,%r13,STACK_PTREGS_GPRS(%r15) + lg %r14,(STACK_PTREGS_PSW+8)(%r15) + aghi %r15,STACK_FRAME_SIZE br %r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -54,10 +73,10 @@ ftrace_graph_caller_end: ENTRY(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-160 + aghi %r15,-STACK_FRAME_OVERHEAD stg %r1,__SF_BACKCHAIN(%r15) brasl %r14,ftrace_return_to_handler - aghi %r15,160 + aghi %r15,STACK_FRAME_OVERHEAD lgr %r14,%r2 lmg %r2,%r5,32(%r15) br %r14 -- cgit v1.2.3 From 5d6a0163494c78ad7b6de733c8793e66b5da9212 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Aug 2014 13:16:09 +0200 Subject: s390/ftrace: enforce DYNAMIC_FTRACE if FUNCTION_TRACER is selected We have too many combinations for function tracing. Lets simply stick to the most advanced option, so we don't have to care of other combinations. This means we always select DYNAMIC_FTRACE if FUNCTION_TRACER is selected. In the s390 Makefile also remove CONFIG_FTRACE_SYSCALLS since that functionality got moved to architecture independent code in the meantime. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/kernel/Makefile | 4 +--- arch/s390/kernel/ftrace.c | 6 ------ arch/s390/kernel/mcount.S | 2 -- arch/s390/kernel/mcount64.S | 2 -- 5 files changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3f845fc02ac4..47492fc692f4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -97,6 +97,7 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 + select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES if !SMP select GENERIC_FIND_FIRST_BIT diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a95c4ca99617..d44245d4df37 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -53,9 +53,7 @@ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 14b61954d5a8..f908e42e11c4 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -17,8 +17,6 @@ #include #include "entry.h" -#ifdef CONFIG_DYNAMIC_FTRACE - void ftrace_disable_code(void); void ftrace_enable_insn(void); @@ -142,8 +140,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses @@ -169,7 +165,6 @@ out: return parent; } -#ifdef CONFIG_DYNAMIC_FTRACE /* * Patch the kernel code at ftrace_graph_caller location. The instruction * there is branch relative and save to prepare_ftrace_return. To disable @@ -219,5 +214,4 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_64BIT */ -#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 433c6dbfa442..be6dbd9a81a7 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -15,11 +15,9 @@ ENTRY(ftrace_stub) br %r14 ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) -#endif stm %r2,%r5,16(%r15) bras %r1,1f 0: .long ftrace_trace_function diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 4a65dabae851..8cf976f83a10 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -22,13 +22,11 @@ ENTRY(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller -#endif lgr %r1,%r15 aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) -- cgit v1.2.3 From ea2f47699082b971769be8b8f38c08b49219f471 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Sep 2014 10:37:40 +0200 Subject: s390/kprobes: remove unused jprobe_return_end() Even if it has a __used annotation it is actually unused. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index bc71a7b95af5..c48a00c9f351 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -789,11 +789,6 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -static void __used __kprobes jprobe_return_end(void) -{ - asm volatile("bcr 0,0"); -} - int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); -- cgit v1.2.3 From 3d1e220d08c6a00ffa83d39030b8162f66665b2b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Sep 2014 13:26:23 +0200 Subject: s390/ftrace: optimize mcount code Reduce the number of executed instructions within the mcount block if function tracing is enabled. We achieve that by using a non-standard C function call ABI. Since the called function is also written in assembler this is not a problem. This also allows to replace the unconditional store at the beginning of the mcount block with a larl instruction, which doesn't touch memory. In theory we could also patch the first instruction of the mcount block to enable and disable function tracing. However this would break kprobes. This could be fixed with implementing the "kprobes_on_ftrace" feature; however keeping the odd jprobes working seems not to be possible without a lot of code churn. Therefore keep the code easy and simply accept one wasted 1-cycle "larl" instruction per function prologue. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 2 +- arch/s390/kernel/ftrace.c | 57 +++++++++++++++++++++++++++++------------- arch/s390/kernel/mcount64.S | 30 +++++++++++----------- 3 files changed, 55 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 1759d73fb95b..d419362dc231 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -19,7 +19,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #endif /* __ASSEMBLY__ */ #ifdef CONFIG_64BIT -#define MCOUNT_INSN_SIZE 12 +#define MCOUNT_INSN_SIZE 18 #else #define MCOUNT_INSN_SIZE 22 #endif diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index f908e42e11c4..fcb009d3edde 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -1,7 +1,7 @@ /* * Dynamic function tracer architecture backend. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009,2014 * * Author(s): Heiko Carstens , * Martin Schwidefsky @@ -17,6 +17,7 @@ #include #include "entry.h" +void mcount_replace_code(void); void ftrace_disable_code(void); void ftrace_enable_insn(void); @@ -24,38 +25,50 @@ void ftrace_enable_insn(void); /* * The 64-bit mcount code looks like this: * stg %r14,8(%r15) # offset 0 - * > larl %r1,<&counter> # offset 6 - * > brasl %r14,_mcount # offset 12 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The middle two instructions of the mcount - * block get overwritten by ftrace_make_nop / ftrace_make_call. + * Total length is 24 bytes. The complete mcount block initially gets replaced + * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop + * only patch the jg/lg instruction within the block. + * Note: we do not patch the first instruction to an unconditional branch, + * since that would break kprobes/jprobes. It is easier to leave the larl + * instruction in and only modify the second instruction. * The 64-bit enabled ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * larl %r0,.+24 # offset 0 * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 - * The return points of the mcount/ftrace function have the same offset 18. - * The 64-bit disable ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 + * The ftrace function gets called with a non-standard C function call ABI + * where r0 contains the return address. It is also expected that the called + * function only clobbers r0 and r1, but restores r2-r15. + * The return point of the ftrace function has offset 24, so execution + * continues behind the mcount block. + * larl %r0,.+24 # offset 0 * > jg .+18 # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 * The jg instruction branches to offset 24 to skip as many instructions * as possible. */ asm( " .align 4\n" + "mcount_replace_code:\n" + " larl %r0,0f\n" "ftrace_disable_code:\n" " jg 0f\n" - " lgr %r0,%r0\n" - " basr %r14,%r1\n" + " br %r1\n" + " brcl 0,0\n" + " brc 0,0\n" "0:\n" " .align 4\n" "ftrace_enable_insn:\n" " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); +#define MCOUNT_BLOCK_SIZE 24 +#define MCOUNT_INSN_OFFSET 6 #define FTRACE_INSN_SIZE 6 #else /* CONFIG_64BIT */ @@ -116,6 +129,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { +#ifdef CONFIG_64BIT + /* Initial replacement of the whole mcount block */ + if (addr == MCOUNT_ADDR) { + if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, + mcount_replace_code, + MCOUNT_BLOCK_SIZE)) + return -EPERM; + return 0; + } +#endif if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, MCOUNT_INSN_SIZE)) return -EPERM; diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 8cf976f83a10..07abe8d464d4 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -16,7 +16,6 @@ ENTRY(ftrace_stub) br %r14 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PARENT_IP (STACK_FRAME_SIZE + 8) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) @@ -31,40 +30,39 @@ ENTRY(ftrace_caller) aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) - stmg %r0,%r13,STACK_PTREGS_GPRS(%r15) - stg %r14,(STACK_PTREGS_PSW+8)(%r15) + stg %r0,(STACK_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - aghik %r2,%r14,-MCOUNT_INSN_SIZE + aghik %r2,%r0,-MCOUNT_INSN_SIZE lgrl %r4,function_trace_op - lgrl %r14,ftrace_trace_function + lgrl %r1,ftrace_trace_function #else - lgr %r2,%r14 + lgr %r2,%r0 aghi %r2,-MCOUNT_INSN_SIZE larl %r4,function_trace_op lg %r4,0(%r4) - larl %r14,ftrace_trace_function - lg %r14,0(%r14) + larl %r1,ftrace_trace_function + lg %r1,0(%r1) #endif - lg %r3,STACK_PARENT_IP(%r15) + lgr %r3,%r14 la %r5,STACK_PTREGS(%r15) - basr %r14,%r14 + basr %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. The patched instruction is: # j .+4 ENTRY(ftrace_graph_caller) j ftrace_graph_caller_end - lg %r2,STACK_PARENT_IP(%r15) + lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) lg %r3,(STACK_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return - stg %r2,STACK_PARENT_IP(%r15) + stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) ftrace_graph_caller_end: .globl ftrace_graph_caller_end #endif - lmg %r0,%r13,STACK_PTREGS_GPRS(%r15) - lg %r14,(STACK_PTREGS_PSW+8)(%r15) - aghi %r15,STACK_FRAME_SIZE - br %r14 + lg %r1,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) + br %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 442302820356977237e32a76a211e7942255003a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Sep 2014 08:20:43 +0200 Subject: s390/spinlock: optimize spin_unlock code Use a memory barrier + store sequence instead of a load + compare and swap sequence to unlock a spinlock and an rw lock. For the spinlock case this saves us two memory reads and a not needed cpu serialization after the compare and swap instruction stored the new value. The kernel size (performance_defconfig) gets reduced by ~14k. Average execution time of a tight inlined spin_unlock loop drops from 5.8ns to 0.7ns on a zEC12 machine. An artificial stress test case where several counters are protected with a single spinlock and which are only incremented while holding the spinlock shows ~30% improvement on a 4 cpu machine. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/barrier.h | 6 ++++-- arch/s390/include/asm/spinlock.h | 21 ++++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 19ff956b752b..b5dce6544d76 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -15,11 +15,13 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 14,0\n" #else -#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 15,0\n" #endif +#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) + #define rmb() mb() #define wmb() mb() #define read_barrier_depends() do { } while(0) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 96879f7ad6da..d26ad2ac7cb2 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -64,11 +64,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); } -static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) -{ - return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); -} - static inline void arch_spin_lock(arch_spinlock_t *lp) { if (!arch_spin_trylock_once(lp)) @@ -91,7 +86,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - arch_spin_tryrelease_once(lp); + typecheck(unsigned int, lp->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (lp->lock) + : "d" (0) + : "cc", "memory"); } static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) @@ -179,7 +180,13 @@ static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) static inline void arch_write_unlock(arch_rwlock_t *rw) { - _raw_compare_and_swap(&rw->lock, 0x80000000, 0); + typecheck(unsigned int, rw->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (rw->lock) + : "d" (0) + : "cc", "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) -- cgit v1.2.3 From 3d598f47e804a77208c6bb0a454123018e2f2281 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:12 +0300 Subject: dmaengine: dw: move dw_dmac.h to where it belongs to There is a common storage for platform data related structures and definitions inside kernel source tree. The patch moves file from include/linux to include/linux/platform_data and renames it acoordingly. The users are also updated. Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar [For the arch/avr32/.* and .*sound/atmel.*] Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- arch/avr32/mach-at32ap/at32ap700x.c | 2 +- arch/avr32/mach-at32ap/include/mach/atmel-mci.h | 2 +- drivers/dma/dw/internal.h | 2 +- drivers/dma/dw/regs.h | 6 +- include/linux/dw_dmac.h | 111 ------------------------ include/linux/platform_data/dma-dw.h | 111 ++++++++++++++++++++++++ include/sound/atmel-abdac.h | 2 +- include/sound/atmel-ac97c.h | 2 +- sound/atmel/abdac.c | 2 +- sound/atmel/ac97c.c | 2 +- 11 files changed, 122 insertions(+), 122 deletions(-) delete mode 100644 include/linux/dw_dmac.h create mode 100644 include/linux/platform_data/dma-dw.h (limited to 'arch') diff --git a/MAINTAINERS b/MAINTAINERS index aefa94841ff3..a10072963889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7875,7 +7875,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar M: Andy Shevchenko S: Maintained -F: include/linux/dw_dmac.h +F: include/linux/platform_data/dma-dw.h F: drivers/dma/dw/ SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index db85b5ec3351..2a1aa712c6e7 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h index 4bba58561d5c..11d7f4b28dc8 100644 --- a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h +++ b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h @@ -1,7 +1,7 @@ #ifndef __MACH_ATMEL_MCI_H #define __MACH_ATMEL_MCI_H -#include +#include /** * struct mci_dma_data - DMA data for MCI interface diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index 32667f9e0dda..43cc1dfad5c9 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -12,7 +12,7 @@ #define _DW_DMAC_INTERNAL_H #include -#include +#include #include "regs.h" diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index bb98d3e91e8b..af02439155e9 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -11,7 +11,7 @@ #include #include -#include +#include #define DW_DMA_MAX_NR_CHANNELS 8 #define DW_DMA_MAX_NR_REQUESTS 16 @@ -161,7 +161,7 @@ struct dw_dma_regs { #define DWC_CTLH_DONE 0x00001000 #define DWC_CTLH_BLOCK_TS_MASK 0x00000fff -/* Bitfields in CFG_LO. Platform-configurable bits are in */ +/* Bitfields in CFG_LO. Platform-configurable bits are in */ #define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ #define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ #define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ @@ -172,7 +172,7 @@ struct dw_dma_regs { #define DWC_CFGL_RELOAD_SAR (1 << 30) #define DWC_CFGL_RELOAD_DAR (1 << 31) -/* Bitfields in CFG_HI. Platform-configurable bits are in */ +/* Bitfields in CFG_HI. Platform-configurable bits are in */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h deleted file mode 100644 index 68b4024184de..000000000000 --- a/include/linux/dw_dmac.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Driver for the Synopsys DesignWare DMA Controller - * - * Copyright (C) 2007 Atmel Corporation - * Copyright (C) 2010-2011 ST Microelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef DW_DMAC_H -#define DW_DMAC_H - -#include - -/** - * struct dw_dma_slave - Controller-specific information about a slave - * - * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g. "foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. - */ -struct dw_dma_slave { - struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; - u8 src_master; - u8 dst_master; -}; - -/** - * struct dw_dma_platform_data - Controller configuration parameters - * @nr_channels: Number of channels supported by hardware (max 8) - * @is_private: The device channels should be marked as private and not for - * by the general purpose DMA channel allocator. - * @chan_allocation_order: Allocate channels starting from 0 or 7 - * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. - * @block_size: Maximum block size supported by the controller - * @nr_masters: Number of AHB masters supported by the controller - * @data_width: Maximum data width supported by hardware per AHB master - * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) - */ -struct dw_dma_platform_data { - unsigned int nr_channels; - bool is_private; -#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ -#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ - unsigned char chan_allocation_order; -#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ -#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ - unsigned char chan_priority; - unsigned short block_size; - unsigned char nr_masters; - unsigned char data_width[4]; -}; - -/* bursts size */ -enum dw_dma_msize { - DW_DMA_MSIZE_1, - DW_DMA_MSIZE_4, - DW_DMA_MSIZE_8, - DW_DMA_MSIZE_16, - DW_DMA_MSIZE_32, - DW_DMA_MSIZE_64, - DW_DMA_MSIZE_128, - DW_DMA_MSIZE_256, -}; - -/* Platform-configurable bits in CFG_HI */ -#define DWC_CFGH_FCMODE (1 << 0) -#define DWC_CFGH_FIFO_MODE (1 << 1) -#define DWC_CFGH_PROTCTL(x) ((x) << 2) -#define DWC_CFGH_SRC_PER(x) ((x) << 7) -#define DWC_CFGH_DST_PER(x) ((x) << 11) - -/* Platform-configurable bits in CFG_LO */ -#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ -#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) -#define DWC_CFGL_LOCK_CH_XACT (2 << 12) -#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ -#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) -#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) -#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ -#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ -#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ -#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ - -/* DMA API extensions */ -struct dw_cyclic_desc { - struct dw_desc **desc; - unsigned long periods; - void (*period_callback)(void *param); - void *period_callback_param; -}; - -struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, - dma_addr_t buf_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction); -void dw_dma_cyclic_free(struct dma_chan *chan); -int dw_dma_cyclic_start(struct dma_chan *chan); -void dw_dma_cyclic_stop(struct dma_chan *chan); - -dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); - -dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); - -#endif /* DW_DMAC_H */ diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h new file mode 100644 index 000000000000..68b4024184de --- /dev/null +++ b/include/linux/platform_data/dma-dw.h @@ -0,0 +1,111 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef DW_DMAC_H +#define DW_DMAC_H + +#include + +/** + * struct dw_dma_slave - Controller-specific information about a slave + * + * @dma_dev: required DMA master device. Depricated. + * @bus_id: name of this device channel, not just a device name since + * devices may have more than one channel e.g. "foo_tx" + * @cfg_hi: Platform-specific initializer for the CFG_HI register + * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_master: src master for transfers on allocated channel. + * @dst_master: dest master for transfers on allocated channel. + */ +struct dw_dma_slave { + struct device *dma_dev; + u32 cfg_hi; + u32 cfg_lo; + u8 src_master; + u8 dst_master; +}; + +/** + * struct dw_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + * @is_private: The device channels should be marked as private and not for + * by the general purpose DMA channel allocator. + * @chan_allocation_order: Allocate channels starting from 0 or 7 + * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. + * @block_size: Maximum block size supported by the controller + * @nr_masters: Number of AHB masters supported by the controller + * @data_width: Maximum data width supported by hardware per AHB master + * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) + */ +struct dw_dma_platform_data { + unsigned int nr_channels; + bool is_private; +#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ +#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ + unsigned char chan_allocation_order; +#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ +#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; + unsigned short block_size; + unsigned char nr_masters; + unsigned char data_width[4]; +}; + +/* bursts size */ +enum dw_dma_msize { + DW_DMA_MSIZE_1, + DW_DMA_MSIZE_4, + DW_DMA_MSIZE_8, + DW_DMA_MSIZE_16, + DW_DMA_MSIZE_32, + DW_DMA_MSIZE_64, + DW_DMA_MSIZE_128, + DW_DMA_MSIZE_256, +}; + +/* Platform-configurable bits in CFG_HI */ +#define DWC_CFGH_FCMODE (1 << 0) +#define DWC_CFGH_FIFO_MODE (1 << 1) +#define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_SRC_PER(x) ((x) << 7) +#define DWC_CFGH_DST_PER(x) ((x) << 11) + +/* Platform-configurable bits in CFG_LO */ +#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +#define DWC_CFGL_LOCK_CH_XACT (2 << 12) +#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ +#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ + +/* DMA API extensions */ +struct dw_cyclic_desc { + struct dw_desc **desc; + unsigned long periods; + void (*period_callback)(void *param); + void *period_callback_param; +}; + +struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, size_t period_len, + enum dma_transfer_direction direction); +void dw_dma_cyclic_free(struct dma_chan *chan); +int dw_dma_cyclic_start(struct dma_chan *chan); +void dw_dma_cyclic_stop(struct dma_chan *chan); + +dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); + +dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); + +#endif /* DW_DMAC_H */ diff --git a/include/sound/atmel-abdac.h b/include/sound/atmel-abdac.h index edff6a8ba1b5..a8f735d677fa 100644 --- a/include/sound/atmel-abdac.h +++ b/include/sound/atmel-abdac.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_ABDAC_H #define __INCLUDE_SOUND_ATMEL_ABDAC_H -#include +#include /** * struct atmel_abdac_pdata - board specific ABDAC configuration diff --git a/include/sound/atmel-ac97c.h b/include/sound/atmel-ac97c.h index 00e6c289a936..f2a1cdc37661 100644 --- a/include/sound/atmel-ac97c.h +++ b/include/sound/atmel-ac97c.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_AC97C_H #define __INCLUDE_SOUND_ATMEL_AC97C_H -#include +#include #define AC97C_CAPTURE 0x01 #define AC97C_PLAYBACK 0x02 diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c index edf2ca72d518..154a7c44e38d 100644 --- a/sound/atmel/abdac.c +++ b/sound/atmel/abdac.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c index a04d23174dc2..1dfb35afef8f 100644 --- a/sound/atmel/ac97c.c +++ b/sound/atmel/ac97c.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include -- cgit v1.2.3 From 61c4319cb523a2346416cddaa7d4e2f9260c14d9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:13 +0300 Subject: avr32: at32ap700x: don't rely on default DMA masters In future we are going to remove the defaults of AHB masters from dw_dmac driver. It means each user have to supply proper source and destination masters by itself explicitly. Signed-off-by: Andy Shevchenko Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- arch/avr32/mach-at32ap/at32ap700x.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 2a1aa712c6e7..ec7be287a97e 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1360,6 +1360,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) | DWC_CFGH_DST_PER(1)); slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + slave->sdata.src_master = 1; + slave->sdata.dst_master = 0; data->dma_slave = slave; -- cgit v1.2.3 From 7e1e2f27c5508518e58e5cbb11e26cbb815f4c56 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:14 +0300 Subject: dmaengine: dw: convert dw_dma_slave to use explicit HS interfaces Instead of exposing the possibility to set DMA registers CFG_HI and CFG_LO strict user to provide handshake interfaces explicitly. Signed-off-by: Andy Shevchenko Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- arch/avr32/mach-at32ap/at32ap700x.c | 15 +++++---------- drivers/dma/dw/core.c | 4 ++-- include/linux/platform_data/dma-dw.h | 10 ++++------ 3 files changed, 11 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index ec7be287a97e..37b75602adf6 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1356,10 +1356,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) goto fail; slave->sdata.dma_dev = &dw_dmac0_device.dev; - slave->sdata.cfg_hi = (DWC_CFGH_SRC_PER(0) - | DWC_CFGH_DST_PER(1)); - slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL - | DWC_CFGL_HS_SRC_POL); + slave->sdata.src_id = 0; + slave->sdata.dst_id = 1; slave->sdata.src_master = 1; slave->sdata.dst_master = 0; @@ -2054,8 +2052,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for capture should be configured. */ if (flags & AC97C_CAPTURE) { rx_dws->dma_dev = &dw_dmac0_device.dev; - rx_dws->cfg_hi = DWC_CFGH_SRC_PER(3); - rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + rx_dws->src_id = 3; rx_dws->src_master = 0; rx_dws->dst_master = 1; } @@ -2063,8 +2060,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for playback should be configured. */ if (flags & AC97C_PLAYBACK) { tx_dws->dma_dev = &dw_dmac0_device.dev; - tx_dws->cfg_hi = DWC_CFGH_DST_PER(4); - tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + tx_dws->dst_id = 4; tx_dws->src_master = 0; tx_dws->dst_master = 1; } @@ -2136,8 +2132,7 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws = &data->dws; dws->dma_dev = &dw_dmac0_device.dev; - dws->cfg_hi = DWC_CFGH_DST_PER(2); - dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + dws->dst_id = 2; dws->src_master = 0; dws->dst_master = 1; diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1af731b83b3f..0a9c052d437c 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -155,8 +155,8 @@ static void dwc_initialize(struct dw_dma_chan *dwc) */ BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - cfghi = dws->cfg_hi; - cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK; + cfghi |= DWC_CFGH_DST_PER(dws->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dws->src_id); } else { if (dwc->direction == DMA_MEM_TO_DEV) cfghi = DWC_CFGH_DST_PER(dwc->request_line); diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 68b4024184de..bc411a1bf8e7 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -17,17 +17,15 @@ * struct dw_dma_slave - Controller-specific information about a slave * * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g. "foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_id: src request line + * @dst_id: dst request line * @src_master: src master for transfers on allocated channel. * @dst_master: dest master for transfers on allocated channel. */ struct dw_dma_slave { struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; + u8 src_id; + u8 dst_id; u8 src_master; u8 dst_master; }; -- cgit v1.2.3 From 1d46fea7d091f9dc2d4fd3fcb9f0117ca288f9a5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 27 Aug 2014 00:42:56 +0200 Subject: drm/rcar-du: Use struct videomode in platform data In preparation for DT support where panel timings will be described by a DRM-agnostic video mode, replace the struct drm_mode_modeinfo instance in the panel platform data with a struct videomode. Signed-off-by: Laurent Pinchart --- arch/arm/mach-shmobile/board-koelsch-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-koelsch.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-marzen.c | 19 +++++++++---------- drivers/gpu/drm/rcar-du/Kconfig | 1 + drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c | 15 +++------------ include/linux/platform_data/rcar-du.h | 4 ++-- 8 files changed, 51 insertions(+), 64 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/board-koelsch-reference.c b/arch/arm/mach-shmobile/board-koelsch-reference.c index 3ff88c138896..364e69bf85d4 100644 --- a/arch/arm/mach-shmobile/board-koelsch-reference.c +++ b/arch/arm/mach-shmobile/board-koelsch-reference.c @@ -41,16 +41,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-koelsch.c b/arch/arm/mach-shmobile/board-koelsch.c index b7d5bc7659cd..ad10ddb6a321 100644 --- a/arch/arm/mach-shmobile/board-koelsch.c +++ b/arch/arm/mach-shmobile/board-koelsch.c @@ -63,16 +63,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager-reference.c b/arch/arm/mach-shmobile/board-lager-reference.c index 41c808e56005..12a53a1c3d02 100644 --- a/arch/arm/mach-shmobile/board-lager-reference.c +++ b/arch/arm/mach-shmobile/board-lager-reference.c @@ -43,16 +43,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index e1d8215da0b0..80576c2ee668 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -99,16 +99,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index e5cf4201e769..ce33d7825c49 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -192,16 +192,15 @@ static struct rcar_du_encoder_data du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 2e3d7b5b0ad7..c96f6089f8bf 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -6,6 +6,7 @@ config DRM_RCAR_DU select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_FB_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an R-Car chipset. If M is selected the module will be called rcar-du-drm. diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c index cfcf6e74ad0a..d29544121658 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c @@ -40,18 +40,9 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector) return 0; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; - mode->clock = lvdscon->panel->mode.clock; - mode->hdisplay = lvdscon->panel->mode.hdisplay; - mode->hsync_start = lvdscon->panel->mode.hsync_start; - mode->hsync_end = lvdscon->panel->mode.hsync_end; - mode->htotal = lvdscon->panel->mode.htotal; - mode->vdisplay = lvdscon->panel->mode.vdisplay; - mode->vsync_start = lvdscon->panel->mode.vsync_start; - mode->vsync_end = lvdscon->panel->mode.vsync_end; - mode->vtotal = lvdscon->panel->mode.vtotal; - mode->flags = lvdscon->panel->mode.flags; - - drm_mode_set_name(mode); + + drm_display_mode_from_videomode(&lvdscon->panel->mode, mode); + drm_mode_probed_add(connector, mode); return 1; diff --git a/include/linux/platform_data/rcar-du.h b/include/linux/platform_data/rcar-du.h index 1a2e9901a22e..a5f045e1d8fe 100644 --- a/include/linux/platform_data/rcar-du.h +++ b/include/linux/platform_data/rcar-du.h @@ -14,7 +14,7 @@ #ifndef __RCAR_DU_H__ #define __RCAR_DU_H__ -#include +#include