diff options
144 files changed, 2160 insertions, 1248 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 560ecce38ff5..f1bc3dc6b369 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4292,10 +4292,7 @@ S: Maintained F: net/sched/sch_netem.c NETERION 10GbE DRIVERS (s2io/vxge) -M: Ramkrishna Vepa <ramkrishna.vepa@exar.com> -M: Sivakumar Subramani <sivakumar.subramani@exar.com> -M: Sreenivasa Honnur <sreenivasa.honnur@exar.com> -M: Jon Mason <jon.mason@exar.com> +M: Jon Mason <jdmason@kudzu.us> L: netdev@vger.kernel.org W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 38 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h index 96fc62366aa4..ed28bcd5bb85 100644 --- a/arch/ia64/include/asm/xen/hypercall.h +++ b/arch/ia64/include/asm/xen/hypercall.h @@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2, static inline int xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) { - return _hypercall2(int, sched_op_new, cmd, arg); + return _hypercall2(int, sched_op, cmd, arg); } static inline long diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c index fd66b048c6fa..419c8620945a 100644 --- a/arch/ia64/xen/suspend.c +++ b/arch/ia64/xen/suspend.c @@ -37,19 +37,14 @@ xen_mm_unpin_all(void) /* nothing */ } -void xen_pre_device_suspend(void) -{ - /* nothing */ -} - void -xen_pre_suspend() +xen_arch_pre_suspend() { /* nothing */ } void -xen_post_suspend(int suspend_cancelled) +xen_arch_post_suspend(int suspend_cancelled) { if (suspend_cancelled) return; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f5ecc0566bc2..d88983516e26 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS select HAVE_GENERIC_DMA_COHERENT select HAVE_IDE select HAVE_OPROFILE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_ARCH_KGDB @@ -208,6 +209,7 @@ config MACH_JZ4740 select ARCH_REQUIRE_GPIOLIB select SYS_HAS_EARLY_PRINTK select HAVE_PWM + select HAVE_CLK config LASAT bool "LASAT Networks platforms" @@ -333,6 +335,8 @@ config PNX8550_STB810 config PMC_MSP bool "PMC-Sierra MSP chipsets" depends on EXPERIMENTAL + select CEVT_R4K + select CSRC_R4K select DMA_NONCOHERENT select SWAP_IO_SPACE select NO_EXCEPT_FILL diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c index 6398fa95905c..40b84b991191 100644 --- a/arch/mips/alchemy/mtx-1/board_setup.c +++ b/arch/mips/alchemy/mtx-1/board_setup.c @@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert); static void mtx1_reset(char *c) { - /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ - au_writel(0x00000000, 0xAE00001C); + /* Jump to the reset vector */ + __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); } static void mtx1_power_off(void) diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c index e30e42add697..956f946218c5 100644 --- a/arch/mips/alchemy/mtx-1/platform.c +++ b/arch/mips/alchemy/mtx-1/platform.c @@ -28,6 +28,8 @@ #include <linux/mtd/physmap.h> #include <mtd/mtd-abi.h> +#include <asm/mach-au1x00/au1xxx_eth.h> + static struct gpio_keys_button mtx1_gpio_button[] = { { .gpio = 207, @@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = { &mtx1_mtd, }; +static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = { + .phy_search_highest_addr = 1, + .phy1_search_mac0 = 1, +}; + static int __init mtx1_register_devices(void) { int rc; + au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata); + rc = gpio_request(mtx1_gpio_button[0].gpio, mtx1_gpio_button[0].desc); if (rc < 0) { diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c index b43c918925d3..80c521e5290d 100644 --- a/arch/mips/alchemy/xxs1500/board_setup.c +++ b/arch/mips/alchemy/xxs1500/board_setup.c @@ -36,8 +36,8 @@ static void xxs1500_reset(char *c) { - /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ - au_writel(0x00000000, 0xAE00001C); + /* Jump to the reset vector */ + __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); } static void xxs1500_power_off(void) diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h index e00007cf8162..d0c77496c728 100644 --- a/arch/mips/include/asm/perf_event.h +++ b/arch/mips/include/asm/perf_event.h @@ -11,15 +11,5 @@ #ifndef __MIPS_PERF_EVENT_H__ #define __MIPS_PERF_EVENT_H__ - -/* - * MIPS performance counters do not raise NMI upon overflow, a regular - * interrupt will be signaled. Hence we can do the pending perf event - * work at the tail of the irq handler. - */ -static inline void -set_perf_event_pending(void) -{ -} - +/* Leave it empty here. The file is required by linux/perf_event.h */ #endif /* __MIPS_PERF_EVENT_H__ */ diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 5a84a1f11231..94ca2b018af7 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -17,29 +17,13 @@ #include <asm/cacheflush.h> #include <asm/uasm.h> -/* - * If the Instruction Pointer is in module space (0xc0000000), return true; - * otherwise, it is in kernel space (0x80000000), return false. - * - * FIXME: This will not work when the kernel space and module space are the - * same. If they are the same, we need to modify scripts/recordmcount.pl, - * ftrace_make_nop/call() and the other related parts to ensure the - * enabling/disabling of the calling site to _mcount is right for both kernel - * and module. - */ - -static inline int in_module(unsigned long ip) -{ - return ip & 0x40000000; -} +#include <asm-generic/sections.h> #ifdef CONFIG_DYNAMIC_FTRACE #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ #define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ -#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */ -#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */ #define INSN_NOP 0x00000000 /* nop */ #define INSN_JAL(addr) \ ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK))) @@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void) #endif } +/* + * Check if the address is in kernel space + * + * Clone core_kernel_text() from kernel/extable.c, but doesn't call + * init_kernel_text() for Ftrace doesn't trace functions in init sections. + */ +static inline int in_kernel_space(unsigned long ip) +{ + if (ip >= (unsigned long)_stext && + ip <= (unsigned long)_etext) + return 1; + return 0; +} + static int ftrace_modify_code(unsigned long ip, unsigned int new_code) { int faulted; @@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code) return 0; } +/* + * The details about the calling site of mcount on MIPS + * + * 1. For kernel: + * + * move at, ra + * jal _mcount --> nop + * + * 2. For modules: + * + * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT + * + * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005) + * addiu v1, v1, low_16bit_of_mcount + * move at, ra + * move $12, ra_address + * jalr v1 + * sub sp, sp, 8 + * 1: offset = 5 instructions + * 2.2 For the Other situations + * + * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004) + * addiu v1, v1, low_16bit_of_mcount + * move at, ra + * jalr v1 + * nop | move $12, ra_address | sub sp, sp, 8 + * 1: offset = 4 instructions + */ + +#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) +#define MCOUNT_OFFSET_INSNS 5 +#else +#define MCOUNT_OFFSET_INSNS 4 +#endif +#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS) + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { @@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; /* - * We have compiled module with -mlong-calls, but compiled the kernel - * without it, we need to cope with them respectively. + * If ip is in kernel space, no long call, otherwise, long call is + * needed. */ - if (in_module(ip)) { -#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) - /* - * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005) - * addiu v1, v1, low_16bit_of_mcount - * move at, ra - * move $12, ra_address - * jalr v1 - * sub sp, sp, 8 - * 1: offset = 5 instructions - */ - new = INSN_B_1F_5; -#else - /* - * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004) - * addiu v1, v1, low_16bit_of_mcount - * move at, ra - * jalr v1 - * nop | move $12, ra_address | sub sp, sp, 8 - * 1: offset = 4 instructions - */ - new = INSN_B_1F_4; -#endif - } else { - /* - * move at, ra - * jal _mcount --> nop - */ - new = INSN_NOP; - } + new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F; + return ftrace_modify_code(ip, new); } @@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned int new; unsigned long ip = rec->ip; - /* ip, module: 0xc0000000, kernel: 0x80000000 */ - new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller; + new = in_kernel_space(ip) ? insn_jal_ftrace_caller : + insn_lui_v1_hi16_mcount; return ftrace_modify_code(ip, new); } @@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void) #define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ #define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ -unsigned long ftrace_get_parent_addr(unsigned long self_addr, - unsigned long parent, - unsigned long parent_addr, - unsigned long fp) +unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long + old_parent_ra, unsigned long parent_ra_addr, unsigned long fp) { - unsigned long sp, ip, ra; + unsigned long sp, ip, tmp; unsigned int code; int faulted; /* - * For module, move the ip from calling site of mcount to the - * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for - * kernel, move to the instruction "move ra, at"(offset is 12) + * For module, move the ip from the return address after the + * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for + * kernel, move after the instruction "move ra, at"(offset is 16) */ - ip = self_addr - (in_module(self_addr) ? 20 : 12); + ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24); /* * search the text until finding the non-store instruction or "s{d,w} * ra, offset(sp)" instruction */ do { - ip -= 4; - /* get the code at "ip": code = *(unsigned int *)ip; */ safe_load_code(code, ip, faulted); @@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr, * store the ra on the stack */ if ((code & S_R_SP) != S_R_SP) - return parent_addr; + return parent_ra_addr; - } while (((code & S_RA_SP) != S_RA_SP)); + /* Move to the next instruction */ + ip -= 4; + } while ((code & S_RA_SP) != S_RA_SP); sp = fp + (code & OFFSET_MASK); - /* ra = *(unsigned long *)sp; */ - safe_load_stack(ra, sp, faulted); + /* tmp = *(unsigned long *)sp; */ + safe_load_stack(tmp, sp, faulted); if (unlikely(faulted)) return 0; - if (ra == parent) + if (tmp == old_parent_ra) return sp; return 0; } @@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr, * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, unsigned long fp) { - unsigned long old; + unsigned long old_parent_ra; struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; - int faulted; + int faulted, insns; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; /* - * "parent" is the stack address saved the return address of the caller - * of _mcount. + * "parent_ra_addr" is the stack address saved the return address of + * the caller of _mcount. * * if the gcc < 4.5, a leaf function does not save the return address * in the stack address, so, we "emulate" one in _mcount's stack space, @@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, * do it in ftrace_graph_caller of mcount.S. */ - /* old = *parent; */ - safe_load_stack(old, parent, faulted); + /* old_parent_ra = *parent_ra_addr; */ + safe_load_stack(old_parent_ra, parent_ra_addr, faulted); if (unlikely(faulted)) goto out; #ifndef KBUILD_MCOUNT_RA_ADDRESS - parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old, - (unsigned long)parent, fp); + parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra, + old_parent_ra, (unsigned long)parent_ra_addr, fp); /* * If fails when getting the stack address of the non-leaf function's * ra, stop function graph tracer and return */ - if (parent == 0) + if (parent_ra_addr == 0) goto out; #endif - /* *parent = return_hooker; */ - safe_store_stack(return_hooker, parent, faulted); + /* *parent_ra_addr = return_hooker; */ + safe_store_stack(return_hooker, parent_ra_addr, faulted); if (unlikely(faulted)) goto out; - if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) == - -EBUSY) { - *parent = old; + if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp) + == -EBUSY) { + *parent_ra_addr = old_parent_ra; return; } - trace.func = self_addr; + /* + * Get the recorded ip of the current mcount calling site in the + * __mcount_loc section, which will be used to filter the function + * entries configured through the tracing/set_graph_function interface. + */ + + insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1; + trace.func = self_ra - (MCOUNT_INSN_SIZE * insns); /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { current->curr_ret_stack--; - *parent = old; + *parent_ra_addr = old_parent_ra; } return; out: diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index 2b7f3f703b83..a8244854d3dc 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event, return ret; } -static int mipspmu_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - /* To look for a free counter for this event. */ - idx = mipspmu->alloc_counter(cpuc, hwc); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then - * make sure it is disabled. - */ - event->hw.idx = idx; - mipspmu->disable_event(idx); - cpuc->events[idx] = event; - - /* Set the period for the event. */ - mipspmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - mipspmu->enable_event(hwc, idx); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - return err; -} - static void mipspmu_event_update(struct perf_event *event, struct hw_perf_event *hwc, int idx) @@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event, unsigned long flags; int shift = 64 - TOTAL_BITS; s64 prev_raw_count, new_raw_count; - s64 delta; + u64 delta; again: prev_raw_count = local64_read(&hwc->prev_count); @@ -231,32 +196,90 @@ again: return; } -static void mipspmu_disable(struct perf_event *event) +static void mipspmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!mipspmu) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Set the period for the event. */ + mipspmu_event_set_period(event, hwc, hwc->idx); + + /* Enable the event. */ + mipspmu->enable_event(hwc, hwc->idx); +} + +static void mipspmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!mipspmu) + return; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* We are working on a local event. */ + mipspmu->disable_event(hwc->idx); + barrier(); + mipspmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static int mipspmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; + int idx; + int err = 0; + perf_pmu_disable(event->pmu); - WARN_ON(idx < 0 || idx >= mipspmu->num_counters); + /* To look for a free counter for this event. */ + idx = mipspmu->alloc_counter(cpuc, hwc); + if (idx < 0) { + err = idx; + goto out; + } - /* We are working on a local event. */ + /* + * If there is an event in the counter we are going to use then + * make sure it is disabled. + */ + event->hw.idx = idx; mipspmu->disable_event(idx); + cpuc->events[idx] = event; - barrier(); - - mipspmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + mipspmu_start(event, PERF_EF_RELOAD); + /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; } -static void mipspmu_unthrottle(struct perf_event *event) +static void mipspmu_del(struct perf_event *event, int flags) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; - mipspmu->enable_event(hwc, hwc->idx); + WARN_ON(idx < 0 || idx >= mipspmu->num_counters); + + mipspmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); } static void mipspmu_read(struct perf_event *event) @@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event) mipspmu_event_update(event, hwc, hwc->idx); } -static struct pmu pmu = { - .enable = mipspmu_enable, - .disable = mipspmu_disable, - .unthrottle = mipspmu_unthrottle, - .read = mipspmu_read, -}; +static void mipspmu_enable(struct pmu *pmu) +{ + if (mipspmu) + mipspmu->start(); +} + +static void mipspmu_disable(struct pmu *pmu) +{ + if (mipspmu) + mipspmu->stop(); +} static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmu_reserve_mutex); @@ -318,6 +346,82 @@ static void mipspmu_free_irq(void) perf_irq = save_perf_irq; } +/* + * mipsxx/rm9000/loongson2 have different performance counters, they have + * specific low-level init routines. + */ +static void reset_counters(void *arg); +static int __hw_perf_event_init(struct perf_event *event); + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, + &pmu_reserve_mutex)) { + /* + * We must not call the destroy function with interrupts + * disabled. + */ + on_each_cpu(reset_counters, + (void *)(long)mipspmu->num_counters, 1); + mipspmu_free_irq(); + mutex_unlock(&pmu_reserve_mutex); + } +} + +static int mipspmu_event_init(struct perf_event *event) +{ + int err = 0; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + if (!mipspmu || event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + if (!atomic_inc_not_zero(&active_events)) { + if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) { + atomic_dec(&active_events); + return -ENOSPC; + } + + mutex_lock(&pmu_reserve_mutex); + if (atomic_read(&active_events) == 0) + err = mipspmu_get_irq(); + + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmu_reserve_mutex); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static struct pmu pmu = { + .pmu_enable = mipspmu_enable, + .pmu_disable = mipspmu_disable, + .event_init = mipspmu_event_init, + .add = mipspmu_add, + .del = mipspmu_del, + .start = mipspmu_start, + .stop = mipspmu_stop, + .read = mipspmu_read, +}; + static inline unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev) { @@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc, { struct hw_perf_event fake_hwc = event->hw; - if (event->pmu && event->pmu != &pmu) - return 0; + /* Allow mixed event group. So return 1 to pass validation. */ + if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF) + return 1; return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0; } @@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event) return 0; } -/* - * mipsxx/rm9000/loongson2 have different performance counters, they have - * specific low-level init routines. - */ -static void reset_counters(void *arg); -static int __hw_perf_event_init(struct perf_event *event); - -static void hw_perf_event_destroy(struct perf_event *event) -{ - if (atomic_dec_and_mutex_lock(&active_events, - &pmu_reserve_mutex)) { - /* - * We must not call the destroy function with interrupts - * disabled. - */ - on_each_cpu(reset_counters, - (void *)(long)mipspmu->num_counters, 1); - mipspmu_free_irq(); - mutex_unlock(&pmu_reserve_mutex); - } -} - -const struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = 0; - - if (!mipspmu || event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) - return ERR_PTR(-ENODEV); - - if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) { - atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); - } - - mutex_lock(&pmu_reserve_mutex); - if (atomic_read(&active_events) == 0) - err = mipspmu_get_irq(); - - if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmu_reserve_mutex); - } - - if (err) - return ERR_PTR(err); - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err ? ERR_PTR(err) : &pmu; -} - -void hw_perf_enable(void) -{ - if (mipspmu) - mipspmu->start(); -} - -void hw_perf_disable(void) -{ - if (mipspmu) - mipspmu->stop(); -} - /* This is needed by specific irq handlers in perf_event_*.c */ static void handle_associated_event(struct cpu_hw_events *cpuc, @@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc, #include "perf_event_mipsxx.c" /* Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * Leave userspace callchain empty for now. When we find a way to trace * the user stack callchains, we add here. */ -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry, while (!kstack_end(sp)) { addr = *sp++; if (__kernel_text_address(addr)) { - callchain_store(entry, addr); + perf_callchain_store(entry, addr); if (entry->nr >= PERF_MAX_STACK_DEPTH) break; } } } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp = regs->regs[29]; #ifdef CONFIG_KALLSYMS unsigned long ra = regs->regs[31]; unsigned long pc = regs->cp0_epc; - callchain_store(entry, PERF_CONTEXT_KERNEL); if (raw_show_trace || !__kernel_text_address(pc)) { unsigned long stack_page = (unsigned long)task_stack_page(current); @@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs, return; } do { - callchain_store(entry, pc); + perf_callchain_store(entry, pc); if (entry->nr >= PERF_MAX_STACK_DEPTH) break; pc = unwind_stack(current, &sp, pc, &ra); } while (pc); #else - callchain_store(entry, PERF_CONTEXT_KERNEL); save_raw_perf_callchain(entry, sp); #endif } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 183e0d226669..d9a7db78ed62 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void) * interrupt, not NMI. */ if (handled == IRQ_HANDLED) - perf_event_do_pending(); + irq_work_run(); #ifdef CONFIG_MIPS_MT_SMP read_unlock(&pmuint_rwlock); @@ -1045,6 +1045,8 @@ init_hw_perf_events(void) "CPU, irq %d%s\n", mipspmu->name, counters, irq, irq < 0 ? " (share with timer interrupt)" : ""); + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + return 0; } early_initcall(init_hw_perf_events); diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 5922342bca39..dbbe0ce48d89 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc) static int protected_restore_fp_context(struct sigcontext __user *sc) { - int err, tmp; + int err, tmp __maybe_unused; while (1) { lock_fpu_owner(); own_fpu_inatomic(0); diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index a0ed0e052b2e..aae986613795 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc) static int protected_restore_fp_context32(struct sigcontext32 __user *sc) { - int err, tmp; + int err, tmp __maybe_unused; while (1) { lock_fpu_owner(); own_fpu_inatomic(0); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 383aeb95cb49..32a256101082 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void) */ static struct task_struct *cpu_idle_thread[NR_CPUS]; +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit do_fork_idle(struct work_struct *work) +{ + struct create_idle *c_idle = + container_of(work, struct create_idle, work); + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + int __cpuinit __cpu_up(unsigned int cpu) { struct task_struct *idle; @@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu) * Linux can schedule processes on this slave. */ if (!cpu_idle_thread[cpu]) { - idle = fork_idle(cpu); - cpu_idle_thread[cpu] = idle; + /* + * Schedule work item to avoid forking user task + * Ported from arch/x86/kernel/smpboot.c + */ + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + + INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + idle = cpu_idle_thread[cpu] = c_idle.idle; if (IS_ERR(idle)) panic(KERN_ERR "Fork failed for CPU %d", cpu); diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 1dc6edff45e0..58beabf50b3c 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -383,12 +383,11 @@ save_static_function(sys_sysmips); static int __used noinline _sys_sysmips(nabi_no_regargs struct pt_regs regs) { - long cmd, arg1, arg2, arg3; + long cmd, arg1, arg2; cmd = regs.regs[4]; arg1 = regs.regs[5]; arg2 = regs.regs[6]; - arg3 = regs.regs[7]; switch (cmd) { case MIPS_ATOMIC_SET: @@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs) if (arg1 & 2) set_thread_flag(TIF_LOGADE); else - clear_thread_flag(TIF_FIXADE); + clear_thread_flag(TIF_LOGADE); return 0; diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6a1fdfef8fde..ab52b7cf3b6b 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -148,9 +148,9 @@ struct { spinlock_t tc_list_lock; struct list_head tc_list; /* Thread contexts */ } vpecontrol = { - .vpe_list_lock = SPIN_LOCK_UNLOCKED, + .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock), .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list), - .tc_list_lock = SPIN_LOCK_UNLOCKED, + .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock), .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list) }; diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig index 6e1b77fec7ea..aca93eed8779 100644 --- a/arch/mips/loongson/Kconfig +++ b/arch/mips/loongson/Kconfig @@ -1,6 +1,7 @@ +if MACH_LOONGSON + choice prompt "Machine Type" - depends on MACH_LOONGSON config LEMOTE_FULOONG2E bool "Lemote Fuloong(2e) mini-PC" @@ -87,3 +88,5 @@ config LOONGSON_UART_BASE config LOONGSON_MC146818 bool default n + +endif # MACH_LOONGSON diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c index 1a06defc4f7f..353e1d2e41a5 100644 --- a/arch/mips/loongson/common/cmdline.c +++ b/arch/mips/loongson/common/cmdline.c @@ -44,10 +44,5 @@ void __init prom_init_cmdline(void) strcat(arcs_cmdline, " "); } - if ((strstr(arcs_cmdline, "console=")) == NULL) - strcat(arcs_cmdline, " console=ttyS0,115200"); - if ((strstr(arcs_cmdline, "root=")) == NULL) - strcat(arcs_cmdline, " root=/dev/hda1"); - prom_init_machtype(); } diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c index 81fbe6b73f91..2efd5d9dee27 100644 --- a/arch/mips/loongson/common/machtype.c +++ b/arch/mips/loongson/common/machtype.c @@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void) void __init prom_init_machtype(void) { - char *p, str[MACHTYPE_LEN]; + char *p, str[MACHTYPE_LEN + 1]; int machtype = MACH_LEMOTE_FL2E; mips_machtype = LOONGSON_MACHTYPE; @@ -53,6 +53,7 @@ void __init prom_init_machtype(void) } p += strlen("machtype="); strncpy(str, p, MACHTYPE_LEN); + str[MACHTYPE_LEN] = '\0'; p = strstr(str, " "); if (p) *p = '\0'; diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 2701d9500959..2a7d43f4f161 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -70,7 +70,7 @@ #define COMPXSP \ - unsigned xm; int xe; int xs; int xc + unsigned xm; int xe; int xs __maybe_unused; int xc #define COMPYSP \ unsigned ym; int ye; int ys; int yc @@ -104,7 +104,7 @@ #define COMPXDP \ -u64 xm; int xe; int xs; int xc +u64 xm; int xe; int xs __maybe_unused; int xc #define COMPYDP \ u64 ym; int ye; int ys; int yc diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 2efcbd24c82f..279599e9a779 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr) void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; - unsigned long lastpfn; + unsigned long lastpfn __maybe_unused; pagetable_init(); diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 083d3412d0bc..04f9e17db9d0 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -109,6 +109,8 @@ static bool scratchpad_available(void) static int scratchpad_offset(int i) { BUG(); + /* Really unreachable, but evidently some GCC want this. */ + return 0; } #endif /* diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c index b7c03d80c88c..68798f869c0f 100644 --- a/arch/mips/pci/ops-pmcmsp.c +++ b/arch/mips/pci/ops-pmcmsp.c @@ -308,7 +308,7 @@ static struct resource pci_mem_resource = { * RETURNS: PCIBIOS_SUCCESSFUL - success * ****************************************************************************/ -static int bpci_interrupt(int irq, void *dev_id) +static irqreturn_t bpci_interrupt(int irq, void *dev_id) { struct msp_pci_regs *preg = (void *)PCI_BASE_REG; unsigned int stat = preg->if_status; @@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id) /* write to clear all asserted interrupts */ preg->if_status = stat; - return PCIBIOS_SUCCESSFUL; + return IRQ_HANDLED; } /***************************************************************************** diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig index c139988bb85d..8d798497c614 100644 --- a/arch/mips/pmc-sierra/Kconfig +++ b/arch/mips/pmc-sierra/Kconfig @@ -4,15 +4,11 @@ choice config PMC_MSP4200_EVAL bool "PMC-Sierra MSP4200 Eval Board" - select CEVT_R4K - select CSRC_R4K select IRQ_MSP_SLP select HW_HAS_PCI config PMC_MSP4200_GW bool "PMC-Sierra MSP4200 VoIP Gateway" - select CEVT_R4K - select CSRC_R4K select IRQ_MSP_SLP select HW_HAS_PCI diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c index cca64e15f57f..01df84ce31e2 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_time.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c @@ -81,7 +81,7 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_rate/2; } -unsigned int __init get_c0_compare_int(void) +unsigned int __cpuinit get_c0_compare_int(void) { return MSP_INT_VPE0_TIMER; } diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 92d2f9298e38..9d773a639513 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m, * Atomically reads the value of @v. Note that the guaranteed * useful range of an atomic_t is only 24 bits. */ -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) (ACCESS_ONCE((v)->counter)) /** * atomic_set - set atomic variable diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 679dee0bbd08..3d6e60dad9d9 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user_check(x, ptr, size) \ ({ \ + const __typeof__(ptr) __guc_ptr = (ptr); \ int _e; \ - if (likely(__access_ok((unsigned long) (ptr), (size)))) \ - _e = __get_user_nocheck((x), (ptr), (size)); \ + if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \ + _e = __get_user_nocheck((x), __guc_ptr, (size)); \ else { \ _e = -EFAULT; \ (x) = (__typeof__(x))0; \ diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c index a8933a60b2d4..a6b63dde603d 100644 --- a/arch/mn10300/mm/cache-inv-icache.c +++ b/arch/mn10300/mm/cache-inv-icache.c @@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end) /* invalidate the icache coverage on that region */ mn10300_local_icache_inv_range2(addr + off, size); - smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); + smp_cache_call(SMP_ICACHE_INV_RANGE, start, end); } /** @@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end) * directly */ start_page = (start >= 0x80000000UL) ? start : 0x80000000UL; mn10300_icache_inv_range(start_page, end); - smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); + smp_cache_call(SMP_ICACHE_INV_RANGE, start, end); if (start_page == start) goto done; end = start_page; diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h new file mode 100644 index 000000000000..e656ad8c0a2e --- /dev/null +++ b/arch/x86/include/asm/ce4100.h @@ -0,0 +1,6 @@ +#ifndef _ASM_CE4100_H_ +#define _ASM_CE4100_H_ + +int ce4100_pci_init(void); + +#endif diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a3c28ae4025b..8508bfe52296 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set) static inline int HYPERVISOR_sched_op(int cmd, void *arg) { - return _hypercall2(int, sched_op_new, cmd, arg); + return _hypercall2(int, sched_op, cmd, arg); } static inline long @@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value) #endif static inline int -HYPERVISOR_suspend(unsigned long srec) +HYPERVISOR_suspend(unsigned long start_info_mfn) { - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); + struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; + + /* + * For a PV guest the tools require that the start_info mfn be + * present in rdx/edx when the hypercall is made. Per the + * hypercall calling convention this is the third hypercall + * argument, which is start_info_mfn here. + */ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn); } static inline int diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f25bdf238a33..c61934fbf22a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -29,8 +29,10 @@ typedef struct xpaddr { /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ #define INVALID_P2M_ENTRY (~0UL) -#define FOREIGN_FRAME_BIT (1UL<<31) +#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1)) +#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2)) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) +#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) /* Maximum amount of memory we can handle in a domain in pages */ #define MAX_DOMAIN_PAGES \ @@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern unsigned long set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e); extern int m2p_add_override(unsigned long mfn, struct page *page); extern int m2p_remove_override(struct page *page); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); +#ifdef CONFIG_XEN_DEBUG_FS +extern int p2m_dump_show(struct seq_file *m, void *v); +#endif static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; @@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) mfn = get_phys_to_machine(pfn); if (mfn != INVALID_P2M_ENTRY) - mfn &= ~FOREIGN_FRAME_BIT; + mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); return mfn; } @@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) static inline unsigned long mfn_to_pfn(unsigned long mfn) { unsigned long pfn; + int ret = 0; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; + if (unlikely((mfn >> machine_to_phys_order) != 0)) { + pfn = ~0; + goto try_override; + } pfn = 0; /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ - __get_user(pfn, &machine_to_phys_mapping[mfn]); - - /* - * If this appears to be a foreign mfn (because the pfn - * doesn't map back to the mfn), then check the local override - * table to see if there's a better pfn to use. + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); +try_override: + /* ret might be < 0 if there are no entries in the m2p for mfn */ + if (ret < 0) + pfn = ~0; + else if (get_phys_to_machine(pfn) != mfn) + /* + * If this appears to be a foreign mfn (because the pfn + * doesn't map back to the mfn), then check the local override + * table to see if there's a better pfn to use. + * + * m2p_find_override_pfn returns ~0 if it doesn't find anything. + */ + pfn = m2p_find_override_pfn(mfn, ~0); + + /* + * pfn is ~0 if there are no entries in the m2p for mfn or if the + * entry doesn't map back to the mfn and m2p_override doesn't have a + * valid entry for it. */ - if (get_phys_to_machine(pfn) != mfn) - pfn = m2p_find_override_pfn(mfn, pfn); + if (pfn == ~0 && + get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) + pfn = mfn; return pfn; } diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 2329b3eaf8d3..aa8620989162 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void) * its own functions. */ struct xen_pci_frontend_ops { - int (*enable_msi)(struct pci_dev *dev, int **vectors); + int (*enable_msi)(struct pci_dev *dev, int vectors[]); void (*disable_msi)(struct pci_dev *dev); - int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec); void (*disable_msix)(struct pci_dev *dev); }; extern struct xen_pci_frontend_ops *xen_pci_frontend; static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, - int **vectors) + int vectors[]) { if (xen_pci_frontend && xen_pci_frontend->enable_msi) return xen_pci_frontend->enable_msi(dev, vectors); @@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) xen_pci_frontend->disable_msi(dev); } static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, - int **vectors, int nvec) + int vectors[], int nvec) { if (xen_pci_frontend && xen_pci_frontend->enable_msix) return xen_pci_frontend->enable_msix(dev, vectors, nvec); diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 13a389179514..452932d34730 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void) addr += size; } - printk(KERN_INFO "Scanning %d areas for low memory corruption\n", - num_scan_areas); + if (num_scan_areas) + printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); } @@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy) { check_for_bios_corruption(); schedule_delayed_work(&bios_check_work, - round_jiffies_relative(corruption_check_period*HZ)); + round_jiffies_relative(corruption_check_period*HZ)); } static int start_periodic_check_for_corruption(void) { - if (!memory_corruption_check || corruption_check_period == 0) + if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0) return 0; printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d90ceb882a4..20e3f8702d1e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -229,15 +229,14 @@ void vmalloc_sync_all(void) for (address = VMALLOC_START & PMD_MASK; address >= TASK_SIZE && address < FIXADDR_TOP; address += PMD_SIZE) { - - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { spinlock_t *pgt_lock; pmd_t *ret; + /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -247,7 +246,7 @@ void vmalloc_sync_all(void) if (!ret) break; } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } @@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { if (fault & VM_FAULT_OOM) { + /* Kernel mode? Handle exceptions or die: */ + if (!(error_code & PF_USER)) { + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address); + return; + } + out_of_memory(regs, error_code, address); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 71a59296af80..c14a5422e152 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) for (address = start; address <= end; address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; spinlock_t *pgt_lock; pgd = (pgd_t *)page_address(page) + pgd_index(address); + /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(pgt_lock); } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d343b3c81f3c..90825f2eb0f4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM]; void update_page_count(int level, unsigned long pages) { - unsigned long flags; - /* Protect against CPA */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); direct_pages_count[level] += pages; - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } static void split_page_count(int level) @@ -394,7 +392,7 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; @@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, if (cpa->force_split) return 1; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: @@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, } out_unlock: - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return do_split; } static int split_large_page(pte_t *kpte, unsigned long address) { - unsigned long flags, pfn, pfninc = 1; + unsigned long pfn, pfninc = 1; unsigned int i, level; pte_t *pbase, *tmp; pgprot_t ref_prot; @@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (!base) return -ENOMEM; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up for us already: @@ -591,7 +589,7 @@ out_unlock: */ if (base) __free_page(base); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return 0; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 500242d3c96d..0113d19c8aa6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) static void pgd_dtor(pgd_t *pgd) { - unsigned long flags; /* can be called from interrupt context */ - if (SHARED_KERNEL_PMD) return; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - unsigned long flags; pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); @@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_ctor(mm, pgd); pgd_prepopulate_pmd(mm, pgd, pmds); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return pgd; diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index 85b68ef5e809..9260b3eb18d4 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -34,6 +34,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <asm/ce4100.h> #include <asm/pci_x86.h> struct sim_reg { @@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = { .write = ce4100_conf_write, }; -static int __init ce4100_pci_init(void) +int __init ce4100_pci_init(void) { init_sim_regs(); raw_pci_ops = &ce4100_pci_conf; - return 0; + /* Indicate caller that it should invoke pci_legacy_init() */ + return 1; } -subsys_initcall(ce4100_pci_init); diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 25cd4a07d09f..8c4085a95ef1 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -20,7 +20,8 @@ #include <asm/xen/pci.h> #ifdef CONFIG_ACPI -static int xen_hvm_register_pirq(u32 gsi, int triggering) +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, int polarity) { int rc, irq; struct physdev_map_pirq map_irq; @@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return -1; } - if (triggering == ACPI_EDGE_SENSITIVE) { + if (trigger == ACPI_EDGE_SENSITIVE) { shareable = 0; name = "ioapic-edge"; } else { @@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return irq; } - -static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, - int trigger, int polarity) -{ - return xen_hvm_register_pirq(gsi, trigger); -} #endif #if defined(CONFIG_PCI_MSI) @@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, pirq, ret = 0; + int irq, pirq; struct msi_desc *msidesc; struct msi_msg msg; @@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) __read_msi_msg(msidesc, &msg); pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { - xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); - if (irq < 0) + if (msg.data != XEN_PIRQ_MSI_DATA || + xen_irq_from_pirq(pirq) < 0) { + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) goto error; - ret = set_irq_msi(irq, msidesc); - if (ret < 0) - goto error_while; - printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" - " pirq=%d\n", irq, pirq); - return 0; + xen_msi_compose_msg(dev, pirq, &msg); + __write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); + } else { + dev_dbg(&dev->dev, + "xen: msi already bound to pirq=%d\n", pirq); } - xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); - if (irq < 0 || pirq < 0) + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, + (type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi"); + if (irq < 0) goto error; - printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); - xen_msi_compose_msg(dev, pirq, &msg); - ret = set_irq_msi(irq, msidesc); - if (ret < 0) - goto error_while; - write_msi_msg(irq, &msg); + dev_dbg(&dev->dev, + "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); } return 0; -error_while: - unbind_from_irqhandler(irq, NULL); error: - if (ret == -ENODEV) - dev_err(&dev->dev, "Xen PCI frontend has not registered" \ - " MSI/MSI-X support!\n"); - - return ret; + dev_err(&dev->dev, + "Xen PCI frontend has not registered MSI/MSI-X support!\n"); + return -ENODEV; } /* @@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return -ENOMEM; if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + ret = xen_pci_frontend_enable_msix(dev, v, nvec); else - ret = xen_pci_frontend_enable_msi(dev, &v); + ret = xen_pci_frontend_enable_msi(dev, v); if (ret) goto error; i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_allocate_pirq(v[i], 0, /* not sharable */ - (type == PCI_CAP_ID_MSIX) ? - "pcifront-msi-x" : "pcifront-msi"); - if (irq < 0) { - ret = -1; + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : + "pcifront-msi"); + if (irq < 0) goto free; - } - - ret = set_irq_msi(irq, msidesc); - if (ret) - goto error_while; i++; } kfree(v); return 0; -error_while: - unbind_from_irqhandler(irq, NULL); error: - if (ret == -ENODEV) - dev_err(&dev->dev, "Xen PCI frontend has not registered" \ - " MSI/MSI-X support!\n"); + dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); free: kfree(v); return ret; @@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) xen_pci_frontend_disable_msix(dev); else xen_pci_frontend_disable_msi(dev); + + /* Free the IRQ's and the msidesc using the generic code. */ + default_teardown_msi_irqs(dev); } static void xen_teardown_msi_irq(unsigned int irq) @@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq) xen_destroy_irq(irq); } +#ifdef CONFIG_XEN_DOM0 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, ret; + int ret = 0; struct msi_desc *msidesc; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_create_msi_irq(dev, msidesc, type); - if (irq < 0) - return -1; + struct physdev_map_pirq map_irq; - ret = set_irq_msi(irq, msidesc); - if (ret) - goto error; - } - return 0; + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; -error: - xen_destroy_irq(irq); + if (type == PCI_CAP_ID_MSIX) { + int pos; + u32 table_offset, bir; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + + pci_read_config_dword(dev, pos + PCI_MSIX_TABLE, + &table_offset); + bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); + + map_irq.table_base = pci_resource_start(dev, bir); + map_irq.entry_nr = msidesc->msi_attrib.entry_nr; + } + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (ret) { + dev_warn(&dev->dev, "xen map irq failed %d\n", ret); + goto out; + } + + ret = xen_bind_pirq_msi_to_irq(dev, msidesc, + map_irq.pirq, map_irq.index, + (type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi"); + if (ret < 0) + goto out; + } + ret = 0; +out: return ret; } #endif +#endif static int xen_pcifront_enable_irq(struct pci_dev *dev) { int rc; int share = 1; + u8 gsi; - dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); - - if (dev->irq < 0) - return -EINVAL; + rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", + rc); + return rc; + } - if (dev->irq < NR_IRQS_LEGACY) + if (gsi < NR_IRQS_LEGACY) share = 0; - rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + rc = xen_allocate_pirq(gsi, share, "pcifront"); if (rc < 0) { - dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", - dev->irq, rc); + dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n", + gsi, rc); return rc; } + + dev->irq = rc; + dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); return 0; } diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index d2c0d51a7178..cd6f184c3b3f 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -15,6 +15,7 @@ #include <linux/serial_reg.h> #include <linux/serial_8250.h> +#include <asm/ce4100.h> #include <asm/setup.h> #include <asm/io.h> @@ -129,4 +130,5 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.mpparse.find_smp_config = sdv_find_smp_config; + x86_init.pci.init = ce4100_pci_init; } diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 5b54892e4bc3..e4343fe488ed 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -48,3 +48,11 @@ config XEN_DEBUG_FS help Enable statistics output and various tuning options in debugfs. Enabling this option may incur a significant performance overhead. + +config XEN_DEBUG + bool "Enable Xen debug checks" + depends on XEN + default n + help + Enable various WARN_ON checks in the Xen MMU code. + Enabling this option WILL incur a significant performance overhead. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 50542efe45fb..49dbd78ec3cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor) xen_setup_features(); - pv_info = xen_info; - pv_info.kernel_rpl = 0; + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; return 0; } -void xen_hvm_init_shared_info(void) +void __ref xen_hvm_init_shared_info(void) { int cpu; struct xen_add_to_physmap xatp; @@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_have_vector_callback) + xen_init_lock_cpu(cpu); break; default: break; @@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void) if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; + xen_hvm_smp_init(); register_cpu_notifier(&xen_hvm_cpu_notifier); xen_unplug_emulated_devices(); have_vcpu_info_placement = 0; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e92b61ad574..832765c0fb8c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -46,6 +46,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/seq_file.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; - unsigned long mfn = pfn_to_mfn(pfn); + unsigned long mfn; + if (!xen_feature(XENFEAT_auto_translated_physmap)) + mfn = get_phys_to_machine(pfn); + else + mfn = pfn; /* * If there's no mfn for the pfn, then just create an * empty non-present pte. Unfortunately this loses @@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (unlikely(mfn == INVALID_P2M_ENTRY)) { mfn = 0; flags = 0; + } else { + /* + * Paramount to do this test _after_ the + * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & + * IDENTITY_FRAME_BIT resolves to true. + */ + mfn &= ~FOREIGN_FRAME_BIT; + if (mfn & IDENTITY_FRAME_BIT) { + mfn &= ~IDENTITY_FRAME_BIT; + flags |= _PAGE_IOMAP; + } } - val = ((pteval_t)mfn << PAGE_SHIFT) | flags; } @@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); +#ifdef CONFIG_XEN_DEBUG +pte_t xen_make_pte_debug(pteval_t pte) +{ + phys_addr_t addr = (pte & PTE_PFN_MASK); + phys_addr_t other_addr; + bool io_page = false; + pte_t _pte; + + if (pte & _PAGE_IOMAP) + io_page = true; + + _pte = xen_make_pte(pte); + + if (!addr) + return _pte; + + if (io_page && + (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { + other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; + WARN(addr != other_addr, + "0x%lx is using VM_IO, but it is 0x%lx!\n", + (unsigned long)addr, (unsigned long)other_addr); + } else { + pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; + other_addr = (_pte.pte & PTE_PFN_MASK); + WARN((addr == other_addr) && (!io_page) && (!iomap_set), + "0x%lx is missing VM_IO (and wasn't fixed)!\n", + (unsigned long)addr); + } + + return _pte; +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); +#endif + pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); @@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm) */ void xen_mm_pin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (!PagePinned(page)) { @@ -998,7 +1047,7 @@ void xen_mm_pin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) */ void xen_mm_unpin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { @@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) @@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { +#ifdef CONFIG_XEN_DEBUG + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); +#endif pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; @@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, in_frames[i] = virt_to_mfn(vaddr); MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); if (out_frames) out_frames[i] = virt_to_pfn(vaddr); @@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); #ifdef CONFIG_XEN_DEBUG_FS +static int p2m_dump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, p2m_dump_show, NULL); +} + +static const struct file_operations p2m_dump_fops = { + .open = p2m_dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *d_mmu_debug; static int __init xen_mmu_debugfs(void) @@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void) debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, &mmu_stats.prot_commit_batched); + debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); return 0; } fs_initcall(xen_mmu_debugfs); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index fd12d7ce7ff9..215a3ce61068 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -23,6 +23,129 @@ * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to * 512 and 1024 entries respectively. + * + * In short, these structures contain the Machine Frame Number (MFN) of the PFN. + * + * However not all entries are filled with MFNs. Specifically for all other + * leaf entries, or for the top root, or middle one, for which there is a void + * entry, we assume it is "missing". So (for example) + * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. + * + * We also have the possibility of setting 1-1 mappings on certain regions, so + * that: + * pfn_to_mfn(0xc0000)=0xc0000 + * + * The benefit of this is, that we can assume for non-RAM regions (think + * PCI BARs, or ACPI spaces), we can create mappings easily b/c we + * get the PFN value to match the MFN. + * + * For this to work efficiently we have one new page p2m_identity and + * allocate (via reserved_brk) any other pages we need to cover the sides + * (1GB or 4MB boundary violations). All entries in p2m_identity are set to + * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, + * no other fancy value). + * + * On lookup we spot that the entry points to p2m_identity and return the + * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. + * If the entry points to an allocated page, we just proceed as before and + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in + * appropriate functions (pfn_to_mfn). + * + * The reason for having the IDENTITY_FRAME_BIT instead of just returning the + * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a + * non-identity pfn. To protect ourselves against we elect to set (and get) the + * IDENTITY_FRAME_BIT on all identity mapped PFNs. + * + * This simplistic diagram is used to explain the more subtle piece of code. + * There is also a digram of the P2M at the end that can help. + * Imagine your E820 looking as so: + * + * 1GB 2GB + * /-------------------+---------\/----\ /----------\ /---+-----\ + * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | + * \-------------------+---------/\----/ \----------/ \---+-----/ + * ^- 1029MB ^- 2001MB + * + * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), + * 2048MB = 524288 (0x80000)] + * + * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB + * is actually not present (would have to kick the balloon driver to put it in). + * + * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: + * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start + * of the PFN and the end PFN (263424 and 512256 respectively). The first step + * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page + * covers 512^2 of page estate (1GB) and in case the start or end PFN is not + * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn + * to end pfn. We reserve_brk top leaf pages if they are missing (means they + * point to p2m_mid_missing). + * + * With the E820 example above, 263424 is not 1GB aligned so we allocate a + * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. + * Each entry in the allocate page is "missing" (points to p2m_missing). + * + * Next stage is to determine if we need to do a more granular boundary check + * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. + * We check if the start pfn and end pfn violate that boundary check, and if + * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer + * granularity of setting which PFNs are missing and which ones are identity. + * In our example 263424 and 512256 both fail the check so we reserve_brk two + * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" + * values) and assign them to p2m[1][2] and p2m[1][488] respectively. + * + * At this point we would at minimum reserve_brk one page, but could be up to + * three. Each call to set_phys_range_identity has at maximum a three page + * cost. If we were to query the P2M at this stage, all those entries from + * start PFN through end PFN (so 1029MB -> 2001MB) would return + * INVALID_P2M_ENTRY ("missing"). + * + * The next step is to walk from the start pfn to the end pfn setting + * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. + * If we find that the middle leaf is pointing to p2m_missing we can swap it + * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this + * point we do not need to worry about boundary aligment (so no need to + * reserve_brk a middle page, figure out which PFNs are "missing" and which + * ones are identity), as that has been done earlier. If we find that the + * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference + * that page (which covers 512 PFNs) and set the appropriate PFN with + * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we + * set from p2m[1][2][256->511] and p2m[1][488][0->256] with + * IDENTITY_FRAME_BIT set. + * + * All other regions that are void (or not filled) either point to p2m_missing + * (considered missing) or have the default value of INVALID_P2M_ENTRY (also + * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] + * contain the INVALID_P2M_ENTRY value and are considered "missing." + * + * This is what the p2m ends up looking (for the E820 above) with this + * fabulous drawing: + * + * p2m /--------------\ + * /-----\ | &mfn_list[0],| /-----------------\ + * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | + * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | + * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | + * |-----| \ | [p2m_identity]+\\ | .... | + * | 2 |--\ \-------------------->| ... | \\ \----------------/ + * |-----| \ \---------------/ \\ + * | 3 |\ \ \\ p2m_identity + * |-----| \ \-------------------->/---------------\ /-----------------\ + * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | + * \-----/ / | [p2m_identity]+-->| ..., ~0 | + * / /---------------\ | .... | \-----------------/ + * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | + * / | IDENTITY[@256]|<----/ \---------------/ + * / | ~0, ~0, .... | + * | \---------------/ + * | + * p2m_missing p2m_missing + * /------------------\ /------------\ + * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | + * | [p2m_mid_missing]+---->| ..., ~0 | + * \------------------/ \------------/ + * + * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ #include <linux/init.h> @@ -30,6 +153,7 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <asm/cache.h> #include <asm/setup.h> @@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); +static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); + RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); +/* We might hit two boundary violations at the start and end, at max each + * boundary violation will require three middle nodes. */ +RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); + static inline unsigned p2m_top_index(unsigned long pfn) { BUG_ON(pfn >= MAX_P2M_PFN); @@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m) * - After resume we're called from within stop_machine, but the mfn * tree should alreay be completely allocated. */ -void xen_build_mfn_list_list(void) +void __ref xen_build_mfn_list_list(void) { unsigned long pfn; @@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top_init(p2m_top); + p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_init(p2m_identity); + /* * The domain builder gives us a pre-constructed p2m array in * mfn_list for all the pages initially given to us, so we just @@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn) mididx = p2m_mid_index(pfn); idx = p2m_index(pfn); + /* + * The INVALID_P2M_ENTRY is filled in both p2m_*identity + * and in p2m_*missing, so returning the INVALID_P2M_ENTRY + * would be wrong. + */ + if (p2m_top[topidx][mididx] == p2m_identity) + return IDENTITY_FRAME(pfn); + return p2m_top[topidx][mididx][idx]; } EXPORT_SYMBOL_GPL(get_phys_to_machine); @@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn) p2m_top_mfn_p[topidx] = mid_mfn; } - if (p2m_top[topidx][mididx] == p2m_missing) { + if (p2m_top[topidx][mididx] == p2m_identity || + p2m_top[topidx][mididx] == p2m_missing) { /* p2m leaf page is missing */ unsigned long *p2m; + unsigned long *p2m_orig = p2m_top[topidx][mididx]; p2m = alloc_p2m_page(); if (!p2m) @@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn) p2m_init(p2m); - if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) + if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) free_p2m_page(p2m); else mid_mfn[mididx] = virt_to_mfn(p2m); @@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn) return true; } +bool __early_alloc_p2m(unsigned long pfn) +{ + unsigned topidx, mididx, idx; + + topidx = p2m_top_index(pfn); + mididx = p2m_mid_index(pfn); + idx = p2m_index(pfn); + + /* Pfff.. No boundary cross-over, lets get out. */ + if (!idx) + return false; + + WARN(p2m_top[topidx][mididx] == p2m_identity, + "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", + topidx, mididx); + + /* + * Could be done by xen_build_dynamic_phys_to_machine.. + */ + if (p2m_top[topidx][mididx] != p2m_missing) + return false; + + /* Boundary cross-over for the edges: */ + if (idx) { + unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_init(p2m); + + p2m_top[topidx][mididx] = p2m; + + } + return idx != 0; +} +unsigned long set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e) +{ + unsigned long pfn; + + if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) + return 0; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return pfn_e - pfn_s; + + if (pfn_s > pfn_e) + return 0; + + for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); + pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); + pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) + { + unsigned topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == p2m_mid_missing) { + unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_mid_init(mid); + + p2m_top[topidx] = mid; + } + } + + __early_alloc_p2m(pfn_s); + __early_alloc_p2m(pfn_e); + + for (pfn = pfn_s; pfn < pfn_e; pfn++) + if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) + break; + + if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), + "Identity mapping failed. We are %ld short of 1-1 mappings!\n", + (pfn_e - pfn_s) - (pfn - pfn_s))) + printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); + + return pfn - pfn_s; +} + /* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, mididx, idx; + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return true; + } if (unlikely(pfn >= MAX_P2M_PFN)) { BUG_ON(mfn != INVALID_P2M_ENTRY); return true; @@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) mididx = p2m_mid_index(pfn); idx = p2m_index(pfn); + /* For sparse holes were the p2m leaf has real PFN along with + * PCI holes, stick in the PFN as the MFN value. + */ + if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { + if (p2m_top[topidx][mididx] == p2m_identity) + return true; + + /* Swap over from MISSING to IDENTITY if needed. */ + if (p2m_top[topidx][mididx] == p2m_missing) { + WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, + p2m_identity) != p2m_missing); + return true; + } + } + if (p2m_top[topidx][mididx] == p2m_missing) return mfn == INVALID_P2M_ENTRY; @@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return true; - } - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (!alloc_p2m(pfn)) return false; @@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page) { unsigned long flags; unsigned long pfn; - unsigned long address; + unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; @@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page) unsigned long flags; unsigned long mfn; unsigned long pfn; - unsigned long address; + unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; @@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) return ret; } EXPORT_SYMBOL_GPL(m2p_find_override_pfn); + +#ifdef CONFIG_XEN_DEBUG_FS + +int p2m_dump_show(struct seq_file *m, void *v) +{ + static const char * const level_name[] = { "top", "middle", + "entry", "abnormal" }; + static const char * const type_name[] = { "identity", "missing", + "pfn", "abnormal"}; +#define TYPE_IDENTITY 0 +#define TYPE_MISSING 1 +#define TYPE_PFN 2 +#define TYPE_UNKNOWN 3 + unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; + unsigned int uninitialized_var(prev_level); + unsigned int uninitialized_var(prev_type); + + if (!p2m_top) + return 0; + + for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx = p2m_mid_index(pfn); + unsigned idx = p2m_index(pfn); + unsigned lvl, type; + + lvl = 4; + type = TYPE_UNKNOWN; + if (p2m_top[topidx] == p2m_mid_missing) { + lvl = 0; type = TYPE_MISSING; + } else if (p2m_top[topidx] == NULL) { + lvl = 0; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx] == NULL) { + lvl = 1; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx] == p2m_identity) { + lvl = 1; type = TYPE_IDENTITY; + } else if (p2m_top[topidx][mididx] == p2m_missing) { + lvl = 1; type = TYPE_MISSING; + } else if (p2m_top[topidx][mididx][idx] == 0) { + lvl = 2; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { + lvl = 2; type = TYPE_IDENTITY; + } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { + lvl = 2; type = TYPE_MISSING; + } else if (p2m_top[topidx][mididx][idx] == pfn) { + lvl = 2; type = TYPE_PFN; + } else if (p2m_top[topidx][mididx][idx] != pfn) { + lvl = 2; type = TYPE_PFN; + } + if (pfn == 0) { + prev_level = lvl; + prev_type = type; + } + if (pfn == MAX_DOMAIN_PAGES-1) { + lvl = 3; + type = TYPE_UNKNOWN; + } + if (prev_type != type) { + seq_printf(m, " [0x%lx->0x%lx] %s\n", + prev_pfn_type, pfn, type_name[prev_type]); + prev_pfn_type = pfn; + prev_type = type; + } + if (prev_level != lvl) { + seq_printf(m, " [0x%lx->0x%lx] level %s\n", + prev_pfn_level, pfn, level_name[prev_level]); + prev_pfn_level = pfn; + prev_level = lvl; + } + } + return 0; +#undef TYPE_IDENTITY +#undef TYPE_MISSING +#undef TYPE_PFN +#undef TYPE_UNKNOWN +} +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a8a66a50d446..fa0269a99377 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; static __init void xen_add_extra_mem(unsigned long pages) { + unsigned long pfn; + u64 size = (u64)pages * PAGE_SIZE; u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; @@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages) xen_extra_mem_size += size; xen_max_p2m_pfn = PFN_DOWN(extra_start + size); + + for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } static unsigned long __init xen_release_chunk(phys_addr_t start_addr, @@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", start, end, ret); if (ret == 1) { - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); len++; } } @@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, return released; } +static unsigned long __init xen_set_identity(const struct e820entry *list, + ssize_t map_size) +{ + phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; + phys_addr_t start_pci = last; + const struct e820entry *entry; + unsigned long identity = 0; + int i; + + for (i = 0, entry = list; i < map_size; i++, entry++) { + phys_addr_t start = entry->addr; + phys_addr_t end = start + entry->size; + + if (start < last) + start = last; + + if (end <= start) + continue; + + /* Skip over the 1MB region. */ + if (last > end) + continue; + + if (entry->type == E820_RAM) { + if (start > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(start)); + + /* Without saving 'last' we would gooble RAM too + * at the end of the loop. */ + last = end; + start_pci = end; + continue; + } + start_pci = min(start, start_pci); + last = end; + } + if (last > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(last)); + return identity; +} /** * machine_specific_memory_setup - Hook for machine specific memory setup. **/ char * __init xen_memory_setup(void) { static struct e820entry map[E820MAX] __initdata; + static struct e820entry map_raw[E820MAX] __initdata; unsigned long max_pfn = xen_start_info->nr_pages; unsigned long long mem_end; @@ -151,6 +199,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long extra_pages = 0; unsigned long extra_limit; + unsigned long identity_pages = 0; int i; int op; @@ -176,6 +225,7 @@ char * __init xen_memory_setup(void) } BUG_ON(rc); + memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; xen_extra_mem_start = mem_end; for (i = 0; i < memmap.nr_entries; i++) { @@ -194,6 +244,15 @@ char * __init xen_memory_setup(void) end -= delta; extra_pages += PFN_DOWN(delta); + /* + * Set RAM below 4GB that is not for us to be unusable. + * This prevents "System RAM" address space from being + * used as potential resource for I/O address (happens + * when 'allocate_resource' is called). + */ + if (delta && + (xen_initial_domain() && end < 0x100000000ULL)) + e820_add_region(end, delta, E820_UNUSABLE); } if (map[i].size > 0 && end > xen_extra_mem_start) @@ -251,6 +310,13 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(extra_pages); + /* + * Set P2M for all non-RAM pages and E820 gaps to be identity + * type PFNs. We supply it with the non-sanitized version + * of the E820. + */ + identity_pages = xen_set_identity(map_raw, memmap.nr_entries); + printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages); return "Xen"; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72a4c7959045..30612441ed99 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -509,3 +509,41 @@ void __init xen_smp_init(void) xen_fill_possible_map(); xen_init_spinlocks(); } + +static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) +{ + native_smp_prepare_cpus(max_cpus); + WARN_ON(xen_smp_intr_init(0)); + + if (!xen_have_vector_callback) + return; + xen_init_lock_cpu(0); + xen_init_spinlocks(); +} + +static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) +{ + int rc; + rc = native_cpu_up(cpu); + WARN_ON (xen_smp_intr_init(cpu)); + return rc; +} + +static void xen_hvm_cpu_die(unsigned int cpu) +{ + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); + native_cpu_die(cpu); +} + +void __init xen_hvm_smp_init(void) +{ + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_send_reschedule = xen_smp_send_reschedule; + smp_ops.cpu_up = xen_hvm_cpu_up; + smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; + smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; +} diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 9bbd63a129b5..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -12,7 +12,7 @@ #include "xen-ops.h" #include "mmu.h" -void xen_pre_suspend(void) +void xen_arch_pre_suspend(void) { xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = @@ -26,8 +26,9 @@ void xen_pre_suspend(void) BUG(); } -void xen_hvm_post_suspend(int suspend_cancelled) +void xen_arch_hvm_post_suspend(int suspend_cancelled) { +#ifdef CONFIG_XEN_PVHVM int cpu; xen_hvm_init_shared_info(); xen_callback_vector(); @@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) xen_setup_runstate_info(cpu); } } +#endif } -void xen_post_suspend(int suspend_cancelled) +void xen_arch_post_suspend(int suspend_cancelled) { xen_build_mfn_list_list(); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 067759e3d6a5..2e2d370a47b1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -397,7 +397,9 @@ void xen_setup_timer(int cpu) name = "<timer kasprintf failed>"; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, + IRQF_DISABLED|IRQF_PERCPU| + IRQF_NOBALANCING|IRQF_TIMER| + IRQF_FORCE_RESUME, name, NULL); evt = &per_cpu(xen_clock_events, cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9d41bf985757..3112f55638c4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); +void __init xen_hvm_smp_init(void); extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} +static inline void xen_hvm_smp_init(void) {} #endif #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/block/blk-lib.c b/block/blk-lib.c index eec78becb355..bd3e8df4d5e2 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -109,7 +109,6 @@ struct bio_batch atomic_t done; unsigned long flags; struct completion *wait; - bio_end_io_t *end_io; }; static void bio_batch_end_io(struct bio *bio, int err) @@ -122,12 +121,9 @@ static void bio_batch_end_io(struct bio *bio, int err) else clear_bit(BIO_UPTODATE, &bb->flags); } - if (bb) { - if (bb->end_io) - bb->end_io(bio, err); - atomic_inc(&bb->done); - complete(bb->wait); - } + if (bb) + if (atomic_dec_and_test(&bb->done)) + complete(bb->wait); bio_put(bio); } @@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, int ret; struct bio *bio; struct bio_batch bb; - unsigned int sz, issued = 0; + unsigned int sz; DECLARE_COMPLETION_ONSTACK(wait); - atomic_set(&bb.done, 0); + atomic_set(&bb.done, 1); bb.flags = 1 << BIO_UPTODATE; bb.wait = &wait; - bb.end_io = NULL; submit: ret = 0; @@ -185,12 +180,12 @@ submit: break; } ret = 0; - issued++; + atomic_inc(&bb.done); submit_bio(WRITE, bio); } /* Wait for bios in-flight */ - while (issued != atomic_read(&bb.done)) + if (!atomic_dec_and_test(&bb.done)) wait_for_completion(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d7aa39e349a6..9cb8668ff5f4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock); #define EXTENDED (1<<EXT_SHIFT) #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) +#define EMULATED_HD_DISK_MINOR_OFFSET (0) +#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) +#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) +#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) #define DEV_NAME "xvd" /* name in /dev */ @@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req) info->shadow[id].request = req; ring_req->id = id; - ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); ring_req->handle = info->handle; ring_req->operation = rq_data_dir(req) ? @@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req) rq_data_dir(req) ); info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); - ring_req->seg[i] = + ring_req->u.rw.seg[i] = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, @@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info) info->feature_flush ? "enabled" : "disabled"); } +static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) +{ + int major; + major = BLKIF_MAJOR(vdevice); + *minor = BLKIF_MINOR(vdevice); + switch (major) { + case XEN_IDE0_MAJOR: + *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; + *minor = ((*minor / 64) * PARTS_PER_DISK) + + EMULATED_HD_DISK_MINOR_OFFSET; + break; + case XEN_IDE1_MAJOR: + *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; + *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + + EMULATED_HD_DISK_MINOR_OFFSET; + break; + case XEN_SCSI_DISK0_MAJOR: + *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; + *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; + break; + case XEN_SCSI_DISK1_MAJOR: + case XEN_SCSI_DISK2_MAJOR: + case XEN_SCSI_DISK3_MAJOR: + case XEN_SCSI_DISK4_MAJOR: + case XEN_SCSI_DISK5_MAJOR: + case XEN_SCSI_DISK6_MAJOR: + case XEN_SCSI_DISK7_MAJOR: + *offset = (*minor / PARTS_PER_DISK) + + ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + + EMULATED_SD_DISK_NAME_OFFSET; + *minor = *minor + + ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + + EMULATED_SD_DISK_MINOR_OFFSET; + break; + case XEN_SCSI_DISK8_MAJOR: + case XEN_SCSI_DISK9_MAJOR: + case XEN_SCSI_DISK10_MAJOR: + case XEN_SCSI_DISK11_MAJOR: + case XEN_SCSI_DISK12_MAJOR: + case XEN_SCSI_DISK13_MAJOR: + case XEN_SCSI_DISK14_MAJOR: + case XEN_SCSI_DISK15_MAJOR: + *offset = (*minor / PARTS_PER_DISK) + + ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + + EMULATED_SD_DISK_NAME_OFFSET; + *minor = *minor + + ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + + EMULATED_SD_DISK_MINOR_OFFSET; + break; + case XENVBD_MAJOR: + *offset = *minor / PARTS_PER_DISK; + break; + default: + printk(KERN_WARNING "blkfront: your disk configuration is " + "incorrect, please use an xvd device instead\n"); + return -ENODEV; + } + return 0; +} static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, @@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, { struct gendisk *gd; int nr_minors = 1; - int err = -ENODEV; + int err; unsigned int offset; int minor; int nr_parts; @@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, } if (!VDEV_IS_EXTENDED(info->vdevice)) { - minor = BLKIF_MINOR(info->vdevice); - nr_parts = PARTS_PER_DISK; + err = xen_translate_vdev(info->vdevice, &minor, &offset); + if (err) + return err; + nr_parts = PARTS_PER_DISK; } else { minor = BLKIF_MINOR_EXT(info->vdevice); nr_parts = PARTS_PER_EXT_DISK; + offset = minor / nr_parts; + if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4) + printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " + "emulated IDE disks,\n\t choose an xvd device name" + "from xvde on\n", info->vdevice); } + err = -ENODEV; if ((minor % nr_parts) == 0) nr_minors = nr_parts; @@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (gd == NULL) goto release; - offset = minor / nr_parts; - if (nr_minors > 1) { if (offset < 26) sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); @@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s) { int i; for (i = 0; i < s->req.nr_segments; i++) - gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); + gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info) /* Rewrite any grant references invalidated by susp/resume. */ for (j = 0; j < req->nr_segments; j++) gnttab_grant_foreign_access_ref( - req->seg[j].gref, + req->u.rw.seg[j].gref, info->xbdev->otherend_id, pfn_to_mfn(info->shadow[req->id].frame[j]), rq_data_dir(info->shadow[req->id].request)); diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c index cead8e6ff345..7f6f01a4b145 100644 --- a/drivers/gpio/ml_ioh_gpio.c +++ b/drivers/gpio/ml_ioh_gpio.c @@ -326,6 +326,7 @@ static DEFINE_PCI_DEVICE_TABLE(ioh_gpio_pcidev_id) = { { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) }, { 0, } }; +MODULE_DEVICE_TABLE(pci, ioh_gpio_pcidev_id); static struct pci_driver ioh_gpio_driver = { .name = "ml_ioh_gpio", diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c index 0eba0a75c804..2c6af8705103 100644 --- a/drivers/gpio/pch_gpio.c +++ b/drivers/gpio/pch_gpio.c @@ -286,6 +286,7 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, { 0, } }; +MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id); static struct pci_driver pch_gpio_driver = { .name = "pch_gpio", diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index d270b3ff896b..6140ea1de45a 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2194,7 +2194,6 @@ int evergreen_mc_init(struct radeon_device *rdev) rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; } rdev->mc.visible_vram_size = rdev->mc.aper_size; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; r700_vram_gtt_location(rdev, &rdev->mc); radeon_update_bandwidth_info(rdev); @@ -2934,7 +2933,7 @@ static int evergreen_startup(struct radeon_device *rdev) /* XXX: ontario has problems blitting to gart at the moment */ if (rdev->family == CHIP_PALM) { rdev->asic->copy = NULL; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); } /* allocate wb buffer */ diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 2adfb03f479b..2be698e78ff2 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -623,7 +623,7 @@ done: dev_err(rdev->dev, "(%d) pin blit object failed\n", r); return r; } - rdev->mc.active_vram_size = rdev->mc.real_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } @@ -631,7 +631,7 @@ void evergreen_blit_fini(struct radeon_device *rdev) { int r; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); if (rdev->r600_blit.shader_obj == NULL) return; /* If we can't reserve the bo, unref should be enough to destroy diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 93fa735c8c1a..e372f9e1e5ce 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -70,23 +70,6 @@ MODULE_FIRMWARE(FIRMWARE_R520); void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { - struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; - u32 tmp; - - /* make sure flip is at vb rather than hb */ - tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset); - tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL; - /* make sure pending bit is asserted */ - tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN; - WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp); - - /* set pageflip to happen as late as possible in the vblank interval. - * same field for crtc1/2 - */ - tmp = RREG32(RADEON_CRTC_GEN_CNTL); - tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK; - WREG32(RADEON_CRTC_GEN_CNTL, tmp); - /* enable the pflip int */ radeon_irq_kms_pflip_irq_get(rdev, crtc); } @@ -1041,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) return r; } rdev->cp.ready = true; - rdev->mc.active_vram_size = rdev->mc.real_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } @@ -1059,7 +1042,7 @@ void r100_cp_fini(struct radeon_device *rdev) void r100_cp_disable(struct radeon_device *rdev) { /* Disable ring */ - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); rdev->cp.ready = false; WREG32(RADEON_CP_CSQ_MODE, 0); WREG32(RADEON_CP_CSQ_CNTL, 0); @@ -2329,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev) /* FIXME we don't use the second aperture yet when we could use it */ if (rdev->mc.visible_vram_size > rdev->mc.aper_size) rdev->mc.visible_vram_size = rdev->mc.aper_size; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); if (rdev->flags & RADEON_IS_IGP) { uint32_t tom; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index de88624d5f87..9b3fad23b76c 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1255,7 +1255,6 @@ int r600_mc_init(struct radeon_device *rdev) rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); rdev->mc.visible_vram_size = rdev->mc.aper_size; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; r600_vram_gtt_location(rdev, &rdev->mc); if (rdev->flags & RADEON_IS_IGP) { @@ -1937,7 +1936,7 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) */ void r600_cp_stop(struct radeon_device *rdev) { - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); WREG32(SCRATCH_UMSK, 0); } diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 41f7aafc97c4..df68d91e8190 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -558,7 +558,7 @@ done: dev_err(rdev->dev, "(%d) pin blit object failed\n", r); return r; } - rdev->mc.active_vram_size = rdev->mc.real_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } @@ -566,7 +566,7 @@ void r600_blit_fini(struct radeon_device *rdev) { int r; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); if (rdev->r600_blit.shader_obj == NULL) return; /* If we can't reserve the bo, unref should be enough to destroy diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 56c48b67ef3d..6b3429495118 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -345,7 +345,6 @@ struct radeon_mc { * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; - u64 active_vram_size; u64 gtt_size; u64 gtt_start; u64 gtt_end; @@ -1448,6 +1447,7 @@ extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *m extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern int radeon_resume_kms(struct drm_device *dev); extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); +extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size); /* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */ extern bool r600_card_posted(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index e75d63b8e21d..793c5e6026ad 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -834,6 +834,9 @@ static struct radeon_asic sumo_asic = { .pm_finish = &evergreen_pm_finish, .pm_init_profile = &rs780_pm_init_profile, .pm_get_dynpm_state = &r600_pm_get_dynpm_state, + .pre_page_flip = &evergreen_pre_page_flip, + .page_flip = &evergreen_page_flip, + .post_page_flip = &evergreen_post_page_flip, }; static struct radeon_asic btc_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index df95eb83dac6..1fe95dfe48c9 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -156,9 +156,12 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, { struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_info *args = data; + struct ttm_mem_type_manager *man; + + man = &rdev->mman.bdev.man[TTM_PL_VRAM]; args->vram_size = rdev->mc.real_vram_size; - args->vram_visible = rdev->mc.real_vram_size; + args->vram_visible = (u64)man->size << PAGE_SHIFT; if (rdev->stollen_vga_memory) args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory); args->vram_visible -= radeon_fbdev_total_size(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index cf0638c3b7c7..78968b738e88 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -443,7 +443,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, (target_fb->bits_per_pixel * 8)); crtc_pitch |= crtc_pitch << 16; - + crtc_offset_cntl |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN; if (tiling_flags & RADEON_TILING_MACRO) { if (ASIC_IS_R300(rdev)) crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN | @@ -502,6 +502,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, gen_cntl_val = RREG32(gen_cntl_reg); gen_cntl_val &= ~(0xf << 8); gen_cntl_val |= (format << 8); + gen_cntl_val &= ~RADEON_CRTC_VSTAT_MODE_MASK; WREG32(gen_cntl_reg, gen_cntl_val); crtc_offset = (u32)base; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index e5b2cf10cbf4..8389b4c63d12 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -589,6 +589,20 @@ void radeon_ttm_fini(struct radeon_device *rdev) DRM_INFO("radeon: ttm finalized\n"); } +/* this should only be called at bootup or when userspace + * isn't running */ +void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size) +{ + struct ttm_mem_type_manager *man; + + if (!rdev->mman.initialized) + return; + + man = &rdev->mman.bdev.man[TTM_PL_VRAM]; + /* this just adjusts TTM size idea, which sets lpfn to the correct value */ + man->size = size >> PAGE_SHIFT; +} + static struct vm_operations_struct radeon_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops = NULL; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 5afe294ed51f..8af4679db23e 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -751,7 +751,6 @@ void rs600_mc_init(struct radeon_device *rdev) rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); rdev->mc.mc_vram_size = rdev->mc.real_vram_size; rdev->mc.visible_vram_size = rdev->mc.aper_size; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); base = RREG32_MC(R_000004_MC_FB_LOCATION); base = G_000004_MC_FB_START(base) << 16; diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 6638c8e4c81b..66c949b7c18c 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -157,7 +157,6 @@ void rs690_mc_init(struct radeon_device *rdev) rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); rdev->mc.visible_vram_size = rdev->mc.aper_size; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; base = RREG32_MC(R_000100_MCCFG_FB_LOCATION); base = G_000100_MC_FB_START(base) << 16; rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index d8ba67690656..714ad45757d0 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -307,7 +307,7 @@ static void rv770_mc_program(struct radeon_device *rdev) */ void r700_cp_stop(struct radeon_device *rdev) { - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); WREG32(SCRATCH_UMSK, 0); } @@ -1123,7 +1123,6 @@ int rv770_mc_init(struct radeon_device *rdev) rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); rdev->mc.visible_vram_size = rdev->mc.aper_size; - rdev->mc.active_vram_size = rdev->mc.visible_vram_size; r700_vram_gtt_location(rdev, &rdev->mc); radeon_update_bandwidth_info(rdev); diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 3f49dd376f02..6e06019015a5 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -37,7 +37,7 @@ #define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */ #define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */ #define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */ -#define SIO_LOCK_KEY 0xAA /* Key to diasble Super-I/O */ +#define SIO_LOCK_KEY 0xAA /* Key to disable Super-I/O */ #define SIO_REG_LDSEL 0x07 /* Logical device select */ #define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ @@ -2111,7 +2111,6 @@ static int f71882fg_remove(struct platform_device *pdev) int nr_fans = (data->type == f71882fg) ? 4 : 3; u8 start_reg = f71882fg_read8(data, F71882FG_REG_START); - platform_set_drvdata(pdev, NULL); if (data->hwmon_dev) hwmon_device_unregister(data->hwmon_dev); @@ -2178,6 +2177,7 @@ static int f71882fg_remove(struct platform_device *pdev) } } + platform_set_drvdata(pdev, NULL); kfree(data); return 0; diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index a8c3e1c9b02a..4aaa88f8ab5f 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -1230,10 +1230,32 @@ static int inval_cache_and_wait_for_operation( sleep_time = chip_op_time / 2; for (;;) { + if (chip->state != chip_state) { + /* Someone's suspended the operation: sleep */ + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&chip->wq, &wait); + mutex_unlock(&chip->mutex); + schedule(); + remove_wait_queue(&chip->wq, &wait); + mutex_lock(&chip->mutex); + continue; + } + status = map_read(map, cmd_adr); if (map_word_andequal(map, status, status_OK, status_OK)) break; + if (chip->erase_suspended && chip_state == FL_ERASING) { + /* Erase suspend occured while sleep: reset timeout */ + timeo = reset_timeo; + chip->erase_suspended = 0; + } + if (chip->write_suspended && chip_state == FL_WRITING) { + /* Write suspend occured while sleep: reset timeout */ + timeo = reset_timeo; + chip->write_suspended = 0; + } if (!timeo) { map_write(map, CMD(0x70), cmd_adr); chip->state = FL_STATUS; @@ -1257,27 +1279,6 @@ static int inval_cache_and_wait_for_operation( timeo--; } mutex_lock(&chip->mutex); - - while (chip->state != chip_state) { - /* Someone's suspended the operation: sleep */ - DECLARE_WAITQUEUE(wait, current); - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&chip->wq, &wait); - mutex_unlock(&chip->mutex); - schedule(); - remove_wait_queue(&chip->wq, &wait); - mutex_lock(&chip->mutex); - } - if (chip->erase_suspended && chip_state == FL_ERASING) { - /* Erase suspend occured while sleep: reset timeout */ - timeo = reset_timeo; - chip->erase_suspended = 0; - } - if (chip->write_suspended && chip_state == FL_WRITING) { - /* Write suspend occured while sleep: reset timeout */ - timeo = reset_timeo; - chip->write_suspended = 0; - } } /* Done and happy. */ diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index d72a5fb2d041..4e1be51cc122 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1935,14 +1935,14 @@ static void jedec_reset(u32 base, struct map_info *map, struct cfi_private *cfi) } -static int cfi_jedec_setup(struct cfi_private *p_cfi, int index) +static int cfi_jedec_setup(struct map_info *map, struct cfi_private *cfi, int index) { int i,num_erase_regions; uint8_t uaddr; - if (! (jedec_table[index].devtypes & p_cfi->device_type)) { + if (!(jedec_table[index].devtypes & cfi->device_type)) { DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n", - jedec_table[index].name, 4 * (1<<p_cfi->device_type)); + jedec_table[index].name, 4 * (1<<cfi->device_type)); return 0; } @@ -1950,27 +1950,28 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index) num_erase_regions = jedec_table[index].nr_regions; - p_cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL); - if (!p_cfi->cfiq) { + cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL); + if (!cfi->cfiq) { //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name); return 0; } - memset(p_cfi->cfiq,0,sizeof(struct cfi_ident)); + memset(cfi->cfiq, 0, sizeof(struct cfi_ident)); - p_cfi->cfiq->P_ID = jedec_table[index].cmd_set; - p_cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions; - p_cfi->cfiq->DevSize = jedec_table[index].dev_size; - p_cfi->cfi_mode = CFI_MODE_JEDEC; + cfi->cfiq->P_ID = jedec_table[index].cmd_set; + cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions; + cfi->cfiq->DevSize = jedec_table[index].dev_size; + cfi->cfi_mode = CFI_MODE_JEDEC; + cfi->sector_erase_cmd = CMD(0x30); for (i=0; i<num_erase_regions; i++){ - p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i]; + cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i]; } - p_cfi->cmdset_priv = NULL; + cfi->cmdset_priv = NULL; /* This may be redundant for some cases, but it doesn't hurt */ - p_cfi->mfr = jedec_table[index].mfr_id; - p_cfi->id = jedec_table[index].dev_id; + cfi->mfr = jedec_table[index].mfr_id; + cfi->id = jedec_table[index].dev_id; uaddr = jedec_table[index].uaddr; @@ -1978,8 +1979,8 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index) our brains explode when we see the datasheets talking about address lines numbered from A-1 to A18. The CFI table has unlock addresses in device-words according to the mode the device is connected in */ - p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / p_cfi->device_type; - p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / p_cfi->device_type; + cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / cfi->device_type; + cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / cfi->device_type; return 1; /* ok */ } @@ -2175,7 +2176,7 @@ static int jedec_probe_chip(struct map_info *map, __u32 base, "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n", __func__, cfi->mfr, cfi->id, cfi->addr_unlock1, cfi->addr_unlock2 ); - if (!cfi_jedec_setup(cfi, i)) + if (!cfi_jedec_setup(map, cfi, i)) return 0; goto ok_out; } diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c index 77d64ce19e9f..92de7e3a49a5 100644 --- a/drivers/mtd/maps/amd76xrom.c +++ b/drivers/mtd/maps/amd76xrom.c @@ -151,6 +151,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev, printk(KERN_ERR MOD_NAME " %s(): Unable to register resource %pR - kernel bug?\n", __func__, &window->rsrc); + return -EBUSY; } diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index cb20c67995d8..e0a2373bf0e2 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -413,7 +413,6 @@ error3: error2: list_del(&new->list); error1: - kfree(new); return ret; } diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 15682ec8530e..28af71c61834 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -968,6 +968,6 @@ static void __exit omap_nand_exit(void) module_init(omap_nand_init); module_exit(omap_nand_exit); -MODULE_ALIAS(DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards"); diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c index e78914938c5c..ac08750748a3 100644 --- a/drivers/mtd/onenand/generic.c +++ b/drivers/mtd/onenand/generic.c @@ -131,7 +131,7 @@ static struct platform_driver generic_onenand_driver = { .remove = __devexit_p(generic_onenand_remove), }; -MODULE_ALIAS(DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); static int __init generic_onenand_init(void) { diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index ac31f461cc1c..c849cacf4b2f 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -860,7 +860,7 @@ static void __exit omap2_onenand_exit(void) module_init(omap2_onenand_init); module_exit(omap2_onenand_exit); -MODULE_ALIAS(DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3"); diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c index 39214e512452..7ca0eded2561 100644 --- a/drivers/net/ariadne.c +++ b/drivers/net/ariadne.c @@ -425,11 +425,6 @@ static irqreturn_t ariadne_interrupt(int irq, void *data) int csr0, boguscnt; int handled = 0; - if (dev == NULL) { - printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n"); - return IRQ_NONE; - } - lance->RAP = CSR0; /* PCnet-ISA Controller Status */ if (!(lance->RDP & INTR)) /* Check if any interrupt has been */ diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index 7897d114b290..8849699c66c4 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -1211,6 +1211,7 @@ struct bnx2x { /* DCBX Negotation results */ struct dcbx_features dcbx_local_feat; u32 dcbx_error; + u32 pending_max; }; /** @@ -1616,8 +1617,8 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, /* CMNG constants, as derived from system spec calculations */ /* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */ #define DEF_MIN_RATE 100 -/* resolution of the rate shaping timer - 100 usec */ -#define RS_PERIODIC_TIMEOUT_USEC 100 +/* resolution of the rate shaping timer - 400 usec */ +#define RS_PERIODIC_TIMEOUT_USEC 400 /* number of bytes in single QM arbitration cycle - * coefficient for calculating the fairness timer */ #define QM_ARB_BYTES 160000 diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index 93798129061b..a71b32940533 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -996,6 +996,23 @@ void bnx2x_free_skbs(struct bnx2x *bp) bnx2x_free_rx_skbs(bp); } +void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value) +{ + /* load old values */ + u32 mf_cfg = bp->mf_config[BP_VN(bp)]; + + if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) { + /* leave all but MAX value */ + mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK; + + /* set new MAX value */ + mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT) + & FUNC_MF_CFG_MAX_BW_MASK; + + bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg); + } +} + static void bnx2x_free_msix_irqs(struct bnx2x *bp) { int i, offset = 1; @@ -1464,6 +1481,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bnx2x_set_eth_mac(bp, 1); + if (bp->pending_max) { + bnx2x_update_max_mf_config(bp, bp->pending_max); + bp->pending_max = 0; + } + if (bp->port.pmf) bnx2x_initial_phy_init(bp, load_mode); diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h index 326ba44b3ded..85ea7f26b51f 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.h +++ b/drivers/net/bnx2x/bnx2x_cmn.h @@ -341,6 +341,15 @@ void bnx2x_dcbx_init(struct bnx2x *bp); */ int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state); +/** + * Updates MAX part of MF configuration in HW + * (if required) + * + * @param bp + * @param value + */ +void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value); + /* dev_close main block */ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode); diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c index ef2919987a10..7e92f9d0dcfd 100644 --- a/drivers/net/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/bnx2x/bnx2x_ethtool.c @@ -238,7 +238,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) speed |= (cmd->speed_hi << 16); if (IS_MF_SI(bp)) { - u32 param = 0, part; + u32 part; u32 line_speed = bp->link_vars.line_speed; /* use 10G if no link detected */ @@ -251,24 +251,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) REQ_BC_VER_4_SET_MF_BW); return -EINVAL; } + part = (speed * 100) / line_speed; + if (line_speed < speed || !part) { BNX2X_DEV_INFO("Speed setting should be in a range " "from 1%% to 100%% " "of actual line speed\n"); return -EINVAL; } - /* load old values */ - param = bp->mf_config[BP_VN(bp)]; - /* leave only MIN value */ - param &= FUNC_MF_CFG_MIN_BW_MASK; - - /* set new MAX value */ - param |= (part << FUNC_MF_CFG_MAX_BW_SHIFT) - & FUNC_MF_CFG_MAX_BW_MASK; + if (bp->state != BNX2X_STATE_OPEN) + /* store value for following "load" */ + bp->pending_max = part; + else + bnx2x_update_max_mf_config(bp, part); - bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, param); return 0; } diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c index 032ae184b605..aa032339e321 100644 --- a/drivers/net/bnx2x/bnx2x_main.c +++ b/drivers/net/bnx2x/bnx2x_main.c @@ -2092,8 +2092,9 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type) bnx2x_calc_vn_weight_sum(bp); /* calculate and set min-max rate for each vn */ - for (vn = VN_0; vn < E1HVN_MAX; vn++) - bnx2x_init_vn_minmax(bp, vn); + if (bp->port.pmf) + for (vn = VN_0; vn < E1HVN_MAX; vn++) + bnx2x_init_vn_minmax(bp, vn); /* always enable rate shaping and fairness */ bp->cmng.flags.cmng_enables |= @@ -2162,13 +2163,6 @@ static void bnx2x_link_attn(struct bnx2x *bp) bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP); } - /* indicate link status only if link status actually changed */ - if (prev_link_status != bp->link_vars.link_status) - bnx2x_link_report(bp); - - if (IS_MF(bp)) - bnx2x_link_sync_notify(bp); - if (bp->link_vars.link_up && bp->link_vars.line_speed) { int cmng_fns = bnx2x_get_cmng_fns_mode(bp); @@ -2180,6 +2174,13 @@ static void bnx2x_link_attn(struct bnx2x *bp) DP(NETIF_MSG_IFUP, "single function mode without fairness\n"); } + + if (IS_MF(bp)) + bnx2x_link_sync_notify(bp); + + /* indicate link status only if link status actually changed */ + if (prev_link_status != bp->link_vars.link_status) + bnx2x_link_report(bp); } void bnx2x__link_status_update(struct bnx2x *bp) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 1024ae158227..a5d5d0b5b155 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -281,23 +281,23 @@ static inline int __check_agg_selection_timer(struct port *port) } /** - * __get_rx_machine_lock - lock the port's RX machine + * __get_state_machine_lock - lock the port's state machines * @port: the port we're looking at * */ -static inline void __get_rx_machine_lock(struct port *port) +static inline void __get_state_machine_lock(struct port *port) { - spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); + spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock)); } /** - * __release_rx_machine_lock - unlock the port's RX machine + * __release_state_machine_lock - unlock the port's state machines * @port: the port we're looking at * */ -static inline void __release_rx_machine_lock(struct port *port) +static inline void __release_state_machine_lock(struct port *port) { - spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); + spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock)); } /** @@ -388,14 +388,14 @@ static u8 __get_duplex(struct port *port) } /** - * __initialize_port_locks - initialize a port's RX machine spinlock + * __initialize_port_locks - initialize a port's STATE machine spinlock * @port: the port we're looking at * */ static inline void __initialize_port_locks(struct port *port) { // make sure it isn't called twice - spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); + spin_lock_init(&(SLAVE_AD_INFO(port->slave).state_machine_lock)); } //conversions @@ -1025,9 +1025,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) { rx_states_t last_state; - // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback) - __get_rx_machine_lock(port); - // keep current State Machine state to compare later if it was changed last_state = port->sm_rx_state; @@ -1133,7 +1130,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) pr_err("%s: An illegal loopback occurred on adapter (%s).\n" "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n", port->slave->dev->master->name, port->slave->dev->name); - __release_rx_machine_lock(port); return; } __update_selected(lacpdu, port); @@ -1153,7 +1149,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) break; } } - __release_rx_machine_lock(port); } /** @@ -2155,6 +2150,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work) goto re_arm; } + /* Lock around state machines to protect data accessed + * by all (e.g., port->sm_vars). ad_rx_machine may run + * concurrently due to incoming LACPDU. + */ + __get_state_machine_lock(port); + ad_rx_machine(NULL, port); ad_periodic_machine(port); ad_port_selection_logic(port); @@ -2164,6 +2165,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work) // turn off the BEGIN bit, since we already handled it if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; + + __release_state_machine_lock(port); } re_arm: @@ -2200,7 +2203,10 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u case AD_TYPE_LACPDU: pr_debug("Received LACPDU on port %d\n", port->actor_port_number); + /* Protect against concurrent state machines */ + __get_state_machine_lock(port); ad_rx_machine(lacpdu, port); + __release_state_machine_lock(port); break; case AD_TYPE_MARKER: diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index 2c46a154f2c6..b28baff70864 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -264,7 +264,8 @@ struct ad_bond_info { struct ad_slave_info { struct aggregator aggregator; // 802.3ad aggregator structure struct port port; // 802.3ad port structure - spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt + spinlock_t state_machine_lock; /* mutex state machines vs. + incoming LACPDU */ u16 id; }; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 5933621ac3ff..fc27a9926d9e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -528,8 +528,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, vnet_hdr_len = q->vnet_hdr_sz; err = -EINVAL; - if ((len -= vnet_hdr_len) < 0) + if (len < vnet_hdr_len) goto err; + len -= vnet_hdr_len; err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, sizeof(vnet_hdr)); diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 27e6f6d43cac..e3ebd90ae651 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -49,8 +49,8 @@ #include <asm/processor.h> #define DRV_NAME "r6040" -#define DRV_VERSION "0.26" -#define DRV_RELDATE "30May2010" +#define DRV_VERSION "0.27" +#define DRV_RELDATE "23Feb2011" /* PHY CHIP Address */ #define PHY1_ADDR 1 /* For MAC1 */ @@ -69,6 +69,8 @@ /* MAC registers */ #define MCR0 0x00 /* Control register 0 */ +#define MCR0_PROMISC 0x0020 /* Promiscuous mode */ +#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */ #define MCR1 0x04 /* Control register 1 */ #define MAC_RST 0x0001 /* Reset the MAC */ #define MBCR 0x08 /* Bus control */ @@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - u16 *adrp; - u16 reg; unsigned long flags; struct netdev_hw_addr *ha; int i; + u16 *adrp; + u16 hash_table[4] = { 0 }; + + spin_lock_irqsave(&lp->lock, flags); - /* MAC Address */ + /* Keep our MAC Address */ adrp = (u16 *)dev->dev_addr; iowrite16(adrp[0], ioaddr + MID_0L); iowrite16(adrp[1], ioaddr + MID_0M); iowrite16(adrp[2], ioaddr + MID_0H); - /* Promiscous Mode */ - spin_lock_irqsave(&lp->lock, flags); - /* Clear AMCP & PROM bits */ - reg = ioread16(ioaddr) & ~0x0120; - if (dev->flags & IFF_PROMISC) { - reg |= 0x0020; - lp->mcr0 |= 0x0020; - } - /* Too many multicast addresses - * accept all traffic */ - else if ((netdev_mc_count(dev) > MCAST_MAX) || - (dev->flags & IFF_ALLMULTI)) - reg |= 0x0020; + lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN); - iowrite16(reg, ioaddr); - spin_unlock_irqrestore(&lp->lock, flags); + /* Promiscuous mode */ + if (dev->flags & IFF_PROMISC) + lp->mcr0 |= MCR0_PROMISC; - /* Build the hash table */ - if (netdev_mc_count(dev) > MCAST_MAX) { - u16 hash_table[4]; - u32 crc; + /* Enable multicast hash table function to + * receive all multicast packets. */ + else if (dev->flags & IFF_ALLMULTI) { + lp->mcr0 |= MCR0_HASH_EN; - for (i = 0; i < 4; i++) - hash_table[i] = 0; + for (i = 0; i < MCAST_MAX ; i++) { + iowrite16(0, ioaddr + MID_1L + 8 * i); + iowrite16(0, ioaddr + MID_1M + 8 * i); + iowrite16(0, ioaddr + MID_1H + 8 * i); + } + for (i = 0; i < 4; i++) + hash_table[i] = 0xffff; + } + /* Use internal multicast address registers if the number of + * multicast addresses is not greater than MCAST_MAX. */ + else if (netdev_mc_count(dev) <= MCAST_MAX) { + i = 0; netdev_for_each_mc_addr(ha, dev) { - char *addrs = ha->addr; + u16 *adrp = (u16 *) ha->addr; + iowrite16(adrp[0], ioaddr + MID_1L + 8 * i); + iowrite16(adrp[1], ioaddr + MID_1M + 8 * i); + iowrite16(adrp[2], ioaddr + MID_1H + 8 * i); + i++; + } + while (i < MCAST_MAX) { + iowrite16(0, ioaddr + MID_1L + 8 * i); + iowrite16(0, ioaddr + MID_1M + 8 * i); + iowrite16(0, ioaddr + MID_1H + 8 * i); + i++; + } + } + /* Otherwise, Enable multicast hash table function. */ + else { + u32 crc; - if (!(*addrs & 1)) - continue; + lp->mcr0 |= MCR0_HASH_EN; + + for (i = 0; i < MCAST_MAX ; i++) { + iowrite16(0, ioaddr + MID_1L + 8 * i); + iowrite16(0, ioaddr + MID_1M + 8 * i); + iowrite16(0, ioaddr + MID_1H + 8 * i); + } - crc = ether_crc_le(6, addrs); + /* Build multicast hash table */ + netdev_for_each_mc_addr(ha, dev) { + u8 *addrs = ha->addr; + + crc = ether_crc(ETH_ALEN, addrs); crc >>= 26; - hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); + hash_table[crc >> 4] |= 1 << (crc & 0xf); } - /* Fill the MAC hash tables with their values */ + } + + iowrite16(lp->mcr0, ioaddr + MCR0); + + /* Fill the MAC hash tables with their values */ + if (lp->mcr0 && MCR0_HASH_EN) { iowrite16(hash_table[0], ioaddr + MAR0); iowrite16(hash_table[1], ioaddr + MAR1); iowrite16(hash_table[2], ioaddr + MAR2); iowrite16(hash_table[3], ioaddr + MAR3); } - /* Multicast Address 1~4 case */ - i = 0; - netdev_for_each_mc_addr(ha, dev) { - if (i >= MCAST_MAX) - break; - adrp = (u16 *) ha->addr; - iowrite16(adrp[0], ioaddr + MID_1L + 8 * i); - iowrite16(adrp[1], ioaddr + MID_1M + 8 * i); - iowrite16(adrp[2], ioaddr + MID_1H + 8 * i); - i++; - } - while (i < MCAST_MAX) { - iowrite16(0xffff, ioaddr + MID_1L + 8 * i); - iowrite16(0xffff, ioaddr + MID_1M + 8 * i); - iowrite16(0xffff, ioaddr + MID_1H + 8 * i); - i++; - } + + spin_unlock_irqrestore(&lp->lock, flags); } static void netdev_get_drvinfo(struct net_device *dev, diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index 64bfdae5956f..d70bde95460b 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -1178,6 +1178,11 @@ static int smsc911x_open(struct net_device *dev) smsc911x_reg_write(pdata, HW_CFG, 0x00050000); smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740); + /* Increase the legal frame size of VLAN tagged frames to 1522 bytes */ + spin_lock_irq(&pdata->mac_lock); + smsc911x_mac_write(pdata, VLAN1, ETH_P_8021Q); + spin_unlock_irq(&pdata->mac_lock); + /* Make sure EEPROM has finished loading before setting GPIO_CFG */ timeout = 50; while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) && diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 3a5a6fcc0ead..492b7d807fe8 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = { #ifdef CONFIG_PCI_MSI static int pci_frontend_enable_msix(struct pci_dev *dev, - int **vector, int nvec) + int vector[], int nvec) { int err; int i; @@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev, if (likely(!err)) { if (likely(!op.value)) { /* we get the result */ - for (i = 0; i < nvec; i++) - *(*vector+i) = op.msix_entries[i].vector; - return 0; + for (i = 0; i < nvec; i++) { + if (op.msix_entries[i].vector <= 0) { + dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n", + i, op.msix_entries[i].vector); + err = -EINVAL; + vector[i] = -1; + continue; + } + vector[i] = op.msix_entries[i].vector; + } } else { printk(KERN_DEBUG "enable msix get value %x\n", op.value); - return op.value; } } else { dev_err(&dev->dev, "enable msix get err %x\n", err); - return err; } + return err; } static void pci_frontend_disable_msix(struct pci_dev *dev) @@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev) dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); } -static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) +static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[]) { int err; struct xen_pci_op op = { @@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) err = do_pci_op(pdev, &op); if (likely(!err)) { - *(*vector) = op.value; + vector[0] = op.value; + if (op.value <= 0) { + dev_warn(&dev->dev, "MSI entry is invalid: %d!\n", + op.value); + err = -EINVAL; + vector[0] = -1; + } } else { dev_err(&dev->dev, "pci frontend enable msi failed for dev " "%x:%x\n", op.bus, op.devfn); @@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev) pcifront_free_roots(pdev); - /*For PCIE_AER error handling job*/ - flush_scheduled_work(); + cancel_work_sync(&pdev->op_work); if (pdev->irq >= 0) unbind_from_irqhandler(pdev->irq, pdev); diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 158cecbec718..4a109835e420 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -282,6 +282,9 @@ int core_tmr_lun_reset( atomic_set(&task->task_active, 0); atomic_set(&task->task_stop, 0); + } else { + if (atomic_read(&task->task_execute_queue) != 0) + transport_remove_task_from_execute_queue(task, dev); } __transport_stop_task_timer(task, &flags); @@ -301,6 +304,7 @@ int core_tmr_lun_reset( DEBUG_LR("LUN_RESET: got t_transport_active = 1 for" " task: %p, t_fe_count: %d dev: %p\n", task, fe_count, dev); + atomic_set(&T_TASK(cmd)->t_transport_aborted, 1); spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags); core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); @@ -310,6 +314,7 @@ int core_tmr_lun_reset( } DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p," " t_fe_count: %d dev: %p\n", task, fe_count, dev); + atomic_set(&T_TASK(cmd)->t_transport_aborted, 1); spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags); core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 236e22d8cfae..4bbf6c147f89 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1207,7 +1207,7 @@ transport_get_task_from_execute_queue(struct se_device *dev) * * */ -static void transport_remove_task_from_execute_queue( +void transport_remove_task_from_execute_queue( struct se_task *task, struct se_device *dev) { @@ -5549,7 +5549,8 @@ static void transport_generic_wait_for_tasks( atomic_set(&T_TASK(cmd)->transport_lun_stop, 0); } - if (!atomic_read(&T_TASK(cmd)->t_transport_active)) + if (!atomic_read(&T_TASK(cmd)->t_transport_active) || + atomic_read(&T_TASK(cmd)->t_transport_aborted)) goto remove; atomic_set(&T_TASK(cmd)->t_transport_stop, 1); @@ -5956,6 +5957,9 @@ static void transport_processing_shutdown(struct se_device *dev) atomic_set(&task->task_active, 0); atomic_set(&task->task_stop, 0); + } else { + if (atomic_read(&task->task_execute_queue) != 0) + transport_remove_task_from_execute_queue(task, dev); } __transport_stop_task_timer(task, &flags); diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index eca855a55c0d..3de4ba0260a5 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -646,7 +646,7 @@ static int __devexit cpwd_remove(struct platform_device *op) struct cpwd *p = dev_get_drvdata(&op->dev); int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < WD_NUMDEVS; i++) { misc_deregister(&p->devs[i].misc); if (!p->enabled) { diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 24b966d5061a..204a5603c4ae 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -710,7 +710,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) return 0; } -static void __devexit hpwdt_exit_nmi_decoding(void) +static void hpwdt_exit_nmi_decoding(void) { unregister_die_notifier(&die_notifier); if (cru_rom_addr) @@ -726,7 +726,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) return 0; } -static void __devexit hpwdt_exit_nmi_decoding(void) +static void hpwdt_exit_nmi_decoding(void) { } #endif /* CONFIG_HPWDT_NMI_DECODING */ diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 0461858e07d0..b61ab1c54293 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -508,7 +508,7 @@ static int __init sch311x_detect(int sio_config_port, unsigned short *addr) sch311x_sio_outb(sio_config_port, 0x07, 0x0a); /* Check if Logical Device Register is currently active */ - if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0) + if ((sch311x_sio_inb(sio_config_port, 0x30) & 0x01) == 0) printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n"); /* Get the base address of the runtime registers */ diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c index a6c12dec91a1..df2a64dc9672 100644 --- a/drivers/watchdog/w83697ug_wdt.c +++ b/drivers/watchdog/w83697ug_wdt.c @@ -109,7 +109,7 @@ static int w83697ug_select_wd_register(void) outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */ outb_p(0x30, WDT_EFER); /* select CR30 */ c = inb_p(WDT_EFDR); - outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */ + outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */ return 0; } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 43f9f02c7db0..718050ace08f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages) set_phys_to_machine(pfn, frame_list[i]); /* Link back into the page tables if not highmem. */ - if (pfn < max_low_pfn) { + if (!xen_hvm_domain() && pfn < max_low_pfn) { int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), @@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages) scrub_page(page); - if (!PageHighMem(page)) { + if (!xen_hvm_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), __pte_ma(0), 0); @@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages) /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); balloon_append(pfn_to_page(pfn)); } @@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier; static int __init balloon_init(void) { - unsigned long pfn, extra_pfn_end; + unsigned long pfn, nr_pages, extra_pfn_end; struct page *page; - if (!xen_pv_domain()) + if (!xen_domain()) return -ENODEV; pr_info("xen_balloon: Initialising balloon driver.\n"); - balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); + if (xen_pv_domain()) + nr_pages = xen_start_info->nr_pages; + else + nr_pages = max_pfn; + balloon_stats.current_pages = min(nr_pages, max_pfn); balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 74681478100a..149fa875e396 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -114,7 +114,7 @@ struct cpu_evtchn_s { static __initdata struct cpu_evtchn_s init_evtchn_mask = { .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, }; -static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; +static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask; static inline unsigned long *cpu_evtchn_mask(int cpu) { @@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) BUG_ON(irq == -1); #ifdef CONFIG_SMP - cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); + cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); #endif clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); @@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void) /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { - cpumask_copy(desc->affinity, cpumask_of(0)); + cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); } #endif @@ -376,81 +376,69 @@ static void unmask_evtchn(int port) put_cpu(); } -static int get_nr_hw_irqs(void) +static int xen_allocate_irq_dynamic(void) { - int ret = 1; + int first = 0; + int irq; #ifdef CONFIG_X86_IO_APIC - ret = get_nr_irqs_gsi(); + /* + * For an HVM guest or domain 0 which see "real" (emulated or + * actual repectively) GSIs we allocate dynamic IRQs + * e.g. those corresponding to event channels or MSIs + * etc. from the range above those "real" GSIs to avoid + * collisions. + */ + if (xen_initial_domain() || xen_hvm_domain()) + first = get_nr_irqs_gsi(); #endif - return ret; -} +retry: + irq = irq_alloc_desc_from(first, -1); -static int find_unbound_pirq(int type) -{ - int rc, i; - struct physdev_get_free_pirq op_get_free_pirq; - op_get_free_pirq.type = type; + if (irq == -ENOMEM && first > NR_IRQS_LEGACY) { + printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n"); + first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY); + goto retry; + } - rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); - if (!rc) - return op_get_free_pirq.pirq; + if (irq < 0) + panic("No available IRQ to bind to: increase nr_irqs!\n"); - for (i = 0; i < nr_irqs; i++) { - if (pirq_to_irq[i] < 0) - return i; - } - return -1; + return irq; } -static int find_unbound_irq(void) +static int xen_allocate_irq_gsi(unsigned gsi) { - struct irq_data *data; - int irq, res; - int bottom = get_nr_hw_irqs(); - int top = nr_irqs-1; - - if (bottom == nr_irqs) - goto no_irqs; + int irq; - /* This loop starts from the top of IRQ space and goes down. - * We need this b/c if we have a PCI device in a Xen PV guest - * we do not have an IO-APIC (though the backend might have them) - * mapped in. To not have a collision of physical IRQs with the Xen - * event channels start at the top of the IRQ space for virtual IRQs. + /* + * A PV guest has no concept of a GSI (since it has no ACPI + * nor access to/knowledge of the physical APICs). Therefore + * all IRQs are dynamically allocated from the entire IRQ + * space. */ - for (irq = top; irq > bottom; irq--) { - data = irq_get_irq_data(irq); - /* only 15->0 have init'd desc; handle irq > 16 */ - if (!data) - break; - if (data->chip == &no_irq_chip) - break; - if (data->chip != &xen_dynamic_chip) - continue; - if (irq_info[irq].type == IRQT_UNBOUND) - return irq; - } - - if (irq == bottom) - goto no_irqs; + if (xen_pv_domain() && !xen_initial_domain()) + return xen_allocate_irq_dynamic(); - res = irq_alloc_desc_at(irq, -1); + /* Legacy IRQ descriptors are already allocated by the arch. */ + if (gsi < NR_IRQS_LEGACY) + return gsi; - if (WARN_ON(res != irq)) - return -1; + irq = irq_alloc_desc_at(gsi, -1); + if (irq < 0) + panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq); return irq; - -no_irqs: - panic("No available IRQ to bind to: increase nr_irqs!\n"); } -static bool identity_mapped_irq(unsigned irq) +static void xen_free_irq(unsigned irq) { - /* identity map all the hardware irqs */ - return irq < get_nr_hw_irqs(); + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq < NR_IRQS_LEGACY) + return; + + irq_free_desc(irq); } static void pirq_unmask_notify(int irq) @@ -486,7 +474,7 @@ static bool probing_irq(int irq) return desc && desc->action == NULL; } -static unsigned int startup_pirq(unsigned int irq) +static unsigned int __startup_pirq(unsigned int irq) { struct evtchn_bind_pirq bind_pirq; struct irq_info *info = info_for_irq(irq); @@ -524,9 +512,15 @@ out: return 0; } -static void shutdown_pirq(unsigned int irq) +static unsigned int startup_pirq(struct irq_data *data) +{ + return __startup_pirq(data->irq); +} + +static void shutdown_pirq(struct irq_data *data) { struct evtchn_close close; + unsigned int irq = data->irq; struct irq_info *info = info_for_irq(irq); int evtchn = evtchn_from_irq(irq); @@ -546,20 +540,20 @@ static void shutdown_pirq(unsigned int irq) info->evtchn = 0; } -static void enable_pirq(unsigned int irq) +static void enable_pirq(struct irq_data *data) { - startup_pirq(irq); + startup_pirq(data); } -static void disable_pirq(unsigned int irq) +static void disable_pirq(struct irq_data *data) { } -static void ack_pirq(unsigned int irq) +static void ack_pirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); - move_native_irq(irq); + move_native_irq(data->irq); if (VALID_EVTCHN(evtchn)) { mask_evtchn(evtchn); @@ -567,23 +561,6 @@ static void ack_pirq(unsigned int irq) } } -static void end_pirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - struct irq_desc *desc = irq_to_desc(irq); - - if (WARN_ON(!desc)) - return; - - if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) == - (IRQ_DISABLED|IRQ_PENDING)) { - shutdown_pirq(irq); - } else if (VALID_EVTCHN(evtchn)) { - unmask_evtchn(evtchn); - pirq_unmask_notify(irq); - } -} - static int find_irq_by_gsi(unsigned gsi) { int irq; @@ -638,14 +615,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) goto out; /* XXX need refcount? */ } - /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore - * we are using the !xen_initial_domain() to drop in the function.*/ - if (identity_mapped_irq(gsi) || (!xen_initial_domain() && - xen_pv_domain())) { - irq = gsi; - irq_alloc_desc_at(irq, -1); - } else - irq = find_unbound_irq(); + irq = xen_allocate_irq_gsi(gsi); set_irq_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, name); @@ -658,7 +628,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) * this in the priv domain. */ if (xen_initial_domain() && HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { - irq_free_desc(irq); + xen_free_irq(irq); irq = -ENOSPC; goto out; } @@ -674,87 +644,46 @@ out: } #ifdef CONFIG_PCI_MSI -#include <linux/msi.h> -#include "../pci/msi.h" - -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) +int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) { - spin_lock(&irq_mapping_update_lock); - - if (alloc & XEN_ALLOC_IRQ) { - *irq = find_unbound_irq(); - if (*irq == -1) - goto out; - } - - if (alloc & XEN_ALLOC_PIRQ) { - *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); - if (*pirq == -1) - goto out; - } + int rc; + struct physdev_get_free_pirq op_get_free_pirq; - set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, - handle_level_irq, name); + op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); - irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); - pirq_to_irq[*pirq] = *irq; + WARN_ONCE(rc == -ENOSYS, + "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n"); -out: - spin_unlock(&irq_mapping_update_lock); + return rc ? -1 : op_get_free_pirq.pirq; } -int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) +int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, + int pirq, int vector, const char *name) { - int irq = -1; - struct physdev_map_pirq map_irq; - int rc; - int pos; - u32 table_offset, bir; - - memset(&map_irq, 0, sizeof(map_irq)); - map_irq.domid = DOMID_SELF; - map_irq.type = MAP_PIRQ_TYPE_MSI; - map_irq.index = -1; - map_irq.pirq = -1; - map_irq.bus = dev->bus->number; - map_irq.devfn = dev->devfn; - - if (type == PCI_CAP_ID_MSIX) { - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - - pci_read_config_dword(dev, msix_table_offset_reg(pos), - &table_offset); - bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); - - map_irq.table_base = pci_resource_start(dev, bir); - map_irq.entry_nr = msidesc->msi_attrib.entry_nr; - } + int irq, ret; spin_lock(&irq_mapping_update_lock); - irq = find_unbound_irq(); - + irq = xen_allocate_irq_dynamic(); if (irq == -1) goto out; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); - if (rc) { - printk(KERN_WARNING "xen map irq failed %d\n", rc); - - irq_free_desc(irq); - - irq = -1; - goto out; - } - irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index); - set_irq_chip_and_handler_name(irq, &xen_pirq_chip, - handle_level_irq, - (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi"); + handle_level_irq, name); + irq_info[irq] = mk_pirq_info(0, pirq, 0, vector); + pirq_to_irq[pirq] = irq; + ret = set_irq_msi(irq, msidesc); + if (ret < 0) + goto error_irq; out: spin_unlock(&irq_mapping_update_lock); return irq; +error_irq: + spin_unlock(&irq_mapping_update_lock); + xen_free_irq(irq); + return -1; } #endif @@ -779,11 +708,12 @@ int xen_destroy_irq(int irq) printk(KERN_WARNING "unmap irq failed %d\n", rc); goto out; } - pirq_to_irq[info->u.pirq.pirq] = -1; } + pirq_to_irq[info->u.pirq.pirq] = -1; + irq_info[irq] = mk_unbound_info(); - irq_free_desc(irq); + xen_free_irq(irq); out: spin_unlock(&irq_mapping_update_lock); @@ -814,7 +744,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) irq = evtchn_to_irq[evtchn]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_fasteoi_irq, "event"); @@ -839,7 +769,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq = per_cpu(ipi_to_irq, cpu)[ipi]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); if (irq < 0) goto out; @@ -875,7 +805,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) irq = per_cpu(virq_to_irq, cpu)[virq]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); set_irq_chip_and_handler_name(irq, &xen_percpu_chip, handle_percpu_irq, "virq"); @@ -934,7 +864,7 @@ static void unbind_from_irq(unsigned int irq) if (irq_info[irq].type != IRQT_UNBOUND) { irq_info[irq] = mk_unbound_info(); - irq_free_desc(irq); + xen_free_irq(irq); } spin_unlock(&irq_mapping_update_lock); @@ -990,7 +920,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, if (irq < 0) return irq; - irqflags |= IRQF_NO_SUSPEND; + irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME; retval = request_irq(irq, handler, irqflags, devname, dev_id); if (retval != 0) { unbind_from_irq(irq); @@ -1234,11 +1164,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) return 0; } -static int set_affinity_irq(unsigned irq, const struct cpumask *dest) +static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, + bool force) { unsigned tcpu = cpumask_first(dest); - return rebind_irq_to_cpu(irq, tcpu); + return rebind_irq_to_cpu(data->irq, tcpu); } int resend_irq_on_evtchn(unsigned int irq) @@ -1257,35 +1188,35 @@ int resend_irq_on_evtchn(unsigned int irq) return 1; } -static void enable_dynirq(unsigned int irq) +static void enable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); if (VALID_EVTCHN(evtchn)) unmask_evtchn(evtchn); } -static void disable_dynirq(unsigned int irq) +static void disable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); if (VALID_EVTCHN(evtchn)) mask_evtchn(evtchn); } -static void ack_dynirq(unsigned int irq) +static void ack_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); - move_masked_irq(irq); + move_masked_irq(data->irq); if (VALID_EVTCHN(evtchn)) unmask_evtchn(evtchn); } -static int retrigger_dynirq(unsigned int irq) +static int retrigger_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); struct shared_info *sh = HYPERVISOR_shared_info; int ret = 0; @@ -1334,7 +1265,7 @@ static void restore_cpu_pirqs(void) printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); - startup_pirq(irq); + __startup_pirq(irq); } } @@ -1445,7 +1376,6 @@ void xen_poll_irq(int irq) void xen_irq_resume(void) { unsigned int cpu, irq, evtchn; - struct irq_desc *desc; init_evtchn_cpu_bindings(); @@ -1465,66 +1395,48 @@ void xen_irq_resume(void) restore_cpu_ipis(cpu); } - /* - * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These - * are not handled by the IRQ core. - */ - for_each_irq_desc(irq, desc) { - if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND)) - continue; - if (desc->status & IRQ_DISABLED) - continue; - - evtchn = evtchn_from_irq(irq); - if (evtchn == -1) - continue; - - unmask_evtchn(evtchn); - } - restore_cpu_pirqs(); } static struct irq_chip xen_dynamic_chip __read_mostly = { - .name = "xen-dyn", + .name = "xen-dyn", - .disable = disable_dynirq, - .mask = disable_dynirq, - .unmask = enable_dynirq, + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, - .eoi = ack_dynirq, - .set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, + .irq_eoi = ack_dynirq, + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, }; static struct irq_chip xen_pirq_chip __read_mostly = { - .name = "xen-pirq", + .name = "xen-pirq", - .startup = startup_pirq, - .shutdown = shutdown_pirq, + .irq_startup = startup_pirq, + .irq_shutdown = shutdown_pirq, - .enable = enable_pirq, - .unmask = enable_pirq, + .irq_enable = enable_pirq, + .irq_unmask = enable_pirq, - .disable = disable_pirq, - .mask = disable_pirq, + .irq_disable = disable_pirq, + .irq_mask = disable_pirq, - .ack = ack_pirq, - .end = end_pirq, + .irq_ack = ack_pirq, - .set_affinity = set_affinity_irq, + .irq_set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, + .irq_retrigger = retrigger_dynirq, }; static struct irq_chip xen_percpu_chip __read_mostly = { - .name = "xen-percpu", + .name = "xen-percpu", - .disable = disable_dynirq, - .mask = disable_dynirq, - .unmask = enable_dynirq, + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, - .ack = ack_dynirq, + .irq_ack = ack_dynirq, }; int xen_set_callback_via(uint64_t via) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 24177272bcb8..ebb292859b59 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -34,42 +34,38 @@ enum shutdown_state { /* Ignore multiple shutdown requests. */ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; -#ifdef CONFIG_PM_SLEEP -static int xen_hvm_suspend(void *data) -{ - int err; - struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; - int *cancelled = data; - - BUG_ON(!irqs_disabled()); - - err = sysdev_suspend(PMSG_SUSPEND); - if (err) { - printk(KERN_ERR "xen_hvm_suspend: sysdev_suspend failed: %d\n", - err); - return err; - } - - *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); +struct suspend_info { + int cancelled; + unsigned long arg; /* extra hypercall argument */ + void (*pre)(void); + void (*post)(int cancelled); +}; - xen_hvm_post_suspend(*cancelled); +static void xen_hvm_post_suspend(int cancelled) +{ + xen_arch_hvm_post_suspend(cancelled); gnttab_resume(); +} - if (!*cancelled) { - xen_irq_resume(); - xen_console_resume(); - xen_timer_resume(); - } - - sysdev_resume(); +static void xen_pre_suspend(void) +{ + xen_mm_pin_all(); + gnttab_suspend(); + xen_arch_pre_suspend(); +} - return 0; +static void xen_post_suspend(int cancelled) +{ + xen_arch_post_suspend(cancelled); + gnttab_resume(); + xen_mm_unpin_all(); } +#ifdef CONFIG_PM_SLEEP static int xen_suspend(void *data) { + struct suspend_info *si = data; int err; - int *cancelled = data; BUG_ON(!irqs_disabled()); @@ -80,22 +76,20 @@ static int xen_suspend(void *data) return err; } - xen_mm_pin_all(); - gnttab_suspend(); - xen_pre_suspend(); + if (si->pre) + si->pre(); /* * This hypercall returns 1 if suspend was cancelled * or the domain was merely checkpointed, and 0 if it * is resuming in a new domain. */ - *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + si->cancelled = HYPERVISOR_suspend(si->arg); - xen_post_suspend(*cancelled); - gnttab_resume(); - xen_mm_unpin_all(); + if (si->post) + si->post(si->cancelled); - if (!*cancelled) { + if (!si->cancelled) { xen_irq_resume(); xen_console_resume(); xen_timer_resume(); @@ -109,7 +103,7 @@ static int xen_suspend(void *data) static void do_suspend(void) { int err; - int cancelled = 1; + struct suspend_info si; shutting_down = SHUTDOWN_SUSPEND; @@ -139,20 +133,29 @@ static void do_suspend(void) goto out_resume; } - if (xen_hvm_domain()) - err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0)); - else - err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); + si.cancelled = 1; + + if (xen_hvm_domain()) { + si.arg = 0UL; + si.pre = NULL; + si.post = &xen_hvm_post_suspend; + } else { + si.arg = virt_to_mfn(xen_start_info); + si.pre = &xen_pre_suspend; + si.post = &xen_post_suspend; + } + + err = stop_machine(xen_suspend, &si, cpumask_of(0)); dpm_resume_noirq(PMSG_RESUME); if (err) { printk(KERN_ERR "failed to start xen_suspend: %d\n", err); - cancelled = 1; + si.cancelled = 1; } out_resume: - if (!cancelled) { + if (!si.cancelled) { xen_arch_resume(); xs_resume(); } else @@ -172,12 +175,39 @@ out: } #endif /* CONFIG_PM_SLEEP */ +struct shutdown_handler { + const char *command; + void (*cb)(void); +}; + +static void do_poweroff(void) +{ + shutting_down = SHUTDOWN_POWEROFF; + orderly_poweroff(false); +} + +static void do_reboot(void) +{ + shutting_down = SHUTDOWN_POWEROFF; /* ? */ + ctrl_alt_del(); +} + static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { char *str; struct xenbus_transaction xbt; int err; + static struct shutdown_handler handlers[] = { + { "poweroff", do_poweroff }, + { "halt", do_poweroff }, + { "reboot", do_reboot }, +#ifdef CONFIG_PM_SLEEP + { "suspend", do_suspend }, +#endif + {NULL, NULL}, + }; + static struct shutdown_handler *handler; if (shutting_down != SHUTDOWN_INVALID) return; @@ -194,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch, return; } - xenbus_write(xbt, "control", "shutdown", ""); + for (handler = &handlers[0]; handler->command; handler++) { + if (strcmp(str, handler->command) == 0) + break; + } + + /* Only acknowledge commands which we are prepared to handle. */ + if (handler->cb) + xenbus_write(xbt, "control", "shutdown", ""); err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) { @@ -202,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch, goto again; } - if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) { - shutting_down = SHUTDOWN_POWEROFF; - orderly_poweroff(false); - } else if (strcmp(str, "reboot") == 0) { - shutting_down = SHUTDOWN_POWEROFF; /* ? */ - ctrl_alt_del(); -#ifdef CONFIG_PM_SLEEP - } else if (strcmp(str, "suspend") == 0) { - do_suspend(); -#endif + if (handler->cb) { + handler->cb(); } else { printk(KERN_INFO "Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; @@ -291,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier, return NOTIFY_DONE; } -static int __init __setup_shutdown_event(void) -{ - /* Delay initialization in the PV on HVM case */ - if (xen_hvm_domain()) - return 0; - - if (!xen_pv_domain()) - return -ENODEV; - - return xen_setup_shutdown_event(); -} - int xen_setup_shutdown_event(void) { static struct notifier_block xenstore_notifier = { .notifier_call = shutdown_event }; + + if (!xen_domain()) + return -ENODEV; register_xenstore_notifier(&xenstore_notifier); return 0; } EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); -subsys_initcall(__setup_shutdown_event); +subsys_initcall(xen_setup_shutdown_event); diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index afbe041f42c5..319dd0a94d51 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, if (ret) goto out; xenbus_probe(NULL); - ret = xen_setup_shutdown_event(); - if (ret) - goto out; return 0; out: diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f820fa23df4..7f78cc78fdd0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -729,6 +729,15 @@ struct btrfs_space_info { u64 disk_total; /* total bytes on disk, takes mirrors into account */ + /* + * we bump reservation progress every time we decrement + * bytes_reserved. This way people waiting for reservations + * know something good has happened and they can check + * for progress. The number here isn't to be trusted, it + * just shows reclaim activity + */ + unsigned long reservation_progress; + int full; /* indicates that we cannot allocate any more chunks for this space */ int force_alloc; /* set if we need to force a chunk alloc for diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 588ff9849873..7b3089b5c2df 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, u64 max_reclaim; u64 reclaimed = 0; long time_left; - int pause = 1; int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; + unsigned long progress; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; smp_mb(); reserved = space_info->bytes_reserved; + progress = space_info->reservation_progress; if (reserved == 0) return 0; @@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); spin_lock(&space_info->lock); - if (reserved > space_info->bytes_reserved) { - loops = 0; + if (reserved > space_info->bytes_reserved) reclaimed += reserved - space_info->bytes_reserved; - } else { - loops++; - } reserved = space_info->bytes_reserved; spin_unlock(&space_info->lock); + loops++; + if (reserved == 0 || reclaimed >= max_reclaim) break; if (trans && trans->transaction->blocked) return -EAGAIN; - __set_current_state(TASK_INTERRUPTIBLE); - time_left = schedule_timeout(pause); + time_left = schedule_timeout_interruptible(1); /* We were interrupted, exit */ if (time_left) break; - pause <<= 1; - if (pause > HZ / 10) - pause = HZ / 10; + /* we've kicked the IO a few times, if anything has been freed, + * exit. There is no sense in looping here for a long time + * when we really need to commit the transaction, or there are + * just too many writers without enough free space + */ + + if (loops > 3) { + smp_mb(); + if (progress != space_info->reservation_progress) + break; + } } return reclaimed >= to_reclaim; @@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, if (num_bytes) { spin_lock(&space_info->lock); space_info->bytes_reserved -= num_bytes; + space_info->reservation_progress++; spin_unlock(&space_info->lock); } } @@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_reserved -= num_bytes; + sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; } @@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) to_reserve = 0; } spin_unlock(&BTRFS_I(inode)->accounting_lock); - to_reserve += calc_csum_metadata_size(inode, num_bytes); ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); if (ret) @@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, btrfs_set_block_group_used(&cache->item, old_val); cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; + cache->space_info->reservation_progress++; cache->space_info->bytes_used += num_bytes; cache->space_info->disk_used += num_bytes * factor; spin_unlock(&cache->lock); @@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root, if (reserved) { cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; + cache->space_info->reservation_progress++; } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); @@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_readonly += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; + space_info->reservation_progress++; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); @@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (ret) { spin_lock(&cache->space_info->lock); cache->space_info->bytes_reserved -= buf->len; + cache->space_info->reservation_progress++; spin_unlock(&cache->space_info->lock); } goto out; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fd3f172e94e6..714adc4ac4c2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } while (!end) { - off = extent_map_end(em); - if (off >= max) - end = 1; + u64 offset_in_extent; + + /* break if the extent we found is outside the range */ + if (em->start >= max || extent_map_end(em) < off) + break; + + /* + * get_extent may return an extent that starts before our + * requested range. We have to make sure the ranges + * we return to fiemap always move forward and don't + * overlap, so adjust the offsets here + */ + em_start = max(em->start, off); - em_start = em->start; - em_len = em->len; + /* + * record the offset from the start of the extent + * for adjusting the disk offset below + */ + offset_in_extent = em_start - em->start; em_end = extent_map_end(em); + em_len = em_end - em_start; emflags = em->flags; disko = 0; flags = 0; + /* + * bump off for our next call to get_extent + */ + off = extent_map_end(em); + if (off >= max) + end = 1; + if (em->block_start == EXTENT_MAP_LAST_BYTE) { end = 1; flags |= FIEMAP_EXTENT_LAST; @@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); } else { - disko = em->block_start; + disko = em->block_start + offset_in_extent; } if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) flags |= FIEMAP_EXTENT_ENCODED; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7084140d5940..f447b783bb84 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, /* Flush processor's dcache for this page */ flush_dcache_page(page); + + /* + * if we get a partial write, we can end up with + * partially up to date pages. These add + * a lot of complexity, so make sure they don't + * happen by forcing this copy to be retried. + * + * The rest of the btrfs_file_write code will fall + * back to page at a time copies after we return 0. + */ + if (!PageUptodate(page) && copied < count) + copied = 0; + iov_iter_advance(i, copied); write_bytes -= copied; total_copied += copied; @@ -763,6 +776,27 @@ out: } /* + * on error we return an unlocked page and the error value + * on success we return a locked page and 0 + */ +static int prepare_uptodate_page(struct page *page, u64 pos) +{ + int ret = 0; + + if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { + ret = btrfs_readpage(NULL, page); + if (ret) + return ret; + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + return -EIO; + } + } + return 0; +} + +/* * this gets pages into the page cache and locks them down, it also properly * waits for data=ordered extents to finish before allowing the pages to be * modified. @@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = fdentry(file)->d_inode; int err = 0; + int faili = 0; u64 start_pos; u64 last_pos; @@ -794,15 +829,24 @@ again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { - int c; - for (c = i - 1; c >= 0; c--) { - unlock_page(pages[c]); - page_cache_release(pages[c]); - } - return -ENOMEM; + faili = i - 1; + err = -ENOMEM; + goto fail; + } + + if (i == 0) + err = prepare_uptodate_page(pages[i], pos); + if (i == num_pages - 1) + err = prepare_uptodate_page(pages[i], + pos + write_bytes); + if (err) { + page_cache_release(pages[i]); + faili = i - 1; + goto fail; } wait_on_page_writeback(pages[i]); } + err = 0; if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; lock_extent_bits(&BTRFS_I(inode)->io_tree, @@ -842,6 +886,14 @@ again: WARN_ON(!PageLocked(pages[i])); } return 0; +fail: + while (faili >= 0) { + unlock_page(pages[faili]); + page_cache_release(pages[faili]); + faili--; + } + return err; + } static ssize_t btrfs_file_aio_write(struct kiocb *iocb, @@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pinned[2]; struct page **pages = NULL; struct iov_iter i; loff_t *ppos = &iocb->ki_pos; @@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || (file->f_flags & O_DIRECT)); - pinned[0] = NULL; - pinned[1] = NULL; - start_pos = pos; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); @@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; - /* - * there are lots of better ways to do this, but this code - * makes sure the first and last page in the file range are - * up to date and ready for cow - */ - if ((pos & (PAGE_CACHE_SIZE - 1))) { - pinned[0] = grab_cache_page(inode->i_mapping, first_index); - if (!PageUptodate(pinned[0])) { - ret = btrfs_readpage(NULL, pinned[0]); - BUG_ON(ret); - wait_on_page_locked(pinned[0]); - } else { - unlock_page(pinned[0]); - } - } - if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { - pinned[1] = grab_cache_page(inode->i_mapping, last_index); - if (!PageUptodate(pinned[1])) { - ret = btrfs_readpage(NULL, pinned[1]); - BUG_ON(ret); - wait_on_page_locked(pinned[1]); - } else { - unlock_page(pinned[1]); - } - } - while (iov_iter_count(&i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(iov_iter_count(&i), @@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, copied = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, &i); - dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + + /* + * if we have trouble faulting in the pages, fall + * back to one page at a time + */ + if (copied < write_bytes) + nrptrs = 1; + + if (copied == 0) + dirty_pages = 0; + else + dirty_pages = (copied + offset + + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; if (num_pages > dirty_pages) { if (copied > 0) @@ -1069,10 +1103,6 @@ out: err = ret; kfree(pages); - if (pinned[0]) - page_cache_release(pinned[0]); - if (pinned[1]) - page_cache_release(pinned[1]); *ppos = pos; /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c23f050f47c2..4a0107e18747 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4818,10 +4818,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; /* - * 1 item for inode ref + * 2 items for inode and inode ref * 2 items for dir items + * 1 item for parent inode */ - trans = btrfs_start_transaction(root, 3); + trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto fail; @@ -6053,6 +6054,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, if (!skip_sum) { dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); if (!dip->csums) { + kfree(dip); ret = -ENOMEM; goto free_ordered; } diff --git a/fs/dcache.c b/fs/dcache.c index 611ffe928c03..a39fe47c466f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) __releases(parent->d_lock) __releases(dentry->d_inode->i_lock) { - dentry->d_parent = NULL; list_del(&dentry->d_u.d_child); + /* + * Inform try_to_ascend() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DISCONNECTED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb) } /* + * This tries to ascend one level of parenthood, but + * we can race with renaming, so we need to re-check + * the parenthood after dropping the lock and check + * that the sequence number still matches. + */ +static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) +{ + struct dentry *new = old->d_parent; + + rcu_read_lock(); + spin_unlock(&old->d_lock); + spin_lock(&new->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (new != old->d_parent || + (old->d_flags & DCACHE_DISCONNECTED) || + (!locked && read_seqretry(&rename_lock, seq))) { + spin_unlock(&new->d_lock); + new = NULL; + } + rcu_read_unlock(); + return new; +} + + +/* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs * list is non-empty and continue searching. @@ -1066,24 +1099,10 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + struct dentry *child = this_parent; + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -1181,24 +1200,10 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + struct dentry *child = this_parent; + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -2942,28 +2947,14 @@ resume: spin_unlock(&dentry->d_lock); } if (this_parent != root) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; + struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; this_parent->d_count--; } - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - (!locked && read_seqretry(&rename_lock, seq))) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + this_parent = try_to_ascend(this_parent, locked, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1cc600e77bb4..2f8e61816d75 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -37,6 +37,7 @@ #include <linux/inet.h> #include <linux/nfs_xdr.h> #include <linux/slab.h> +#include <linux/compat.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word) */ u64 nfs_compat_user_ino64(u64 fileid) { - int ino; +#ifdef CONFIG_COMPAT + compat_ulong_t ino; +#else + unsigned long ino; +#endif if (enable_ino64) return fileid; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7a7474073148..1be36cf65bfc 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); #if defined(CONFIG_NFS_V4_1) struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +extern void nfs4_schedule_session_recovery(struct nfs4_session *); +#else +static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) +{ +} #endif /* CONFIG_NFS_V4_1 */ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); @@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); -extern void nfs4_schedule_state_recovery(struct nfs_client *); +extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); -extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); -extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); +extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index f5c9b125e8cc..b73c34375f60 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) goto out_err; } buf = kmalloc(rlen + 1, GFP_KERNEL); + if (!buf) { + dprintk("%s: Not enough memory\n", __func__); + goto out_err; + } buf[rlen] = '\0'; memcpy(buf, r_addr, rlen); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1ff76acc7e98..0a07e353a961 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,7 +51,6 @@ #include <linux/sunrpc/bc_xprt.h> #include <linux/xattr.h> #include <linux/utsname.h> -#include <linux/mm.h> #include "nfs4_fs.h" #include "delegation.h" @@ -257,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_state_mark_reclaim_nograce(clp, state); - goto do_state_recovery; + nfs4_schedule_stateid_recovery(server, state); + goto wait_on_recovery; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - goto do_state_recovery; + nfs4_schedule_lease_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -273,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); - nfs4_schedule_state_recovery(clp); + nfs4_schedule_session_recovery(clp->cl_session); exception->retry = 1; break; #endif /* defined(CONFIG_NFS_V4_1) */ @@ -296,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, } /* We failed to handle the error */ return nfs4_map_errors(ret); -do_state_recovery: - nfs4_schedule_state_recovery(clp); +wait_on_recovery: ret = nfs4_wait_clnt_recover(clp); if (ret == 0) exception->retry = 1; @@ -436,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * clp = res->sr_session->clp; do_renew_lease(clp, timestamp); /* Check sequence flags */ - if (atomic_read(&clp->cl_count) > 1) - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + if (res->sr_status_flags != 0) + nfs4_schedule_lease_recovery(clp); break; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and @@ -1256,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_DEADSESSION: - nfs4_schedule_state_recovery( - server->nfs_client); + nfs4_schedule_session_recovery(server->nfs_client->cl_session); goto out; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ - nfs4_schedule_state_recovery(server->nfs_client); + nfs4_schedule_lease_recovery(server->nfs_client); goto out; case -ERESTARTSYS: /* @@ -1272,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + nfs4_schedule_stateid_recovery(server, state); case -EKEYEXPIRED: /* * User RPCSEC_GSS context has expired. @@ -1588,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) break; - nfs4_schedule_state_recovery(clp); + nfs4_schedule_state_manager(clp); ret = -EIO; } return ret; @@ -3179,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! */ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) - nfs4_schedule_state_recovery(clp); + nfs4_schedule_lease_recovery(clp); return; } do_renew_lease(clp, timestamp); @@ -3262,7 +3260,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, spages = pages; do { - len = min(PAGE_CACHE_SIZE, buflen); + len = min_t(size_t, PAGE_CACHE_SIZE, buflen); newpage = alloc_page(GFP_KERNEL); if (newpage == NULL) @@ -3504,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_state_mark_reclaim_nograce(clp, state); - goto do_state_recovery; + nfs4_schedule_stateid_recovery(server, state); + goto wait_on_recovery; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - goto do_state_recovery; + nfs4_schedule_lease_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -3520,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); - nfs4_schedule_state_recovery(clp); + nfs4_schedule_session_recovery(clp->cl_session); task->tk_status = 0; return -EAGAIN; #endif /* CONFIG_NFS_V4_1 */ @@ -3537,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, } task->tk_status = nfs4_map_errors(task->tk_status); return 0; -do_state_recovery: +wait_on_recovery: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - nfs4_schedule_state_recovery(clp); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; @@ -4150,7 +4148,7 @@ static void nfs4_lock_release(void *calldata) task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); if (!IS_ERR(task)) - rpc_put_task(task); + rpc_put_task_async(task); dprintk("%s: cancelling lock!\n", __func__); } else nfs_free_seqid(data->arg.lock_seqid); @@ -4174,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = { static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) { - struct nfs_client *clp = server->nfs_client; - struct nfs4_state *state = lsp->ls_state; - switch (error) { case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_EXPIRED: + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; if (new_lock_owner != 0 || (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) - nfs4_state_mark_reclaim_nograce(clp, state); - lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + nfs4_schedule_stateid_recovery(server, lsp->ls_state); break; case -NFS4ERR_STALE_STATEID: - if (new_lock_owner != 0 || - (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) - nfs4_state_mark_reclaim_reboot(clp, state); lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + case -NFS4ERR_EXPIRED: + nfs4_schedule_lease_recovery(server->nfs_client); }; } @@ -4406,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -NFS4ERR_EXPIRED: case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: + nfs4_schedule_lease_recovery(server->nfs_client); + goto out; case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_DEADSESSION: - nfs4_schedule_state_recovery(server->nfs_client); + nfs4_schedule_session_recovery(server->nfs_client->cl_session); goto out; case -ERESTARTSYS: /* @@ -4421,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: - nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + nfs4_schedule_stateid_recovery(server, state); err = 0; goto out; case -EKEYEXPIRED: @@ -5028,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp) int status; unsigned *ptr; struct nfs4_session *session = clp->cl_session; + long timeout = 0; + int err; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); - status = _nfs4_proc_create_session(clp); + do { + status = _nfs4_proc_create_session(clp); + if (status == -NFS4ERR_DELAY) { + err = nfs4_delay(clp->cl_rpcclient, &timeout); + if (err) + status = err; + } + } while (status == -NFS4ERR_DELAY); + if (status) goto out; @@ -5140,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client rpc_delay(task, NFS4_POLL_RETRY_MAX); return -EAGAIN; default: - nfs4_schedule_state_recovery(clp); + nfs4_schedule_lease_recovery(clp); } return 0; } @@ -5227,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if (IS_ERR(task)) ret = PTR_ERR(task); else - rpc_put_task(task); + rpc_put_task_async(task); dprintk("<-- %s status=%d\n", __func__, ret); return ret; } @@ -5243,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) goto out; } ret = rpc_wait_for_completion_task(task); - if (!ret) + if (!ret) { + struct nfs4_sequence_res *res = task->tk_msg.rpc_resp; + + if (task->tk_status == 0) + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); ret = task->tk_status; + } rpc_put_task(task); out: dprintk("<-- %s status=%d\n", __func__, ret); @@ -5281,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf rpc_delay(task, NFS4_POLL_RETRY_MAX); return -EAGAIN; default: - nfs4_schedule_state_recovery(clp); + nfs4_schedule_lease_recovery(clp); } return 0; } @@ -5349,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) status = PTR_ERR(task); goto out; } + status = nfs4_wait_for_completion_rpc_task(task); + if (status == 0) + status = task->tk_status; rpc_put_task(task); return 0; out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e6742b57a04c..0592288f9f06 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) } /* - * Schedule a state recovery attempt + * Schedule a lease recovery attempt */ -void nfs4_schedule_state_recovery(struct nfs_client *clp) +void nfs4_schedule_lease_recovery(struct nfs_client *clp) { if (!clp) return; @@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) +static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); @@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st return 1; } -int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) +static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); @@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s return 1; } +void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs_client *clp = server->nfs_client; + + nfs4_state_mark_reclaim_nograce(clp, state); + nfs4_schedule_state_manager(clp); +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { struct inode *inode = state->inode; @@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) } #ifdef CONFIG_NFS_V4_1 +void nfs4_schedule_session_recovery(struct nfs4_session *session) +{ + nfs4_schedule_lease_recovery(session->clp); +} + void nfs41_handle_recall_slot(struct nfs_client *clp) { set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); - nfs4_schedule_state_recovery(clp); + nfs4_schedule_state_manager(clp); } static void nfs4_reset_all_state(struct nfs_client *clp) @@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp) if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { clp->cl_boot_time = CURRENT_TIME; nfs4_state_start_reclaim_nograce(clp); - nfs4_schedule_state_recovery(clp); + nfs4_schedule_state_manager(clp); } } @@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { nfs4_state_start_reclaim_reboot(clp); - nfs4_schedule_state_recovery(clp); + nfs4_schedule_state_manager(clp); } } @@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp) { nfs_expire_all_delegations(clp); if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) - nfs4_schedule_state_recovery(clp); + nfs4_schedule_state_manager(clp); } void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4e2c168b6ee9..94d50e86a124 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr, p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); *p++ = cpu_to_be32(OP_CREATE_SESSION); - p = xdr_encode_hyper(p, clp->cl_ex_clid); + p = xdr_encode_hyper(p, clp->cl_clientid); *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ *p++ = cpu_to_be32(args->flags); /*flags */ @@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; - xdr_decode_hyper(p, &clp->cl_ex_clid); + xdr_decode_hyper(p, &clp->cl_clientid); p = xdr_inline_decode(xdr, 12); if (unlikely(!p)) goto out_overflow; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 903908a20023..c541093a5bf2 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -86,11 +86,14 @@ /* Default path we try to mount. "%s" gets replaced by our IP address */ #define NFS_ROOT "/tftpboot/%s" +/* Default NFSROOT mount options. */ +#define NFS_DEF_OPTIONS "udp" + /* Parameters passed from the kernel command line */ static char nfs_root_parms[256] __initdata = ""; /* Text-based mount options passed to super.c */ -static char nfs_root_options[256] __initdata = ""; +static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS; /* Address of NFS server */ static __be32 servaddr __initdata = htonl(INADDR_NONE); @@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src, } static int __init root_nfs_cat(char *dest, const char *src, - const size_t destlen) + const size_t destlen) { + size_t len = strlen(dest); + + if (len && dest[len - 1] != ',') + if (strlcat(dest, ",", destlen) > destlen) + return -1; + if (strlcat(dest, src, destlen) > destlen) return -1; return 0; @@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath, if (root_nfs_cat(nfs_root_options, incoming, sizeof(nfs_root_options))) return -1; - - /* - * Possibly prepare for more options to be appended - */ - if (nfs_root_options[0] != '\0' && - nfs_root_options[strlen(nfs_root_options)] != ',') - if (root_nfs_cat(nfs_root_options, ",", - sizeof(nfs_root_options))) - return -1; - return 0; } @@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath, */ static int __init root_nfs_data(char *cmdline) { - char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; + char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; int len, retval = -1; char *tmp = NULL; const size_t tmplen = sizeof(nfs_export_path); @@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline) * Append mandatory options for nfsroot so they override * what has come before */ - snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", + snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4", &servaddr); - if (root_nfs_cat(nfs_root_options, addr_option, + if (root_nfs_cat(nfs_root_options, mand_options, sizeof(nfs_root_options))) goto out_optionstoolong; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index e313a51acdd1..6481d537d69d 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n task_setup_data.rpc_client = NFS_CLIENT(dir); task = rpc_run_task(&task_setup_data); if (!IS_ERR(task)) - rpc_put_task(task); + rpc_put_task_async(task); return 1; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c8278f4046cb..42b92d7a9cc4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head, task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); + if (how & FLUSH_SYNC) + rpc_wait_for_completion_task(task); rpc_put_task(task); return 0; } diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index 48cec7cbca17..be03a0b08b47 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c @@ -10,10 +10,13 @@ #include "check.h" #include "osf.h" +#define MAX_OSF_PARTITIONS 8 + int osf_partition(struct parsed_partitions *state) { int i; int slot = 1; + unsigned int npartitions; Sector sect; unsigned char *data; struct disklabel { @@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state) u8 p_fstype; u8 p_frag; __le16 p_cpg; - } d_partitions[8]; + } d_partitions[MAX_OSF_PARTITIONS]; } * label; struct d_partition * partition; @@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state) put_dev_sector(sect); return 0; } - for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { + npartitions = le16_to_cpu(label->d_npartitions); + if (npartitions > MAX_OSF_PARTITIONS) { + put_dev_sector(sect); + return 0; + } + for (i = 0 ; i < npartitions; i++, partition++) { if (slot == state->limit) break; if (le32_to_cpu(partition->p_size)) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 55e0d4253e49..d746da19c6a2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -55,7 +55,7 @@ * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend - * + * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set */ #define IRQF_DISABLED 0x00000020 #define IRQF_SAMPLE_RANDOM 0x00000040 @@ -67,6 +67,7 @@ #define IRQF_IRQPOLL 0x00001000 #define IRQF_ONESHOT 0x00002000 #define IRQF_NO_SUSPEND 0x00004000 +#define IRQF_FORCE_RESUME 0x00008000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b197563913bf..3e112de12d8d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -68,11 +68,7 @@ struct nfs_client { unsigned char cl_id_uniquifier; u32 cl_cb_ident; /* v4.0 callback identifier */ const struct nfs4_minor_version_ops *cl_mvops; -#endif /* CONFIG_NFS_V4 */ -#ifdef CONFIG_NFS_V4_1 - /* clientid returned from EXCHANGE_ID, used by session operations */ - u64 cl_ex_clid; /* The sequence id to use for the next CREATE_SESSION */ u32 cl_seqid; /* The flags used for obtaining the clientid during EXCHANGE_ID */ @@ -80,7 +76,7 @@ struct nfs_client { struct nfs4_session *cl_session; /* sharred session */ struct list_head cl_layouts; struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ @@ -185,7 +181,7 @@ struct nfs_server { /* maximum number of slots to use */ #define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_NFS_V4) /* Sessions */ #define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long))) @@ -225,5 +221,5 @@ struct nfs4_session { struct nfs_client *clp; }; -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_NFS_V4 */ #endif diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 88513fd8e208..d81db8012c63 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -212,6 +212,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, const struct rpc_call_ops *ops); void rpc_put_task(struct rpc_task *); +void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h index 246940511579..2e8ec51f0615 100644 --- a/include/target/target_core_transport.h +++ b/include/target/target_core_transport.h @@ -135,6 +135,8 @@ extern void transport_complete_task(struct se_task *, int); extern void transport_add_task_to_execute_queue(struct se_task *, struct se_task *, struct se_device *); +extern void transport_remove_task_from_execute_queue(struct se_task *, + struct se_device *); unsigned char *transport_dump_cmd_direction(struct se_cmd *); extern void transport_dump_dev_state(struct se_device *, char *, int *); extern void transport_dump_dev_info(struct se_device *, struct se_lun *, diff --git a/include/xen/events.h b/include/xen/events.h index 00f53ddcc062..962da2ced5b4 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -75,11 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name); int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); #ifdef CONFIG_PCI_MSI -/* Allocate an irq and a pirq to be used with MSIs. */ -#define XEN_ALLOC_PIRQ (1 << 0) -#define XEN_ALLOC_IRQ (1 << 1) -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask); -int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); +int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc); +int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, + int pirq, int vector, const char *name); #endif /* De-allocates the above mentioned physical interrupt. */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index c2d1fa4dc1ee..61e523af3c46 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t; */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 -struct blkif_request { - uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ +struct blkif_request_rw { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ @@ -65,6 +61,16 @@ struct blkif_request { } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + union { + struct blkif_request_rw rw; + } u; +}; + struct blkif_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ @@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); #define VDISK_REMOVABLE 0x2 #define VDISK_READONLY 0x4 +/* Xen-defined major numbers for virtual disks, they look strangely + * familiar */ +#define XEN_IDE0_MAJOR 3 +#define XEN_IDE1_MAJOR 22 +#define XEN_SCSI_DISK0_MAJOR 8 +#define XEN_SCSI_DISK1_MAJOR 65 +#define XEN_SCSI_DISK2_MAJOR 66 +#define XEN_SCSI_DISK3_MAJOR 67 +#define XEN_SCSI_DISK4_MAJOR 68 +#define XEN_SCSI_DISK5_MAJOR 69 +#define XEN_SCSI_DISK6_MAJOR 70 +#define XEN_SCSI_DISK7_MAJOR 71 +#define XEN_SCSI_DISK8_MAJOR 128 +#define XEN_SCSI_DISK9_MAJOR 129 +#define XEN_SCSI_DISK10_MAJOR 130 +#define XEN_SCSI_DISK11_MAJOR 131 +#define XEN_SCSI_DISK12_MAJOR 132 +#define XEN_SCSI_DISK13_MAJOR 133 +#define XEN_SCSI_DISK14_MAJOR 134 +#define XEN_SCSI_DISK15_MAJOR 135 + #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 2befa3e2f1bc..b33257bc7e83 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -30,7 +30,7 @@ #define __HYPERVISOR_stack_switch 3 #define __HYPERVISOR_set_callbacks 4 #define __HYPERVISOR_fpu_taskswitch 5 -#define __HYPERVISOR_sched_op 6 +#define __HYPERVISOR_sched_op_compat 6 #define __HYPERVISOR_dom0_op 7 #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 @@ -52,7 +52,7 @@ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_acm_op 27 #define __HYPERVISOR_nmi_op 28 -#define __HYPERVISOR_sched_op_new 29 +#define __HYPERVISOR_sched_op 29 #define __HYPERVISOR_callback_op 30 #define __HYPERVISOR_xenoprof_op 31 #define __HYPERVISOR_event_channel_op 32 diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 98b92154a264..03c85d7387fb 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,9 +5,9 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); -void xen_pre_suspend(void); -void xen_post_suspend(int suspend_cancelled); -void xen_hvm_post_suspend(int suspend_cancelled); +void xen_arch_pre_suspend(void); +void xen_arch_post_suspend(int suspend_cancelled); +void xen_arch_hvm_post_suspend(int suspend_cancelled); void xen_mm_pin_all(void); void xen_mm_unpin_all(void); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9033c1c70828..2782bacdf494 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq); void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) { - if (resume) + if (resume) { + if (!(desc->status & IRQ_SUSPENDED)) { + if (!desc->action) + return; + if (!(desc->action->flags & IRQF_FORCE_RESUME)) + return; + /* Pretend that it got disabled ! */ + desc->depth++; + } desc->status &= ~IRQ_SUSPENDED; + } switch (desc->depth) { case 0: diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 0d4005d85b03..d6bfb89cce91 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -53,9 +53,6 @@ void resume_device_irqs(void) for_each_irq_desc(irq, desc) { unsigned long flags; - if (!(desc->status & IRQ_SUSPENDED)) - continue; - raw_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, true); raw_spin_unlock_irqrestore(&desc->lock, flags); diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4ec7ba..42eab5a8437d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4213,6 +4213,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) { __wake_up_common(q, mode, 1, 0, key); } +EXPORT_SYMBOL_GPL(__wake_up_locked_key); /** * __wake_up_sync_key - wake up threads blocked on a waitqueue. diff --git a/mm/huge_memory.c b/mm/huge_memory.c index dbe99a5f2073..113e35c47502 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1762,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm, #ifndef CONFIG_NUMA VM_BUG_ON(!*hpage); new_page = *hpage; + if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { + up_read(&mm->mmap_sem); + return; + } #else VM_BUG_ON(*hpage); /* @@ -1781,12 +1785,12 @@ static void collapse_huge_page(struct mm_struct *mm, *hpage = ERR_PTR(-ENOMEM); return; } -#endif if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { up_read(&mm->mmap_sem); put_page(new_page); return; } +#endif /* after allocating the hugepage upgrade to mmap_sem write mode */ up_read(&mm->mmap_sem); diff --git a/mm/rmap.c b/mm/rmap.c index f21f4a1d6a1c..941bf82e8961 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; int referenced = 0; - /* - * Don't want to elevate referenced for mlocked page that gets this far, - * in order that it progresses to try_to_unmap and is moved to the - * unevictable list. - */ - if (vma->vm_flags & VM_LOCKED) { - *mapcount = 0; /* break early from loop */ - *vm_flags |= VM_LOCKED; - goto out; - } - - /* Pretend the page is referenced if the task has the - swap token and is in the middle of a page fault. */ - if (mm != current->mm && has_swap_token(mm) && - rwsem_is_locked(&mm->mmap_sem)) - referenced++; - if (unlikely(PageTransHuge(page))) { pmd_t *pmd; spin_lock(&mm->page_table_lock); + /* + * rmap might return false positives; we must filter + * these out using page_check_address_pmd(). + */ pmd = page_check_address_pmd(page, mm, address, PAGE_CHECK_ADDRESS_PMD_FLAG); - if (pmd && !pmd_trans_splitting(*pmd) && - pmdp_clear_flush_young_notify(vma, address, pmd)) + if (!pmd) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + if (vma->vm_flags & VM_LOCKED) { + spin_unlock(&mm->page_table_lock); + *mapcount = 0; /* break early from loop */ + *vm_flags |= VM_LOCKED; + goto out; + } + + /* go ahead even if the pmd is pmd_trans_splitting() */ + if (pmdp_clear_flush_young_notify(vma, address, pmd)) referenced++; spin_unlock(&mm->page_table_lock); } else { pte_t *pte; spinlock_t *ptl; + /* + * rmap might return false positives; we must filter + * these out using page_check_address(). + */ pte = page_check_address(page, mm, address, &ptl, 0); if (!pte) goto out; + if (vma->vm_flags & VM_LOCKED) { + pte_unmap_unlock(pte, ptl); + *mapcount = 0; /* break early from loop */ + *vm_flags |= VM_LOCKED; + goto out; + } + if (ptep_clear_flush_young_notify(vma, address, pte)) { /* * Don't treat a reference through a sequentially read @@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, pte_unmap_unlock(pte, ptl); } + /* Pretend the page is referenced if the task has the + swap token and is in the middle of a page fault. */ + if (mm != current->mm && has_swap_token(mm) && + rwsem_is_locked(&mm->mmap_sem)) + referenced++; + (*mapcount)--; if (referenced) diff --git a/net/Makefile b/net/Makefile index a3330ebe2c53..a51d9465e628 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ -ifneq ($(CONFIG_IPV6),) -obj-y += ipv6/ -endif +obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 9190ae462cb4..6dee7bf648a9 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -6,6 +6,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP + depends on IPV6 || IPV6=n ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a9e7fc4c461f..b5bada92f637 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) pkt_dev->started_at); ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); - p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", + p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", (unsigned long long)ktime_to_us(elapsed), (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), (unsigned long long)ktime_to_us(idle), diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index df4616fce929..036652c8166d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) ifap = &ifa->ifa_next) { if (!strcmp(ifr.ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == - ifa->ifa_address) { + ifa->ifa_local) { break; /* found */ } } @@ -1040,8 +1040,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, return; arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_address, dev, - ifa->ifa_address, NULL, + ifa->ifa_local, dev, + ifa->ifa_local, NULL, dev->dev_addr, NULL); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4f4483e697bd..e528a42a52be 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 904312e25a3c..e7db7014e89f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -739,8 +739,10 @@ restart: if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else + else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl->fl6_dst); + else + goto out2; dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 71f373c421bc..c47a511f203d 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + scat = &rm->data.op_sg[sg]; + ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); + return ret; } /* FIXME we may overallocate here */ diff --git a/net/rds/loop.c b/net/rds/loop.c index aeec1d483b17..bca6761a3ca2 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off) { + struct scatterlist *sgp = &rm->data.op_sg[sg]; + int ret = sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len); + /* Do not send cong updates to loopback */ if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); - return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; + ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off); + goto out; } BUG_ON(hdr_off || sg || off); @@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, NULL); rds_inc_put(&rm->m_inc); - - return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); +out: + return ret; } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 243fc09b164e..59e599498e37 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task) /* * Mark an RPC call as having completed by clearing the 'active' bit + * and then waking up all tasks that were sleeping. */ -static void rpc_mark_complete_task(struct rpc_task *task) +static int rpc_complete_task(struct rpc_task *task) { - smp_mb__before_clear_bit(); + void *m = &task->tk_runstate; + wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE); + struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE); + unsigned long flags; + int ret; + + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); - smp_mb__after_clear_bit(); - wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); + ret = atomic_dec_and_test(&task->tk_count); + if (waitqueue_active(wq)) + __wake_up_locked_key(wq, TASK_NORMAL, &k); + spin_unlock_irqrestore(&wq->lock, flags); + return ret; } /* * Allow callers to wait for completion of an RPC call + * + * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit() + * to enforce taking of the wq->lock and hence avoid races with + * rpc_complete_task(). */ int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) { if (action == NULL) action = rpc_wait_bit_killable; - return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, action, TASK_KILLABLE); } EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); @@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work) rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); } -void rpc_put_task(struct rpc_task *task) +static void rpc_release_resources_task(struct rpc_task *task) { - if (!atomic_dec_and_test(&task->tk_count)) - return; - /* Release resources */ if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) put_rpccred(task->tk_msg.rpc_cred); rpc_task_release_client(task); - if (task->tk_workqueue != NULL) { +} + +static void rpc_final_put_task(struct rpc_task *task, + struct workqueue_struct *q) +{ + if (q != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); - queue_work(task->tk_workqueue, &task->u.tk_work); + queue_work(q, &task->u.tk_work); } else rpc_free_task(task); } + +static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q) +{ + if (atomic_dec_and_test(&task->tk_count)) { + rpc_release_resources_task(task); + rpc_final_put_task(task, q); + } +} + +void rpc_put_task(struct rpc_task *task) +{ + rpc_do_put_task(task, NULL); +} EXPORT_SYMBOL_GPL(rpc_put_task); +void rpc_put_task_async(struct rpc_task *task) +{ + rpc_do_put_task(task, task->tk_workqueue); +} +EXPORT_SYMBOL_GPL(rpc_put_task_async); + static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); BUG_ON (RPC_IS_QUEUED(task)); - /* Wake up anyone who is waiting for task completion */ - rpc_mark_complete_task(task); + rpc_release_resources_task(task); - rpc_put_task(task); + /* + * Note: at this point we have been removed from rpc_clnt->cl_tasks, + * so it should be safe to use task->tk_count as a test for whether + * or not any other processes still hold references to our rpc_task. + */ + if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) { + /* Wake up anyone who may be waiting for task completion */ + if (!rpc_complete_task(task)) + return; + } else { + if (!atomic_dec_and_test(&task->tk_count)) + return; + } + rpc_final_put_task(task, task->tk_workqueue); } int rpciod_up(void) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9df1eadc912a..1a10dcd999ea 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, p, 0, length, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { put_page(p); + svc_rdma_put_context(ctxt, 1); return; } atomic_inc(&xprt->sc_dma_used); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c431f5a57960..be96d429b475 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, } xs_reclassify_socket(family, sock); - if (xs_bind(transport, sock)) { + err = xs_bind(transport, sock); + if (err) { sock_release(sock); goto out; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d8c04a602cf1..ba5b8c208498 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + goto out; + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { @@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(timeo); + goto out; + } do { int chunk; @@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, timeo = unix_stream_data_wait(sk, timeo); - if (signal_pending(current)) { + if (signal_pending(current) + || mutex_lock_interruptible(&u->readlock)) { err = sock_intr_errno(timeo); goto out; } - mutex_lock(&u->readlock); + continue; unlock: unix_state_unlock(sk); diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 6c94c6ce2925..291228e25984 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -309,6 +309,11 @@ static void do_config_file(const char *filename) close(fd); } +/* + * Important: The below generated source_foo.o and deps_foo.o variable + * assignments are parsed not only by make, but also by the rather simple + * parser in scripts/mod/sumversion.c. + */ static void parse_dep_file(void *map, size_t len) { char *m = map; @@ -323,7 +328,6 @@ static void parse_dep_file(void *map, size_t len) exit(1); } memcpy(s, m, p-m); s[p-m] = 0; - printf("deps_%s := \\\n", target); m = p+1; clear_config(); @@ -343,12 +347,15 @@ static void parse_dep_file(void *map, size_t len) strrcmp(s, "arch/um/include/uml-config.h") && strrcmp(s, ".ver")) { /* - * Do not output the first dependency (the - * source file), so that kbuild is not confused - * if a .c file is rewritten into .S or vice - * versa. + * Do not list the source file as dependency, so that + * kbuild is not confused if a .c file is rewritten + * into .S or vice versa. Storing it in source_* is + * needed for modpost to compute srcversions. */ - if (!first) + if (first) { + printf("source_%s := %s\n\n", target, s); + printf("deps_%s := \\\n", target); + } else printf(" %s \\\n", s); do_config_file(s); } diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index ecf9c7dc1825..9dfcd6d988da 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -300,8 +300,8 @@ static int is_static_library(const char *objfile) return 0; } -/* We have dir/file.o. Open dir/.file.o.cmd, look for deps_ line to - * figure out source file. */ +/* We have dir/file.o. Open dir/.file.o.cmd, look for source_ and deps_ line + * to figure out source files. */ static int parse_source_files(const char *objfile, struct md4_ctx *md) { char *cmd, *file, *line, *dir; @@ -340,6 +340,21 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) */ while ((line = get_next_line(&pos, file, flen)) != NULL) { char* p = line; + + if (strncmp(line, "source_", sizeof("source_")-1) == 0) { + p = strrchr(line, ' '); + if (!p) { + warn("malformed line: %s\n", line); + goto out_file; + } + p++; + if (!parse_file(p, md)) { + warn("could not open %s: %s\n", + p, strerror(errno)); + goto out_file; + } + continue; + } if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) { check_files = 1; continue; diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c index 4bbc3442703f..8dfb0a0da673 100644 --- a/sound/soc/codecs/wm8978.c +++ b/sound/soc/codecs/wm8978.c @@ -145,18 +145,18 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = { SOC_SINGLE("DAC Playback Limiter Threshold", WM8978_DAC_LIMITER_2, 4, 7, 0), SOC_SINGLE("DAC Playback Limiter Boost", - WM8978_DAC_LIMITER_2, 0, 15, 0), + WM8978_DAC_LIMITER_2, 0, 12, 0), SOC_ENUM("ALC Enable Switch", alc1), SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0), SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0), - SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 7, 0), + SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 10, 0), SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0), SOC_ENUM("ALC Capture Mode", alc3), - SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 15, 0), - SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 15, 0), + SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 10, 0), + SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 10, 0), SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0), SOC_SINGLE("ALC Capture Noise Gate Threshold", @@ -211,8 +211,10 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = { WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1), /* DAC / ADC oversampling */ - SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, 8, 1, 0), - SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, 8, 1, 0), + SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, + 5, 1, 0), + SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, + 5, 1, 0), }; /* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */ diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 4afbe3b2e443..c6c958ee5d59 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -1418,7 +1418,7 @@ SND_SOC_DAPM_DAC_E("DAC1R", NULL, SND_SOC_NOPM, 0, 0, static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = { SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0), -SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0), +SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0), SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0), SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0), }; @@ -3325,6 +3325,12 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) case WM8958: snd_soc_add_controls(codec, wm8958_snd_controls, ARRAY_SIZE(wm8958_snd_controls)); + snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets, + ARRAY_SIZE(wm8994_lateclk_widgets)); + snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets, + ARRAY_SIZE(wm8994_adc_widgets)); + snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets, + ARRAY_SIZE(wm8994_dac_widgets)); snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets, ARRAY_SIZE(wm8958_dapm_widgets)); break; @@ -3350,6 +3356,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) } break; case WM8958: + snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon, + ARRAY_SIZE(wm8994_lateclk_intercon)); snd_soc_dapm_add_routes(dapm, wm8958_intercon, ARRAY_SIZE(wm8958_intercon)); break; diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c index 161750443ebc..73dde4a1adc3 100644 --- a/sound/soc/omap/am3517evm.c +++ b/sound/soc/omap/am3517evm.c @@ -139,7 +139,7 @@ static struct snd_soc_dai_link am3517evm_dai = { .cpu_dai_name ="omap-mcbsp-dai.0", .codec_dai_name = "tlv320aic23-hifi", .platform_name = "omap-pcm-audio", - .codec_name = "tlv320aic23-codec", + .codec_name = "tlv320aic23-codec.2-001a", .init = am3517evm_aic23_init, .ops = &am3517evm_ops, }; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 25e54230cc6a..1790f83ee665 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -941,7 +941,7 @@ static void dapm_seq_run(struct snd_soc_dapm_context *dapm, } if (!list_empty(&pending)) - dapm_seq_run_coalesced(dapm, &pending); + dapm_seq_run_coalesced(cur_dapm, &pending); } static void dapm_widget_update(struct snd_soc_dapm_context *dapm) |