From 4c82fd0abb87e20d0d68ef5237e74732352806c8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Dec 2017 12:08:33 +0100 Subject: netfilter: uapi: correct UNTRACKED conntrack state bit number nft_ct exposes this bit to userspace. This used to be #define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) (IP_CT_NUMBER is 5, so this was 0x40) .. but this got changed to 8 (0x100) when the untracked object got removed. Replace this with a literal 6 to prevent further incompatible changes in case IP_CT_NUMBER ever increases. Fixes: cc41c84b7e7f2 ("netfilter: kill the fake untracked conntrack objects") Reported-by: Li Shuang Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 3fea7709a441..57ccfb32e87f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -36,7 +36,7 @@ enum ip_conntrack_info { #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << 6) /* Bitset representing status of connection. */ enum ip_conntrack_status { -- cgit v1.2.3 From af955bf15d2c27496b0269b1f05c26f758c68314 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Dec 2017 10:27:24 +0000 Subject: crypto: af_alg - Fix race around ctx->rcvused by making it atomic_t This variable was increased and decreased without any protection. Result was an occasional misscount and negative wrap around resulting in false resource allocation failures. Fixes: 7d2c3f54e6f6 ("crypto: af_alg - remove locking in async callback") Signed-off-by: Jonathan Cameron Reviewed-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/af_alg.c | 4 ++-- crypto/algif_aead.c | 2 +- crypto/algif_skcipher.c | 2 +- include/crypto/if_alg.h | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f1a2caf1b59b..d3f1c431724b 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) unsigned int i; list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) { - ctx->rcvused -= rsgl->sg_num_bytes; + atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused); af_alg_free_sg(&rsgl->sgl); list_del(&rsgl->list); if (rsgl != &areq->first_rsgl) @@ -1162,7 +1162,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, areq->last_rsgl = rsgl; len += err; - ctx->rcvused += err; + atomic_add(err, &ctx->rcvused); rsgl->sg_num_bytes = err; iov_iter_advance(&msg->msg_iter, err); } diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index b73db2b27656..20df8c1b6851 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -571,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index baef9bfccdda..c5c47b680152 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -390,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 38d9c5861ed8..f38227a78eae 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -150,7 +151,7 @@ struct af_alg_ctx { struct crypto_wait wait; size_t used; - size_t rcvused; + atomic_t rcvused; bool more; bool merge; @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk) struct af_alg_ctx *ctx = ask->private; return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - - ctx->rcvused, 0); + atomic_read(&ctx->rcvused), 0); } /** -- cgit v1.2.3 From aa8c6248f8c75acfd610fe15d8cae23cf70d9d09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:36 +0100 Subject: x86/mm/pti: Add infrastructure for page table isolation Add the initial files for kernel page table isolation, with a minimal init function and the boot time detection for this misfeature. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 2 + arch/x86/boot/compressed/pagetable.c | 3 + arch/x86/entry/calling.h | 7 +++ arch/x86/include/asm/pti.h | 14 +++++ arch/x86/mm/Makefile | 7 ++- arch/x86/mm/init.c | 2 + arch/x86/mm/pti.c | 84 +++++++++++++++++++++++++ include/linux/pti.h | 11 ++++ init/main.c | 3 + 9 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/pti.h create mode 100644 arch/x86/mm/pti.c create mode 100644 include/linux/pti.h (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 05496622b4ef..5dfd26265484 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2685,6 +2685,8 @@ steal time is computed, but won't influence scheduler behaviour + nopti [X86-64] Disable kernel page table isolation + nolapic [X86-32,APIC] Do not enable or use the local APIC. nolapic_timer [X86-32,APIC] Do not use the local APIC timer. diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 972319ff5b01..e691ff734cb5 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -23,6 +23,9 @@ */ #undef CONFIG_AMD_MEM_ENCRYPT +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + #include "misc.h" /* These actually do the work of building the kernel identity maps. */ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a9d17a7686ab..3d3389a92c33 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -205,18 +205,23 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI mov %cr3, \scratch_reg ADJUST_KERNEL_CR3 \scratch_reg mov \scratch_reg, %cr3 +.Lend_\@: .endm .macro SWITCH_TO_USER_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI mov %cr3, \scratch_reg ADJUST_USER_CR3 \scratch_reg mov \scratch_reg, %cr3 +.Lend_\@: .endm .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI movq %cr3, \scratch_reg movq \scratch_reg, \save_reg /* @@ -233,11 +238,13 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro RESTORE_CR3 save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI /* * The CR3 write could be avoided when not changing its value, * but would require a CR3 read *and* a scratch register. */ movq \save_reg, %cr3 +.Lend_\@: .endm #else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h new file mode 100644 index 000000000000..0b5ef05b2d2d --- /dev/null +++ b/arch/x86/include/asm/pti.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _ASM_X86_PTI_H +#define _ASM_X86_PTI_H +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern void pti_init(void); +extern void pti_check_boottime_disable(void); +#else +static inline void pti_check_boottime_disable(void) { } +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 2e0017af8f9b..52906808e277 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -43,9 +43,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o -obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_X86_INTEL_MPX) += mpx.o +obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 020223420308..af75069fb116 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -630,6 +631,7 @@ void __init init_mem_mapping(void) { unsigned long end; + pti_check_boottime_disable(); probe_page_size_mask(); setup_pcid(); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c new file mode 100644 index 000000000000..375f23a758bc --- /dev/null +++ b/arch/x86/mm/pti.c @@ -0,0 +1,84 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This code is based in part on work published here: + * + * https://github.com/IAIK/KAISER + * + * The original work was written by and and signed off by for the Linux + * kernel by: + * + * Signed-off-by: Richard Fellner + * Signed-off-by: Moritz Lipp + * Signed-off-by: Daniel Gruss + * Signed-off-by: Michael Schwarz + * + * Major changes to the original code by: Dave Hansen + * Mostly rewritten by Thomas Gleixner and + * Andy Lutomirsky + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + +static void __init pti_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + pr_info("%s\n", reason); +} + +void __init pti_check_boottime_disable(void) +{ + if (hypervisor_is_type(X86_HYPER_XEN_PV)) { + pti_print_if_insecure("disabled on XEN PV."); + return; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) { + pti_print_if_insecure("disabled on command line."); + return; + } + + if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + return; + + setup_force_cpu_cap(X86_FEATURE_PTI); +} + +/* + * Initialize kernel page table isolation + */ +void __init pti_init(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("enabled\n"); +} diff --git a/include/linux/pti.h b/include/linux/pti.h new file mode 100644 index 000000000000..0174883a935a --- /dev/null +++ b/include/linux/pti.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _INCLUDE_PTI_H +#define _INCLUDE_PTI_H + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +#include +#else +static inline void pti_init(void) { } +#endif + +#endif diff --git a/init/main.c b/init/main.c index 8a390f60ec81..b32ec72cdf3d 100644 --- a/init/main.c +++ b/init/main.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -506,6 +507,8 @@ static void __init mm_init(void) ioremap_huge_init(); /* Should be run before the first non-init thread is created */ init_espfix_bsp(); + /* Should be run after espfix64 is set up. */ + pti_init(); } asmlinkage __visible void __init start_kernel(void) -- cgit v1.2.3 From 39c3fd58952d7599d367c84c1330b785d91d6088 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 2 Dec 2017 18:11:04 +0100 Subject: kernel/irq: Extend lockdep class for request mutex The IRQ code already has support for lockdep class for the lock mutex in an interrupt descriptor. Extend this to add a second class for the request mutex in the descriptor. Not having a class is resulting in false positive splats in some code paths. Signed-off-by: Andrew Lunn Signed-off-by: Thomas Gleixner Acked-by: linus.walleij@linaro.org Cc: grygorii.strashko@ti.com Cc: f.fainelli@gmail.com Link: https://lkml.kernel.org/r/1512234664-21555-1-git-send-email-andrew@lunn.ch --- arch/powerpc/sysdev/fsl_msi.c | 4 +++- drivers/gpio/gpio-bcm-kona.c | 3 ++- drivers/gpio/gpio-brcmstb.c | 4 +++- drivers/gpio/gpio-tegra.c | 4 +++- drivers/gpio/gpiolib.c | 27 ++++++++++++++++--------- drivers/irqchip/irq-renesas-intc-irqpin.c | 6 +++++- drivers/mfd/arizona-irq.c | 4 +++- drivers/pinctrl/pinctrl-single.c | 5 ++++- include/linux/gpio/driver.h | 33 ++++++++++++++++++++----------- include/linux/irqdesc.h | 9 ++++++--- kernel/irq/generic-chip.c | 11 +++++++---- 11 files changed, 75 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea1..df95102e732c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class; static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, dev_err(&dev->dev, "No memory for MSI cascade data\n"); return -ENOMEM; } - irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); + irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, + &fsl_msi_irq_request_class); cascade_data->index = offset; cascade_data->msi_data = msi; cascade_data->virq = virt_msir; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index dfcf56ee3c61..76861a00bb92 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = { * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) @@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, d->host_data); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class); irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); irq_set_noprobe(irq); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 545d43a587b7..5b24801bffef 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( * category than their parents, so it won't report false recursion. */ static struct lock_class_key brcmstb_gpio_irq_lock_class; +static struct lock_class_key brcmstb_gpio_irq_request_class; static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, @@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, &bank->gc); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); + irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, + &brcmstb_gpio_irq_lock_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 8db47f671708..02fa8fe2292a 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { * than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int tegra_gpio_probe(struct platform_device *pdev) { @@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, + &gpio_request_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aad84a6306c4..44332b793718 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void) } int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { unsigned long flags; int status = 0; @@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_add_irqchip(chip, key); + status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) goto err_remove_chip; @@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ - irq_set_lockdep_class(irq, chip->irq.lock_key); + irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key); irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); /* Chips that use nested thread handlers have them marked */ if (chip->irq.threaded) @@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gpiochip: the GPIO chip to add the IRQ chip to - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct irq_chip *irqchip = gpiochip->irq.chip; const struct irq_domain_ops *ops; @@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.default_type = type; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; if (gpiochip->irq.domain_ops) ops = gpiochip->irq.domain_ops; @@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE * to have the core avoid setting up any default type in the hardware. * @threaded: whether this irqchip uses a nested thread handler - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request * * This function closely associates a certain irqchip with a certain * gpiochip, providing an irq domain to translate the local IRQs to @@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct device_node *of_node; @@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, gpiochip->irq.default_type = type; gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; gpiochip->irq.domain = irq_domain_add_simple(of_node, gpiochip->ngpio, first_irq, &gpiochip_domain_ops, gpiochip); @@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { return 0; } diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 06f29cf5018a..cee59fe1321c 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) */ static struct lock_class_key intc_irqpin_irq_lock_class; +/* And this is for the request mutex */ +static struct lock_class_key intc_irqpin_irq_request_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); - irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class, + &intc_irqpin_irq_request_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 09cf3699e354..a307832d7e45 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = { }; static struct lock_class_key arizona_irq_lock_class; +static struct lock_class_key arizona_irq_request_class; static int arizona_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) @@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq, struct arizona *data = h->host_data; irq_set_chip_data(virq, data); - irq_set_lockdep_class(virq, &arizona_irq_lock_class); + irq_set_lockdep_class(virq, &arizona_irq_lock_class, + &arizona_irq_request_class); irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); irq_set_nested_thread(virq, 1); irq_set_noprobe(virq); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index e6cd8de793e2..3501491e5bfc 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = { */ static struct lock_class_key pcs_lock_class; +/* Class for the IRQ request mutex */ +static struct lock_class_key pcs_request_class; + /* * REVISIT: Reads and writes could eventually use regmap or something * generic. But at least on omaps, some mux registers are performance @@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, pcs_soc); irq_set_chip_and_handler(irq, &pcs->chip, handle_level_irq); - irq_set_lockdep_class(irq, &pcs_lock_class); + irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class); irq_set_noprobe(irq); return 0; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 55e672592fa9..7258cd676df4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -66,9 +66,10 @@ struct gpio_irq_chip { /** * @lock_key: * - * Per GPIO IRQ chip lockdep class. + * Per GPIO IRQ chip lockdep classes. */ struct lock_class_key *lock_key; + struct lock_class_key *request_key; /** * @parent_handler: @@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(chip, data) ({ \ - static struct lock_class_key key; \ - gpiochip_add_data_with_key(chip, data, &key); \ + static struct lock_class_key lock_key; \ + static struct lock_class_key request_key; \ + gpiochip_add_data_with_key(chip, data, &lock_key, \ + &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) #endif static inline int gpiochip_add(struct gpio_chip *chip) @@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); #ifdef CONFIG_LOCKDEP @@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, &key); + handler, type, false, + &lock_key, &request_key); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, &key); + handler, type, true, + &lock_key, &request_key); } #else static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, @@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, NULL); + handler, type, false, NULL, NULL); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 39fb3700f7a9..25b33b664537 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq) } static inline void -irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) +irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) - lockdep_set_class(&desc->lock, class); + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } } #ifdef CONFIG_IRQ_PREFLOW_FASTEOI diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c26c5bb6b491..508c03dfef25 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); /* - * Separate lockdep class for interrupt chip which can nest irq_desc - * lock. + * Separate lockdep classes for interrupt chip which can nest irq_desc + * lock and request mutex. */ static struct lock_class_key irq_nested_lock_class; +static struct lock_class_key irq_nested_request_class; /* * irq_map_generic_chip - Map a generic chip for an irq domain @@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, set_bit(idx, &gc->installed); if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(virq, &irq_nested_lock_class); + irq_set_lockdep_class(virq, &irq_nested_lock_class, + &irq_nested_request_class); if (chip->irq_calc_mask) chip->irq_calc_mask(data); @@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(i, &irq_nested_lock_class); + irq_set_lockdep_class(i, &irq_nested_lock_class, + &irq_nested_request_class); if (!(flags & IRQ_GC_NO_MASK)) { struct irq_data *d = irq_get_irq_data(i); -- cgit v1.2.3 From 466a2b42d67644447a1765276259a3ea5531ddff Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 21 Dec 2017 02:22:45 +0100 Subject: cpufreq: schedutil: Use idle_calls counter of the remote CPU Since the recent remote cpufreq callback work, its possible that a cpufreq update is triggered from a remote CPU. For single policies however, the current code uses the local CPU when trying to determine if the remote sg_cpu entered idle or is busy. This is incorrect. To remedy this, compare with the nohz tick idle_calls counter of the remote CPU. Fixes: 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Joel Fernandes Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki --- include/linux/tick.h | 1 + kernel/sched/cpufreq_schedutil.c | 2 +- kernel/time/tick-sched.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index f442d1a42025..7cc35921218e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); +extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2f52ec0f1539..d6717a3331a1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, #ifdef CONFIG_NO_HZ_COMMON static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { - unsigned long idle_calls = tick_nohz_get_idle_calls(); + unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); bool ret = idle_calls == sg_cpu->saved_idle_calls; sg_cpu->saved_idle_calls = idle_calls; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99578f06c8d4..77555faf6fbc 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -985,6 +985,19 @@ ktime_t tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value + * for a particular CPU. + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls_cpu(int cpu) +{ + struct tick_sched *ts = tick_get_tick_sched(cpu); + + return ts->idle_calls; +} + /** * tick_nohz_get_idle_calls - return the current idle calls counter value * -- cgit v1.2.3 From 69790ba92b8d67eaee5e50b30a5b696d40664caf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:44:34 +0100 Subject: genirq: Introduce IRQD_CAN_RESERVE flag Add a new flag to mark interrupts which can use reservation mode. This is going to be used in subsequent patches to disable reservation mode for a certain class of MSI devices. Signed-off-by: Thomas Gleixner Tested-by: Alexandru Chirvasitu Tested-by: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- include/linux/irq.h | 17 +++++++++++++++++ kernel/irq/debugfs.c | 1 + 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index e140f69163b6..a0231e96a578 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,6 +212,7 @@ struct irq_data { * mask. Applies only to affinity managed irqs. * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -233,6 +234,7 @@ enum { IRQD_MANAGED_SHUTDOWN = (1 << 23), IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), + IRQD_CAN_RESERVE = (1 << 26), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } +static inline void irqd_set_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_CAN_RESERVE; +} + +static inline void irqd_clr_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; +} + +static inline bool irqd_can_reserve(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_CAN_RESERVE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 7f608ac39653..acfaaef8672a 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), -- cgit v1.2.3 From 702cb0a02813299d6911b775c637906ae21b737d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:59:06 +0100 Subject: genirq/irqdomain: Rename early argument of irq_domain_activate_irq() The 'early' argument of irq_domain_activate_irq() is actually used to denote reservation mode. To avoid confusion, rename it before abuse happens. No functional change. Fixes: 72491643469a ("genirq/irqdomain: Update irq_domain_ops.activate() signature") Signed-off-by: Thomas Gleixner Cc: Alexandru Chirvasitu Cc: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- arch/x86/include/asm/irqdomain.h | 2 +- arch/x86/include/asm/trace/irq_vectors.h | 16 ++++++++-------- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/apic/vector.c | 6 +++--- arch/x86/platform/uv/uv_irq.c | 2 +- drivers/gpio/gpio-xgene-sb.c | 2 +- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/intel_irq_remapping.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- include/linux/irqdomain.h | 2 +- kernel/irq/internals.h | 2 +- kernel/irq/irqdomain.c | 13 +++++++------ 13 files changed, 29 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 139feef467f7..c066ffae222b 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early); + struct irq_data *irq_data, bool reserve); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 84b9ec0c1bc0..22647a642e98 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed, DECLARE_EVENT_CLASS(vector_activate, TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, - bool early), + bool reserve), - TP_ARGS(irq, is_managed, can_reserve, early), + TP_ARGS(irq, is_managed, can_reserve, reserve), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, can_reserve ) - __field( bool, early ) + __field( bool, reserve ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->can_reserve = can_reserve; - __entry->early = early; + __entry->reserve = reserve; ), - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", __entry->irq, __entry->is_managed, __entry->can_reserve, - __entry->early) + __entry->reserve) ); #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ DEFINE_EVENT_FN(vector_activate, name, \ TP_PROTO(unsigned int irq, bool is_managed, \ - bool can_reserve, bool early), \ - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + bool can_reserve, bool reserve), \ + TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 201579dc5242..8a7963421460 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, } int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { unsigned long flags; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1e969dba0476..52c85c8147e9 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -399,21 +399,21 @@ static int activate_managed(struct irq_data *irqd) } static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, - bool early) + bool reserve) { struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; int ret = 0; trace_vector_activate(irqd->irq, apicd->is_managed, - apicd->can_reserve, early); + apicd->can_reserve, reserve); /* Nothing to do for fixed assigned vectors */ if (!apicd->can_reserve && !apicd->is_managed) return 0; raw_spin_lock_irqsave(&vector_lock, flags); - if (early || irqd_is_managed_and_shutdown(irqd)) + if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 5f6fd860820a..e4cb9f4cde8a 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * on the specified blade to allow the sending of MSIs to the specified CPU. */ static int uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); return 0; diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 2313af82fad3..acd59113e08b 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) static int xgene_gpio_sb_domain_activate(struct irq_domain *d, struct irq_data *irq_data, - bool early) + bool reserve) { struct xgene_gpio_sb *priv = d->host_data; u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7d5eb004091d..97baf88d9505 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, struct irq_cfg *cfg); static int irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 76a193c7fcfc..66f69af2c219 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain, } static int intel_irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { intel_ir_reconfigure_irte(irq_data, true); return 0; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4039e64cd342..06f025fd5726 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, } static int its_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); @@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq } static int its_vpe_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); struct its_node *its; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a276c61be217..e62ab087bfd8 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d, } static int stm32_gpio_domain_activate(struct irq_domain *d, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct stm32_gpio_bank *bank = d->host_data; struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a34355d19546..48c7e86bb556 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -113,7 +113,7 @@ struct irq_domain_ops { unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 07d08ca701ec..ab19371eab9b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) -static inline int irq_domain_activate_irq(struct irq_data *data, bool early) +static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) { irqd_set_activated(data); return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4f4f60015e8a..62068ad46930 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data) } } -static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) +static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) { int ret = 0; @@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) if (irqd->parent_data) ret = __irq_domain_activate_irq(irqd->parent_data, - early); + reserve); if (!ret && domain->ops->activate) { - ret = domain->ops->activate(domain, irqd, early); + ret = domain->ops->activate(domain, irqd, reserve); /* Rollback in case of error */ if (ret && irqd->parent_data) __irq_domain_deactivate_irq(irqd->parent_data); @@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt - * @irq_data: outermost irq_data associated with interrupt + * @irq_data: Outermost irq_data associated with interrupt + * @reserve: If set only reserve an interrupt vector instead of assigning one * * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ -int irq_domain_activate_irq(struct irq_data *irq_data, bool early) +int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) { int ret = 0; if (!irqd_is_activated(irq_data)) - ret = __irq_domain_activate_irq(irq_data, early); + ret = __irq_domain_activate_irq(irq_data, reserve); if (!ret) irqd_set_activated(irq_data); return ret; -- cgit v1.2.3 From 26456f87aca7157c057de65c9414b37f1ab881d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Dec 2017 21:37:25 +0100 Subject: timers: Reinitialize per cpu bases on hotplug The timer wheel bases are not (re)initialized on CPU hotplug. That leaves them with a potentially stale clk and next_expiry valuem, which can cause trouble then the CPU is plugged. Add a prepare callback which forwards the clock, sets next_expiry to far in the future and reset the control flags to a known state. Set base->must_forward_clk so the first timer which is queued will try to forward the clock to current jiffies. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Reported-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: Anna-Maria Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712272152200.2431@nanos --- include/linux/cpuhotplug.h | 2 +- include/linux/timer.h | 4 +++- kernel/cpu.c | 4 ++-- kernel/time/timer.c | 15 +++++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 201ab7267986..1a32e558eb11 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,7 +86,7 @@ enum cpuhp_state { CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, diff --git a/include/linux/timer.h b/include/linux/timer.h index 04af640ea95b..2448f9cc48a3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 41376c3ac93b..97858477e586 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ - [CPUHP_TIMERS_DEAD] = { + [CPUHP_TIMERS_PREPARE] = { .name = "timers:dead", - .startup.single = NULL, + .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, /* Kicks the plugged cpu into life */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 19a9c3da7698..6be576e02209 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1853,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } +int timers_prepare_cpu(unsigned int cpu) +{ + struct timer_base *base; + int b; + + for (b = 0; b < NR_BASES; b++) { + base = per_cpu_ptr(&timer_bases[b], cpu); + base->clk = jiffies; + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->is_idle = false; + base->must_forward_clk = true; + } + return 0; +} + int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; -- cgit v1.2.3 From 98801506552593c9b8ac11021b0cdad12cab4f6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: fscache: Fix the default for fscache_maybe_release_page() Fix the default for fscache_maybe_release_page() for when the cookie isn't valid or the page isn't cached. It mustn't return false as that indicates the page cannot yet be freed. The problem with the default is that if, say, there's no cache, but a network filesystem's pages are using up almost all the available memory, a system can OOM because the filesystem ->releasepage() op will not allow them to be released as fscache_maybe_release_page() incorrectly prevents it. This can be tested by writing a sequence of 512MiB files to an AFS mount. It does not affect NFS or CIFS because both of those wrap the call in a check of PG_fscache and it shouldn't bother Ceph as that only has PG_private set whilst writeback is in progress. This might be an issue for 9P, however. Note that the pages aren't entirely stuck. Removing a file or unmounting will clear things because that uses ->invalidatepage() instead. Fixes: 201a15428bd5 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeff Layton Acked-by: Al Viro Tested-by: Marc Dionne cc: stable@vger.kernel.org # 2.6.32+ --- include/linux/fscache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4ff47d4a893..fe0c349684fa 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** -- cgit v1.2.3 From f24c4d478013d82bd1b943df566fff3561d52864 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jan 2018 17:21:10 +0000 Subject: efi/capsule-loader: Reinstate virtual capsule mapping Commit: 82c3768b8d68 ("efi/capsule-loader: Use a cached copy of the capsule header") ... refactored the capsule loading code that maps the capsule header, to avoid having to map it several times. However, as it turns out, the vmap() call we ended up removing did not just map the header, but the entire capsule image, and dropping this virtual mapping breaks capsules that are processed by the firmware immediately (i.e., without a reboot). Unfortunately, that change was part of a larger refactor that allowed a quirk to be implemented for Quark, which has a non-standard memory layout for capsules, and we have slightly painted ourselves into a corner by allowing quirk code to mangle the capsule header and memory layout. So we need to fix this without breaking Quark. Fortunately, Quark does not appear to care about the virtual mapping, and so we can simply do a partial revert of commit: 2a457fb31df6 ("efi/capsule-loader: Use page addresses rather than struct page pointers") ... and create a vmap() mapping of the entire capsule (including header) based on the reinstated struct page array, unless running on Quark, in which case we pass the capsule header copy as before. Reported-by: Ge Song Tested-by: Bryan O'Donoghue Tested-by: Ge Song Signed-off-by: Ard Biesheuvel Cc: Cc: Dave Young Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 82c3768b8d68 ("efi/capsule-loader: Use a cached copy of the capsule header") Link: http://lkml.kernel.org/r/20180102172110.17018-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 13 +++++++++- drivers/firmware/efi/capsule-loader.c | 45 ++++++++++++++++++++++++++++------- include/linux/efi.h | 4 +++- 3 files changed, 52 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 8a99a2e96537..5b513ccffde4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, /* * Update the first page pointer to skip over the CSH header. */ - cap_info->pages[0] += csh->headersize; + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. + */ + cap_info->capsule = &cap_info->header; return 1; } diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index ec8ac5c4dd84..055e2e8f985a 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -20,10 +20,6 @@ #define NO_FURTHER_WRITE_ACTION -1 -#ifndef phys_to_page -#define phys_to_page(x) pfn_to_page((x) >> PAGE_SHIFT) -#endif - /** * efi_free_all_buff_pages - free all previous allocated buffer pages * @cap_info: pointer to current instance of capsule_info structure @@ -35,7 +31,7 @@ static void efi_free_all_buff_pages(struct capsule_info *cap_info) { while (cap_info->index > 0) - __free_page(phys_to_page(cap_info->pages[--cap_info->index])); + __free_page(cap_info->pages[--cap_info->index]); cap_info->index = NO_FURTHER_WRITE_ACTION; } @@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info) cap_info->pages = temp_page; + temp_page = krealloc(cap_info->phys, + pages_needed * sizeof(phys_addr_t *), + GFP_KERNEL | __GFP_ZERO); + if (!temp_page) + return -ENOMEM; + + cap_info->phys = temp_page; + return 0; } @@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, **/ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) { + bool do_vunmap = false; int ret; - ret = efi_capsule_update(&cap_info->header, cap_info->pages); + /* + * cap_info->capsule may have been assigned already by a quirk + * handler, so only overwrite it if it is NULL + */ + if (!cap_info->capsule) { + cap_info->capsule = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); + if (!cap_info->capsule) + return -ENOMEM; + do_vunmap = true; + } + + ret = efi_capsule_update(cap_info->capsule, cap_info->phys); + if (do_vunmap) + vunmap(cap_info->capsule); if (ret) { pr_err("capsule update failed\n"); return ret; @@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, goto failed; } - cap_info->pages[cap_info->index++] = page_to_phys(page); + cap_info->pages[cap_info->index] = page; + cap_info->phys[cap_info->index] = page_to_phys(page); cap_info->page_bytes_remain = PAGE_SIZE; + cap_info->index++; } else { - page = phys_to_page(cap_info->pages[cap_info->index - 1]); + page = cap_info->pages[cap_info->index - 1]; } kbuff = kmap(page); @@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file) struct capsule_info *cap_info = file->private_data; kfree(cap_info->pages); + kfree(cap_info->phys); kfree(file->private_data); file->private_data = NULL; return 0; @@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } + cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); + return -ENOMEM; + } + file->private_data = cap_info; return 0; diff --git a/include/linux/efi.h b/include/linux/efi.h index d813f7b04da7..29fdf8029cf6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -140,11 +140,13 @@ struct efi_boot_memmap { struct capsule_info { efi_capsule_header_t header; + efi_capsule_header_t *capsule; int reset_type; long index; size_t count; size_t total_size; - phys_addr_t *pages; + struct page **pages; + phys_addr_t *phys; size_t page_bytes_remain; }; -- cgit v1.2.3 From c0bace798436bca0fdc221ff61143f1376a9c3de Mon Sep 17 00:00:00 2001 From: Felix Janda Date: Mon, 1 Jan 2018 19:33:20 +0100 Subject: uapi libc compat: add fallback for unsupported libcs libc-compat.h aims to prevent symbol collisions between uapi and libc headers for each supported libc. This requires continuous coordination between them. The goal of this commit is to improve the situation for libcs (such as musl) which are not yet supported and/or do not wish to be explicitly supported, while not affecting supported libcs. More precisely, with this commit, unsupported libcs can request the suppression of any specific uapi definition by defining the correspondings _UAPI_DEF_* macro as 0. This can fix symbol collisions for them, as long as the libc headers are included before the uapi headers. Inclusion in the other order is outside the scope of this commit. All infrastructure in order to enable this fallback for unsupported libcs is already in place, except that libc-compat.h unconditionally defines all _UAPI_DEF_* macros to 1 for all unsupported libcs so that any previous definitions are ignored. In order to fix this, this commit merely makes these definitions conditional. This commit together with the musl libc commit http://git.musl-libc.org/cgit/musl/commit/?id=04983f2272382af92eb8f8838964ff944fbb8258 fixes for example the following compiler errors when is included after musl's : ./linux/in6.h:32:8: error: redefinition of 'struct in6_addr' ./linux/in6.h:49:8: error: redefinition of 'struct sockaddr_in6' ./linux/in6.h:59:8: error: redefinition of 'struct ipv6_mreq' The comments referencing glibc are still correct, but this file is not only used for glibc any more. Signed-off-by: Felix Janda Reviewed-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/libc-compat.h | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 282875cf8056..8254c937c9f4 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -168,46 +168,99 @@ /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything - * that we need. */ + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition. */ #else /* !defined(__GLIBC__) */ /* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF #define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP #define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ #define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ #define __UAPI_DEF_IF_IFREQ 1 +#endif /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR #define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO #define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO #define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ #define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN #define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS #define __UAPI_DEF_IN_CLASS 1 +#endif /* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR #define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT #define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 #define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ #define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 #define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS #define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO #define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO #define __UAPI_DEF_IP6_MTUINFO 1 +#endif /* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX #define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION #define __UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA #define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF #define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif /* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 +#endif #endif /* __GLIBC__ */ -- cgit v1.2.3 From f1c8d3720f2e6c8c2b209120678236debd0360e5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 2 Jan 2018 14:05:19 -0800 Subject: vxlan: trivial indenting fix. Fix indentation of reserved_flags2 field in vxlanhdr_gpe. Fixes: e1e5314de08b ("vxlan: implement GPE") Signed-off-by: William Tu Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/vxlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 13223396dc64..f96391e84a8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -146,7 +146,7 @@ struct vxlanhdr_gpe { np_applied:1, instance_applied:1, version:2, -reserved_flags2:2; + reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, -- cgit v1.2.3 From 6926e041a8920c8ec27e4e155efa760aa01551fd Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 3 Jan 2018 23:14:21 +0100 Subject: uapi/if_ether.h: prevent redefinition of struct ethhdr Musl provides its own ethhdr struct definition. Add a guard to prevent its definition of the appropriate musl header has already been included. glibc does not implement this header, but when glibc will implement this they can just define __UAPI_DEF_ETHHDR 0 to make it work with the kernel. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/libc-compat.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..144de4d2f385 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include +#include /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -149,11 +150,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,4 +264,10 @@ #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ -- cgit v1.2.3 From 040ee69226f8a96b7943645d68f41d5d44b5ff7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Dec 2017 20:20:38 -0500 Subject: fix "netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'" Descriptor table is a shared object; it's not a place where you can stick temporary references to files, especially when we don't need an opened file at all. Cc: stable@vger.kernel.org # v4.14 Fixes: 98589a0998b8 ("netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'") Signed-off-by: Al Viro --- include/linux/bpf.h | 10 ++++++++++ kernel/bpf/inode.c | 40 +++++++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 2 +- net/netfilter/xt_bpf.c | 14 ++------------ 4 files changed, 52 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e55e4255a210..b63a592ad29d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -419,6 +419,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -506,6 +508,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, { return 0; } + +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -514,6 +522,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 01aaef1a77c5..5bb5e49ef4c3 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -368,7 +368,45 @@ out: putname(pname); return ret; } -EXPORT_SYMBOL_GPL(bpf_obj_get_user); + +static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + int ret = inode_permission(inode, MAY_READ | MAY_WRITE); + if (ret) + return ERR_PTR(ret); + + if (inode->i_op == &bpf_map_iops) + return ERR_PTR(-EINVAL); + if (inode->i_op != &bpf_prog_iops) + return ERR_PTR(-EACCES); + + prog = inode->i_private; + + ret = security_bpf_prog(prog); + if (ret < 0) + return ERR_PTR(ret); + + if (!bpf_prog_get_ok(prog, &type, false)) + return ERR_PTR(-EINVAL); + + return bpf_prog_inc(prog); +} + +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + struct path path; + int ret = kern_path(name, LOOKUP_FOLLOW, &path); + if (ret) + return ERR_PTR(ret); + prog = __get_prog_inode(d_backing_inode(path.dentry), type); + if (!IS_ERR(prog)) + touch_atime(&path); + path_put(&path); + return prog; +} +EXPORT_SYMBOL(bpf_prog_get_type_path); static void bpf_evict_inode(struct inode *inode) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2c4cfeaa8d5e..5cb783fc8224 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1057,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static bool bpf_prog_get_ok(struct bpf_prog *prog, +bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv) { /* not an attachment, just a refcount inc, always allow */ diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 041da0d9c06f..fa2ca0a13619 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -52,18 +52,8 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) { - mm_segment_t oldfs = get_fs(); - int retval, fd; - - set_fs(KERNEL_DS); - fd = bpf_obj_get_user(path, 0); - set_fs(oldfs); - if (fd < 0) - return fd; - - retval = __bpf_mt_check_fd(fd, ret); - sys_close(fd); - return retval; + *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); + return PTR_ERR_OR_ZERO(*ret); } static int bpf_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3 From 5133550296d43236439494aa955bfb765a89f615 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 4 Jan 2018 21:06:49 +0300 Subject: sh_eth: fix SH7757 GEther initialization Renesas SH7757 has 2 Fast and 2 Gigabit Ether controllers, while the 'sh_eth' driver can only reset and initialize TSU of the first controller pair. Shimoda-san tried to solve that adding the 'needs_init' member to the 'struct sh_eth_plat_data', however the platform code still never sets this flag. I think that we can infer this information from the 'devno' variable (set to 'platform_device::id') and reset/init the Ether controller pair only for an even 'devno'; therefore 'sh_eth_plat_data::needs_init' can be removed... Fixes: 150647fb2c31 ("net: sh_eth: change the condition of initialization") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- include/linux/sh_eth.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 1bdd67a8a869..f21c1db91c3f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3254,8 +3254,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index ff3642d267f7..94081e9a5010 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -17,7 +17,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif -- cgit v1.2.3 From b6c5734db07079c9410147b32407f2366d584e6c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 5 Jan 2018 11:17:18 -0200 Subject: sctp: fix the handling of ICMP Frag Needed for too small MTUs syzbot reported a hang involving SCTP, on which it kept flooding dmesg with the message: [ 246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too low, using default minimum of 512 That happened because whenever SCTP hits an ICMP Frag Needed, it tries to adjust to the new MTU and triggers an immediate retransmission. But it didn't consider the fact that MTUs smaller than the SCTP minimum MTU allowed (512) would not cause the PMTU to change, and issued the retransmission anyway (thus leading to another ICMP Frag Needed, and so on). As IPv4 (ip_rt_min_pmtu=556) and IPv6 (IPV6_MIN_MTU=1280) minimum MTU are higher than that, sctp_transport_update_pmtu() is changed to re-fetch the PMTU that got set after our request, and with that, detect if there was an actual change or not. The fix, thus, skips the immediate retransmission if the received ICMP resulted in no change, in the hope that SCTP will select another path. Note: The value being used for the minimum MTU (512, SCTP_DEFAULT_MINSEGMENT) is not right and instead it should be (576, SCTP_MIN_PMTU), but such change belongs to another patch. Changes from v1: - do not disable PMTU discovery, in the light of commit 06ad391919b2 ("[SCTP] Don't disable PMTU discovery when mtu is small") and as suggested by Xin Long. - changed the way to break the rtx loop by detecting if the icmp resulted in a change or not Changes from v2: none See-also: https://lkml.org/lkml/2017/12/22/811 Reported-by: syzbot Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/input.c | 8 ++++++-- net/sctp/transport.c | 29 +++++++++++++++++++---------- 3 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2f8f93da5dc2..9a5ccf03a59b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -966,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *t); -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); diff --git a/net/sctp/input.c b/net/sctp/input.c index 9320661cc41d..141c9c466ec1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -406,8 +406,12 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, */ return; - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + /* Update transports view of the MTU. Return if no update was needed. + * If an update wasn't needed/possible, it also doesn't make sense to + * try to retransmit now. + */ + if (!sctp_transport_update_pmtu(t, pmtu)) + return; /* Update association pmtu. */ sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1e5a22430cf5..47f82bd794d9 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - } else { - t->pathmtu = pmtu; + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment instead */ + pmtu = SCTP_DEFAULT_MINSEGMENT; } + pmtu = SCTP_TRUNC4(pmtu); if (dst) { dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); } - if (!dst) + if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + dst = t->dst; + } + + if (dst) { + /* Re-fetch, as under layers may have a higher minimum size */ + pmtu = SCTP_TRUNC4(dst_mtu(dst)); + change = t->pathmtu != pmtu; + } + t->pathmtu = pmtu; + + return change; } /* Caches the dst entry and source address for a transport's destination -- cgit v1.2.3