diff options
Diffstat (limited to 'include')
158 files changed, 1986 insertions, 983 deletions
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 9fdf21302fdf..0d132ee2a291 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -2,6 +2,13 @@ #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ +extern unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert, unsigned long le); +extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region @@ -12,8 +19,22 @@ * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -extern unsigned long find_next_bit(const unsigned long *addr, unsigned long - size, unsigned long offset); +static inline +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 0); +} #endif #ifndef find_next_and_bit @@ -27,9 +48,23 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -extern unsigned long find_next_and_bit(const unsigned long *addr1, +static inline +unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, - unsigned long offset); + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & *addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); +} #endif #ifndef find_next_zero_bit @@ -42,8 +77,22 @@ extern unsigned long find_next_and_bit(const unsigned long *addr1, * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ -extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned - long size, unsigned long offset); +static inline +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); +} #endif #ifdef CONFIG_GENERIC_FIND_FIRST_BIT @@ -56,8 +105,17 @@ extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -extern unsigned long find_first_bit(const unsigned long *addr, - unsigned long size); +static inline +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_bit(addr, size); +} /** * find_first_zero_bit - find the first cleared bit in a memory region @@ -67,8 +125,17 @@ extern unsigned long find_first_bit(const unsigned long *addr, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -extern unsigned long find_first_zero_bit(const unsigned long *addr, - unsigned long size); +static inline +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit(addr, size); +} #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifndef find_first_bit @@ -80,6 +147,27 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr, #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifndef find_last_bit +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The number of bits to search + * + * Returns the bit number of the last set bit, or size. + */ +static inline +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __fls(val) : size; + } + + return _find_last_bit(addr, size); +} +#endif + /** * find_next_clump8 - find next 8-bit clump with set bits in a memory region * @clump: location to store copy of found clump diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index 188d3eba3ace..5a28629cbf4d 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -2,8 +2,10 @@ #ifndef _ASM_GENERIC_BITOPS_LE_H_ #define _ASM_GENERIC_BITOPS_LE_H_ +#include <asm-generic/bitops/find.h> #include <asm/types.h> #include <asm/byteorder.h> +#include <linux/swab.h> #if defined(__LITTLE_ENDIAN) @@ -32,13 +34,41 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, #define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) #ifndef find_next_zero_bit_le -extern unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset); +static inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); +} #endif #ifndef find_next_bit_le -extern unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset); +static inline +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 1); +} #endif #ifndef find_first_zero_bit_le diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index 3905c1c93dc2..1023e2a4bd37 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -23,4 +23,16 @@ #define BITS_PER_LONG_LONG 64 #endif +/* + * small_const_nbits(n) is true precisely when it is known at compile-time + * that BITMAP_SIZE(n) is 1, i.e. 1 <= n <= BITS_PER_LONG. This allows + * various bit/bitmap APIs to provide a fast inline implementation. Bitmaps + * of size 0 are very rare, and a compile-time-known-size 0 is most likely + * a sign of error. They will be handled correctly by the bit/bitmap APIs, + * but using the out-of-line functions, so that the inline implementations + * can unconditionally dereference the pointer(s). + */ +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 76d456c516a1..e93375c710b9 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1064,17 +1064,6 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) #endif #endif /* CONFIG_GENERIC_IOMAP */ -/* - * Convert a virtual cached pointer to an uncached pointer - */ -#ifndef xlate_dev_kmem_ptr -#define xlate_dev_kmem_ptr xlate_dev_kmem_ptr -static inline void *xlate_dev_kmem_ptr(void *addr) -{ - return addr; -} -#endif - #ifndef xlate_dev_mem_ptr #define xlate_dev_mem_ptr xlate_dev_mem_ptr static inline void *xlate_dev_mem_ptr(phys_addr_t addr) diff --git a/include/dt-bindings/clock/sifive-fu740-prci.h b/include/dt-bindings/clock/sifive-fu740-prci.h index cd7706ea5677..7899b7fee7db 100644 --- a/include/dt-bindings/clock/sifive-fu740-prci.h +++ b/include/dt-bindings/clock/sifive-fu740-prci.h @@ -19,5 +19,6 @@ #define PRCI_CLK_CLTXPLL 5 #define PRCI_CLK_TLCLK 6 #define PRCI_CLK_PCLK 7 +#define PRCI_CLK_PCIE_AUX 8 #endif /* __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H */ diff --git a/include/dt-bindings/input/atmel-maxtouch.h b/include/dt-bindings/input/atmel-maxtouch.h new file mode 100644 index 000000000000..7345ab32224d --- /dev/null +++ b/include/dt-bindings/input/atmel-maxtouch.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _DT_BINDINGS_ATMEL_MAXTOUCH_H +#define _DT_BINDINGS_ATMEL_MAXTOUCH_H + +#define ATMEL_MXT_WAKEUP_NONE 0 +#define ATMEL_MXT_WAKEUP_I2C_SCL 1 +#define ATMEL_MXT_WAKEUP_GPIO 2 + +#endif /* _DT_BINDINGS_ATMEL_MAXTOUCH_H */ diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 875e002a4180..6acd3cf13a18 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -16,9 +16,16 @@ extern int restrict_link_by_builtin_trusted(struct key *keyring, const struct key_type *type, const union key_payload *payload, struct key *restriction_key); +extern __init int load_module_cert(struct key *keyring); #else #define restrict_link_by_builtin_trusted restrict_link_reject + +static inline __init int load_module_cert(struct key *keyring) +{ + return 0; +} + #endif #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 6fd3cda608e4..864b9997efb2 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -61,6 +61,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); +int kvm_pmu_probe_pmuver(void); #else struct kvm_pmu { }; @@ -116,6 +117,9 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) { return 0; } + +static inline int kvm_pmu_probe_pmuver(void) { return 0xf; } + #endif #endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3d74f1060bd1..ec621180ef09 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -322,6 +322,7 @@ struct vgic_cpu { */ struct vgic_io_device rd_iodev; struct vgic_redist_region *rdreg; + u32 rdreg_index; /* Contains the attributes and gpa of the LPI pending tables. */ u64 pendbaser; diff --git a/include/linux/align.h b/include/linux/align.h new file mode 100644 index 000000000000..2b4acec7b95a --- /dev/null +++ b/include/linux/align.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ALIGN_H +#define _LINUX_ALIGN_H + +#include <linux/const.h> + +/* @a is a power of 2 value */ +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* _LINUX_ALIGN_H */ diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 450717299928..58e6c3806c09 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -10,6 +10,8 @@ #include <linux/types.h> +struct amd_iommu; + /* * This is mainly used to communicate information back-and-forth * between SVM and IOMMU for setting up and tearing down posted @@ -33,24 +35,6 @@ extern int amd_iommu_detect(void); extern int amd_iommu_init_hardware(void); /** - * amd_iommu_enable_device_erratum() - Enable erratum workaround for device - * in the IOMMUv2 driver - * @pdev: The PCI device the workaround is necessary for - * @erratum: The erratum workaround to enable - * - * The function needs to be called before amd_iommu_init_device(). - * Possible values for the erratum number are for now: - * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI - * is enabled - * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI - * requests to one - */ -#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET 0 -#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE 1 - -extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum); - -/** * amd_iommu_init_device() - Init device for use with IOMMUv2 driver * @pdev: The PCI device to initialize * @pasids: Number of PASIDs to support for this device @@ -212,4 +196,14 @@ static inline int amd_iommu_deactivate_guest_mode(void *data) } #endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ +int amd_iommu_get_num_iommus(void); +bool amd_iommu_pc_supported(void); +u8 amd_iommu_pc_get_max_banks(unsigned int idx); +u8 amd_iommu_pc_get_max_counters(unsigned int idx); +int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, + u64 *value); +int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, + u64 *value); +struct amd_iommu *get_amd_iommu(unsigned int idx); + #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 62c54234576c..6861489a1890 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -55,6 +55,8 @@ #define ARM_SMCCC_OWNER_TRUSTED_OS 50 #define ARM_SMCCC_OWNER_TRUSTED_OS_END 63 +#define ARM_SMCCC_FUNC_QUERY_CALL_UID 0xff01 + #define ARM_SMCCC_QUIRK_NONE 0 #define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ @@ -87,8 +89,47 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_FUNC_QUERY_CALL_UID) + +/* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +/* KVM "vendor specific" services */ +#define ARM_SMCCC_KVM_FUNC_FEATURES 0 +#define ARM_SMCCC_KVM_FUNC_PTP 1 +#define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 +#define ARM_SMCCC_KVM_NUM_FUNCS 128 + +#define ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_FEATURES) + #define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 +/* + * ptp_kvm is a feature used for time sync between vm and host. + * ptp_kvm module in guest kernel will get service from host using + * this hypercall ID. + */ +#define ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_PTP) + +/* ptp_kvm counter type ID */ +#define KVM_PTP_VIRT_COUNTER 0 +#define KVM_PTP_PHYS_COUNTER 1 + /* Paravirtualised time calls (defined by ARM DEN0057A) */ #define ARM_SMCCC_HV_PV_TIME_FEATURES \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ diff --git a/include/linux/async.h b/include/linux/async.h index 0a17cd27f348..cce4ad31e8fc 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -112,7 +112,6 @@ async_schedule_dev_domain(async_func_t func, struct device *dev, return async_schedule_node_domain(func, dev, dev_to_node(dev), domain); } -void async_unregister_domain(struct async_domain *domain); extern void async_synchronize_full(void); extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); diff --git a/include/linux/bio.h b/include/linux/bio.h index a0b4cfdf62a4..f1a99f0a240c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio) return NULL; } +extern unsigned int bio_max_size(struct bio *bio); + /** * bio_full - check if the bio is full * @bio: bio to check @@ -119,7 +121,7 @@ static inline bool bio_full(struct bio *bio, unsigned len) if (bio->bi_vcnt >= bio->bi_max_vecs) return true; - if (bio->bi_iter.bi_size > UINT_MAX - len) + if (bio->bi_iter.bi_size > bio_max_size(bio) - len) return true; return false; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 70a932470b2d..a36cfcec4e77 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -4,10 +4,13 @@ #ifndef __ASSEMBLY__ -#include <linux/types.h> +#include <linux/align.h> #include <linux/bitops.h> +#include <linux/limits.h> #include <linux/string.h> -#include <linux/kernel.h> +#include <linux/types.h> + +struct device; /* * bitmaps provide bit arrays that consume one or more unsigned @@ -118,54 +121,59 @@ * Allocation and deallocation of bitmap. * Provided in lib/bitmap.c to avoid circular dependency. */ -extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); -extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); -extern void bitmap_free(const unsigned long *bitmap); +unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); +unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +void bitmap_free(const unsigned long *bitmap); + +/* Managed variants of the above. */ +unsigned long *devm_bitmap_alloc(struct device *dev, + unsigned int nbits, gfp_t flags); +unsigned long *devm_bitmap_zalloc(struct device *dev, + unsigned int nbits, gfp_t flags); /* * lib/bitmap.c provides these functions: */ -extern int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern bool __pure __bitmap_or_equal(const unsigned long *src1, - const unsigned long *src2, - const unsigned long *src3, - unsigned int nbits); -extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, - unsigned int nbits); -extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits); -extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits); -extern void bitmap_cut(unsigned long *dst, const unsigned long *src, - unsigned int first, unsigned int cut, - unsigned int nbits); -extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +bool __pure __bitmap_or_equal(const unsigned long *src1, + const unsigned long *src2, + const unsigned long *src3, + unsigned int nbits); +void __bitmap_complement(unsigned long *dst, const unsigned long *src, + unsigned int nbits); +void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); +void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); +void bitmap_cut(unsigned long *dst, const unsigned long *src, + unsigned int first, unsigned int cut, unsigned int nbits); +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +void __bitmap_replace(unsigned long *dst, + const unsigned long *old, const unsigned long *new, + const unsigned long *mask, unsigned int nbits); +int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); -extern void __bitmap_replace(unsigned long *dst, - const unsigned long *old, const unsigned long *new, - const unsigned long *mask, unsigned int nbits); -extern int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); -extern void __bitmap_set(unsigned long *map, unsigned int start, int len); -extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); - -extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask, - unsigned long align_offset); +int __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); +void __bitmap_set(unsigned long *map, unsigned int start, int len); +void __bitmap_clear(unsigned long *map, unsigned int start, int len); + +unsigned long bitmap_find_next_zero_area_off(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask, + unsigned long align_offset); /** * bitmap_find_next_zero_area - find a contiguous aligned zero area @@ -190,46 +198,38 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -extern int bitmap_parse(const char *buf, unsigned int buflen, +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); -extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern int bitmap_parselist(const char *buf, unsigned long *maskp, +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); -extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern void bitmap_remap(unsigned long *dst, const unsigned long *src, +void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); -extern int bitmap_bitremap(int oldbit, +int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits); -extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, +void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, unsigned int bits); -extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, +void bitmap_fold(unsigned long *dst, const unsigned long *orig, unsigned int sz, unsigned int nbits); -extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); -extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); -extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); #ifdef __BIG_ENDIAN -extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else #define bitmap_copy_le bitmap_copy #endif -extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); -extern int bitmap_print_to_pagebuf(bool list, char *buf, +unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) -/* - * The static inlines below do not handle constant nbits==0 correctly, - * so make such users (should any ever turn up) call the out-of-line - * versions. - */ -#define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) - static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); @@ -265,9 +265,9 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, * therefore conversion is not needed when copying data from/to arrays of u32. */ #if BITS_PER_LONG == 64 -extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, +void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits); -extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, +void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits); #else #define bitmap_from_arr32(bitmap, buf, nbits) \ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a5a48303b0f1..26bf15e6cd35 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -286,17 +286,5 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, }) #endif -#ifndef find_last_bit -/** - * find_last_bit - find the last set bit in a memory region - * @addr: The address to start the search at - * @size: The number of bits to search - * - * Returns the bit number of the last set bit, or size. - */ -extern unsigned long find_last_bit(const unsigned long *addr, - unsigned long size); -#endif - #endif /* __KERNEL__ */ #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365..9fb255b48a57 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -11,7 +11,6 @@ #include <linux/minmax.h> #include <linux/timer.h> #include <linux/workqueue.h> -#include <linux/pagemap.h> #include <linux/backing-dev-defs.h> #include <linux/wait.h> #include <linux/mempool.h> @@ -327,6 +326,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int bio_max_bytes; + enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6023a1367853..06841517ab1e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -302,10 +302,11 @@ struct bpf_verifier_state_list { }; /* Possible states for alu_state member. */ -#define BPF_ALU_SANITIZE_SRC 1U -#define BPF_ALU_SANITIZE_DST 2U +#define BPF_ALU_SANITIZE_SRC (1U << 0) +#define BPF_ALU_SANITIZE_DST (1U << 1) #define BPF_ALU_NEG_VALUE (1U << 2) #define BPF_ALU_NON_POINTER (1U << 3) +#define BPF_ALU_IMMEDIATE (1U << 4) #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6b47f94378c5..e7e99da31349 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -194,6 +194,8 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); +void invalidate_bh_lrus_cpu(int cpu); +bool has_bh_in_lru(int cpu, void *dummy); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); @@ -406,6 +408,8 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bh_lrus_cpu(int cpu) {} +static inline bool has_bh_in_lru(int cpu, void *dummy) { return 0; } #define buffer_heads_over_limit 0 #endif /* CONFIG_BLOCK */ diff --git a/include/linux/bug.h b/include/linux/bug.h index f639bd0122f3..348acf2558f3 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -36,6 +36,9 @@ static inline int is_warning_bug(const struct bug_entry *bug) return bug->flags & BUGFLAG_WARNING; } +void bug_get_file_line(struct bug_entry *bug, const char **file, + unsigned int *line); + struct bug_entry *find_bug(unsigned long bugaddr); enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); @@ -58,6 +61,13 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, return BUG_TRAP_TYPE_BUG; } +struct bug_entry; +static inline void bug_get_file_line(struct bug_entry *bug, const char **file, + unsigned int *line) +{ + *file = NULL; + *line = 0; +} static inline void generic_bug_clear_once(void) {} diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a247b089ca78..d6ab416ee2d2 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -17,6 +17,7 @@ #include <linux/timer.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/clocksource_ids.h> #include <asm/div64.h> #include <asm/io.h> @@ -62,6 +63,10 @@ struct module; * 400-499: Perfect * The ideal clocksource. A must-use where * available. + * @id: Defaults to CSID_GENERIC. The id value is captured + * in certain snapshot functions to allow callers to + * validate the clocksource from which the snapshot was + * taken. * @flags: Flags describing special properties * @enable: Optional function to enable the clocksource * @disable: Optional function to disable the clocksource @@ -100,6 +105,7 @@ struct clocksource { const char *name; struct list_head list; int rating; + enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; unsigned long flags; diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h new file mode 100644 index 000000000000..16775d7d8f8d --- /dev/null +++ b/include/linux/clocksource_ids.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLOCKSOURCE_IDS_H +#define _LINUX_CLOCKSOURCE_IDS_H + +/* Enum to give clocksources a unique identifier */ +enum clocksource_ids { + CSID_GENERIC = 0, + CSID_ARM_ARCH_COUNTER, + CSID_MAX, +}; + +#endif diff --git a/include/linux/cma.h b/include/linux/cma.h index 217999c8a762..53fd8c3cdbd0 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -44,9 +44,9 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, struct cma **res_cma); -extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, +extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align, bool no_warn); -extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); +extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); #endif diff --git a/include/linux/compaction.h b/include/linux/compaction.h index ed4070ed41ef..4221888bdcd6 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -81,7 +81,6 @@ static inline unsigned long compact_gap(unsigned int order) } #ifdef CONFIG_COMPACTION -extern int sysctl_compact_memory; extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); diff --git a/include/linux/compat.h b/include/linux/compat.h index acac0b571df1..98dd7b324c35 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -75,7 +75,6 @@ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ - asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2e8c69b43c64..97cfd13bae51 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * +/* * configfs.h - definitions for the device driver filesystem * * Based on sysfs: diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 976ec2697610..85008a65e21f 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -50,6 +50,7 @@ enum coresight_dev_subtype_sink { CORESIGHT_DEV_SUBTYPE_SINK_PORT, CORESIGHT_DEV_SUBTYPE_SINK_BUFFER, CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM, + CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM, }; enum coresight_dev_subtype_link { @@ -455,6 +456,18 @@ static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 o } #endif /* CONFIG_64BIT */ +static inline bool coresight_is_percpu_source(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) && + (csdev->subtype.source_subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_PROC); +} + +static inline bool coresight_is_percpu_sink(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SINK) && + (csdev->subtype.sink_subtype == CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM); +} + extern struct coresight_device * coresight_register(struct coresight_desc *desc); extern void coresight_unregister(struct coresight_device *csdev); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index db82ce5304f9..4a62b3980642 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -57,7 +57,7 @@ enum cpuhp_state { CPUHP_PAGE_ALLOC_DEAD, CPUHP_NET_DEV_DEAD, CPUHP_PCI_XGENE_DEAD, - CPUHP_IOMMU_INTEL_DEAD, + CPUHP_IOMMU_IOVA_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_PADATA_DEAD, @@ -169,6 +169,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_RAPL_ONLINE, CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, + CPUHP_AP_PERF_X86_IDXD_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_CFD_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, diff --git a/include/linux/crc8.h b/include/linux/crc8.h index 13c8dabb0441..674045c59a04 100644 --- a/include/linux/crc8.h +++ b/include/linux/crc8.h @@ -96,6 +96,6 @@ void crc8_populate_msb(u8 table[CRC8_TABLE_SIZE], u8 polynomial); * Williams, Ross N., ross<at>ross.net * (see URL http://www.ross.net/crc/download/crc_v3.txt). */ -u8 crc8(const u8 table[CRC8_TABLE_SIZE], u8 *pdata, size_t nbytes, u8 crc); +u8 crc8(const u8 table[CRC8_TABLE_SIZE], const u8 *pdata, size_t nbytes, u8 crc); #endif /* __CRC8_H_ */ diff --git a/include/linux/cred.h b/include/linux/cred.h index ac0e5f97d7d8..14971322e1a0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -53,7 +53,6 @@ do { \ groups_free(group_info); \ } while (0) -extern struct group_info init_groups; #ifdef CONFIG_MULTIUSER extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c1e48014106f..9e23d33bb6f1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -59,6 +59,7 @@ struct qstr { extern const struct qstr empty_name; extern const struct qstr slash_name; +extern const struct qstr dotdot_name; struct dentry_stat_t { long nr_dentry; @@ -300,8 +301,8 @@ char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); -extern char *dentry_path_raw(struct dentry *, char *, int); -extern char *dentry_path(struct dentry *, char *, int); +extern char *dentry_path_raw(const struct dentry *, char *, int); +extern char *dentry_path(const struct dentry *, char *, int); /* Allocation counts.. */ diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 2d3bdcccf5eb..21651f946751 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -82,16 +82,16 @@ static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) return 0; } -static inline void delayacct_set_flag(int flag) +static inline void delayacct_set_flag(struct task_struct *p, int flag) { - if (current->delays) - current->delays->flags |= flag; + if (p->delays) + p->delays->flags |= flag; } -static inline void delayacct_clear_flag(int flag) +static inline void delayacct_clear_flag(struct task_struct *p, int flag) { - if (current->delays) - current->delays->flags &= ~flag; + if (p->delays) + p->delays->flags &= ~flag; } static inline void delayacct_tsk_init(struct task_struct *tsk) @@ -114,7 +114,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) static inline void delayacct_blkio_start(void) { - delayacct_set_flag(DELAYACCT_PF_BLKIO); + delayacct_set_flag(current, DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); } @@ -123,7 +123,7 @@ static inline void delayacct_blkio_end(struct task_struct *p) { if (p->delays) __delayacct_blkio_end(p); - delayacct_clear_flag(DELAYACCT_PF_BLKIO); + delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); } static inline int delayacct_add_tsk(struct taskstats *d, @@ -166,9 +166,9 @@ static inline void delayacct_thrashing_end(void) } #else -static inline void delayacct_set_flag(int flag) +static inline void delayacct_set_flag(struct task_struct *p, int flag) {} -static inline void delayacct_clear_flag(int flag) +static inline void delayacct_clear_flag(struct task_struct *p, int flag) {} static inline void delayacct_init(void) {} diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5c641f930caf..ff700fb6ce1d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -574,11 +574,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, */ void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); -/* - * A wrapper around vmalloc. - */ -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); - /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 706b68d1359b..6e75a2d689b4 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -40,6 +40,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, struct iommu_domain *domain); +extern bool iommu_dma_forcedac; + #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; @@ -81,10 +83,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } -static inline void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain) -{ -} - #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 51872e736e7b..0d53a96a3d64 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -22,6 +22,11 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); + struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs); + void (*free_noncontiguous)(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -198,6 +203,20 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_DECLARE_COHERENT */ +/* + * This is the actual return value from the ->alloc_noncontiguous method. + * The users of the DMA API should only care about the sg_table, but to make + * the DMA-API internal vmaping and freeing easier we stash away the page + * array as well (except for the fallback case). This can go away any time, + * e.g. when a vmap-variant that takes a scatterlist comes along. + */ +struct dma_sgt_handle { + struct sg_table sgt; + struct page **pages; +}; +#define sgt_handle(sgt) \ + container_of((sgt), struct dma_sgt_handle, sgt) + int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 2a984cb4d1e0..183e7103a66d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -95,7 +95,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); - if (dma_addr == DMA_MAPPING_ERROR) + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) return -ENOMEM; return 0; } @@ -144,6 +144,15 @@ u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); +struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void *dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt); +void dma_vunmap_noncontiguous(struct device *dev, void *vaddr); +int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, @@ -257,12 +266,37 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; } +static inline struct sg_table *dma_alloc_noncontiguous(struct device *dev, + size_t size, enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs) +{ + return NULL; +} +static inline void dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void *dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt) +{ + return NULL; +} +static inline void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) +{ +} +static inline int dma_mmap_noncontiguous(struct device *dev, + struct vm_area_struct *vma, size_t size, struct sg_table *sgt) +{ + return -EINVAL; +} #endif /* CONFIG_HAS_DMA */ struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir); +int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma, + size_t size, struct page *page); static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) @@ -401,7 +435,6 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - return dma_alloc_attrs(dev, size, dma_handle, gfp, (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c6cc0a566ef5..5487a80617a3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -168,7 +168,7 @@ struct f2fs_checkpoint { unsigned char alloc_type[MAX_ACTIVE_LOGS]; /* SIT and NAT version bitmap */ - unsigned char sit_nat_version_bitmap[1]; + unsigned char sit_nat_version_bitmap[]; } __packed; #define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ diff --git a/include/linux/file.h b/include/linux/file.h index 225982792fa2..2de2e4613d7b 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -92,23 +92,20 @@ extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); -extern int __receive_fd(int fd, struct file *file, int __user *ufd, +extern int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); static inline int receive_fd_user(struct file *file, int __user *ufd, unsigned int o_flags) { if (ufd == NULL) return -EFAULT; - return __receive_fd(-1, file, ufd, o_flags); + return __receive_fd(file, ufd, o_flags); } static inline int receive_fd(struct file *file, unsigned int o_flags) { - return __receive_fd(-1, file, NULL, o_flags); -} -static inline int receive_fd_replace(int fd, struct file *file, unsigned int o_flags) -{ - return __receive_fd(fd, file, NULL, o_flags); + return __receive_fd(file, NULL, o_flags); } +int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 12766edee81f..c3c88fdb9b2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -145,7 +145,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)0x1000) -/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ +/* File is huge (eg. /dev/mem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) /* File is opened with O_PATH; almost nothing can be done with it */ @@ -442,7 +442,6 @@ int pagecache_write_end(struct file *, struct address_space *mapping, * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. - * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock. * @writeback_index: Writeback starts here. * @a_ops: Methods. * @flags: Error bits and flags (AS_*). @@ -463,7 +462,6 @@ struct address_space { struct rb_root_cached i_mmap; struct rw_semaphore i_mmap_rwsem; unsigned long nrpages; - unsigned long nrexceptional; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 86e5028bfa20..a69f363b61bf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -33,7 +33,7 @@ /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that - * does. Or at least does enough to prevent any unwelcomed side effects. + * does. Or at least does enough to prevent any unwelcome side effects. */ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 @@ -389,7 +389,7 @@ DECLARE_PER_CPU(int, disable_stack_tracer); */ static inline void stack_tracer_disable(void) { - /* Preemption or interupts must be disabled */ + /* Preemption or interrupts must be disabled */ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_inc(disable_stack_tracer); diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 6cb82301d8e9..939b1a8f571b 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -404,4 +404,3 @@ s_fields \ /* }}}1 */ #endif /* GENL_MAGIC_FUNC_H */ -/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index 35d21fddaf2d..f81d48987528 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -283,4 +283,3 @@ enum { \ /* }}}1 */ #endif /* GENL_MAGIC_STRUCT_H */ -/* vim: set foldmethod=marker nofoldenable : */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 26f4d907254a..11da8af06704 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -490,7 +490,7 @@ static inline int gfp_zonelist(gfp_t flags) /* * We get the zone list from the current node and the gfp_mask. - * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones. + * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones. * There are two zonelists per node, one for all zones with memory and * one containing just zones from the node the zonelist belongs to. * @@ -657,7 +657,7 @@ extern int alloc_contig_range(unsigned long start, unsigned long end, extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, int nid, nodemask_t *nodemask); #endif -void free_contig_range(unsigned long pfn, unsigned int nr_pages); +void free_contig_range(unsigned long pfn, unsigned long nr_pages); #ifdef CONFIG_CMA /* CMA stuff */ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ecf0032a0995..3a268781fcec 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -227,7 +227,7 @@ struct gpio_irq_chip { /** * @valid_mask: * - * If not %NULL holds bitmask of GPIOs which are valid to be included + * If not %NULL, holds bitmask of GPIOs which are valid to be included * in IRQ domain of the chip. */ unsigned long *valid_mask; @@ -346,7 +346,7 @@ struct gpio_irq_chip { * output. * * A gpio_chip can help platforms abstract various sources of GPIOs so - * they can all be accessed through a common programing interface. + * they can all be accessed through a common programming interface. * Example sources would be SOC controllers, FPGAs, multifunction * chips, dedicated GPIO expanders, and so on. * @@ -435,15 +435,15 @@ struct gpio_chip { /** * @valid_mask: * - * If not %NULL holds bitmask of GPIOs which are valid to be used + * If not %NULL, holds bitmask of GPIOs which are valid to be used * from the chip. */ unsigned long *valid_mask; #if defined(CONFIG_OF_GPIO) /* - * If CONFIG_OF is enabled, then all GPIO controllers described in the - * device tree automatically may have an OF translation + * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in + * the device tree automatically may have an OF translation */ /** @@ -508,7 +508,7 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * for GPIOs will fail rudely. * * gpiochip_add_data() must only be called after gpiolib initialization, - * ie after core_initcall(). + * i.e. after core_initcall(). * * If gc->base is negative, this requests dynamic assignment of * a range of valid GPIOs. diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 44170f312ae7..832b49b50c7b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -332,4 +332,11 @@ static inline void memcpy_to_page(struct page *page, size_t offset, kunmap_local(to); } +static inline void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ba973efcd369..9626fda5efce 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -87,9 +87,6 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, -#ifdef CONFIG_DEBUG_VM - TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, -#endif }; struct kobject; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cccd1aab69dd..b92f25ccef58 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -11,6 +11,7 @@ #include <linux/kref.h> #include <linux/pgtable.h> #include <linux/gfp.h> +#include <linux/userfaultfd_k.h> struct ctl_table; struct user_struct; @@ -134,11 +135,14 @@ void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); +#ifdef CONFIG_USERFAULTFD int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + enum mcopy_atomic_mode mode, struct page **pagep); +#endif /* CONFIG_USERFAULTFD */ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); @@ -152,7 +156,8 @@ void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); -pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pud_t *pud); struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); @@ -161,7 +166,7 @@ extern struct list_head huge_boot_pages; /* arch callbacks */ -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); @@ -187,6 +192,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); bool is_hugetlb_entry_migration(pte_t pte); +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -308,16 +314,19 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, BUG(); } +#ifdef CONFIG_USERFAULTFD static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, + enum mcopy_atomic_mode mode, struct page **pagep) { BUG(); return 0; } +#endif /* CONFIG_USERFAULTFD */ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) @@ -368,6 +377,8 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, return 0; } +static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } + #endif /* !CONFIG_HUGETLB_PAGE */ /* * hugepages at page global directory. If arch support @@ -555,6 +566,7 @@ HPAGEFLAG(Freed, freed) #define HSTATE_NAME_LEN 32 /* Defines one hugetlb page size */ struct hstate { + struct mutex resize_lock; int next_nid_to_alloc; int next_nid_to_free; unsigned int order; @@ -583,6 +595,7 @@ struct huge_bootmem_page { struct hstate *hstate; }; +int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, @@ -865,6 +878,12 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline int isolate_or_dissolve_huge_page(struct page *page, + struct list_head *list) +{ + return -ENOMEM; +} + static inline struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) @@ -1039,4 +1058,14 @@ static inline __init void hugetlb_cma_check(void) } #endif +bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); + +#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE +/* + * ARCHes with special requirements for evicting HUGETLB backing TLB entries can + * implement this. + */ +#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#endif + #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b2412b4d4c20..40fc5813cf93 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,7 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; extern struct nsproxy init_nsproxy; -extern struct group_info init_groups; extern struct cred init_cred; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 85c15717af34..1bbe9af48dc3 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -20,8 +20,10 @@ extern void free_initrd_mem(unsigned long, unsigned long); #ifdef CONFIG_BLK_DEV_INITRD extern void __init reserve_initrd_mem(void); +extern void wait_for_initramfs(void); #else static inline void __init reserve_initrd_mem(void) {} +static inline void wait_for_initramfs(void) {} #endif extern phys_addr_t phys_initrd_start; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1bc46b88711a..03faf20a6817 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -20,6 +20,7 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/dmar.h> #include <linux/ioasid.h> +#include <linux/bitfield.h> #include <asm/cacheflush.h> #include <asm/iommu.h> @@ -80,6 +81,7 @@ #define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ +#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define DMAR_PQH_REG 0xc0 /* Page request queue head register */ #define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ @@ -126,6 +128,10 @@ #define DMAR_VCMD_REG 0xe10 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */ +#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) +#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) +#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg) + #define OFFSET_STRIDE (9) #define dmar_readq(a) readq(a) @@ -372,6 +378,7 @@ enum { /* PASID cache invalidation granu */ #define QI_PC_ALL_PASIDS 0 #define QI_PC_PASID_SEL 1 +#define QI_PC_GLOBAL 3 #define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) #define QI_EIOTLB_IH(ih) (((u64)ih) << 6) @@ -763,14 +770,11 @@ u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); -struct svm_dev_ops; - struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; int users; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 39d368a810b8..10fa80eef13a 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,13 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -struct device; - -struct svm_dev_ops { - void (*fault_cb)(struct device *dev, u32 pasid, u64 address, - void *private, int rwxp, int response); -}; - /* Values for rxwp in fault_cb callback */ #define SVM_REQ_READ (1<<3) #define SVM_REQ_WRITE (1<<2) @@ -22,16 +15,6 @@ struct svm_dev_ops { #define SVM_REQ_PRIV (1<<0) /* - * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" - * PASID for the current process. Even if a PASID already exists, a new one - * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID - * will not be given to subsequent callers. This facility allows a driver to - * disambiguate between multiple device contexts which access the same MM, - * if there is no other way to do so. It should be used sparingly, if at all. - */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) - -/* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done * for kernel mappings; it is valid only for access to the kernel's static @@ -42,18 +25,18 @@ struct svm_dev_ops { * It is unlikely that we will ever hook into flush_tlb_kernel_range() to * do such IOTLB flushes automatically. */ -#define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#define SVM_FLAG_SUPERVISOR_MODE BIT(0) /* * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest * processes. Compared to the host bind, the primary differences are: * 1. mm life cycle management * 2. fault reporting */ -#define SVM_FLAG_GUEST_MODE (1<<2) +#define SVM_FLAG_GUEST_MODE BIT(1) /* * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, * which requires guest and host PASID translation at both directions. */ -#define SVM_FLAG_GUEST_PASID (1<<3) +#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index a4c9ca2c31f1..4d40dfa75b55 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -204,10 +204,6 @@ struct io_pgtable { #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) -struct io_pgtable_domain_attr { - unsigned long quirks; -}; - static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) { if (iop->cfg.tlb && iop->cfg.tlb->tlb_flush_all) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index d202fd2d0f91..c87d0cb0de6d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -198,7 +198,6 @@ struct iomap_ioend { struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ - void *io_private; /* file system private data */ struct bio *io_bio; /* bio being built */ struct bio io_inline_bio; /* MUST BE LAST! */ }; @@ -234,9 +233,7 @@ struct iomap_writepage_ctx { void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, - struct list_head *more_ioends, - void (*merge_private)(struct iomap_ioend *ioend, - struct iomap_ioend *next)); + struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); int iomap_writepage(struct page *page, struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5e7fe519430a..32d448050bf7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -96,32 +96,6 @@ enum iommu_cap { IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ }; -/* - * Following constraints are specifc to FSL_PAMUV1: - * -aperture must be power of 2, and naturally aligned - * -number of windows must be power of 2, and address space size - * of each window is determined by aperture size / # of windows - * -the actual size of the mapped region of a window must be power - * of 2 starting with 4KB and physical address must be naturally - * aligned. - * DOMAIN_ATTR_FSL_PAMUV1 corresponds to the above mentioned contraints. - * The caller can invoke iommu_domain_get_attr to check if the underlying - * iommu implementation supports these constraints. - */ - -enum iommu_attr { - DOMAIN_ATTR_GEOMETRY, - DOMAIN_ATTR_PAGING, - DOMAIN_ATTR_WINDOWS, - DOMAIN_ATTR_FSL_PAMU_STASH, - DOMAIN_ATTR_FSL_PAMU_ENABLE, - DOMAIN_ATTR_FSL_PAMUV1, - DOMAIN_ATTR_NESTING, /* two stages of translation */ - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, - DOMAIN_ATTR_IO_PGTABLE_CFG, - DOMAIN_ATTR_MAX, -}; - /* These are the possible reserved region types */ enum iommu_resv_type { /* Memory regions which must be mapped 1:1 at all times */ @@ -156,10 +130,24 @@ struct iommu_resv_region { enum iommu_resv_type type; }; -/* Per device IOMMU features */ +/** + * enum iommu_dev_features - Per device IOMMU features + * @IOMMU_DEV_FEAT_AUX: Auxiliary domain feature + * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses + * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally + * enabling %IOMMU_DEV_FEAT_SVA requires + * %IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page + * Faults themselves instead of relying on the IOMMU. When + * supported, this feature must be enabled before and + * disabled after %IOMMU_DEV_FEAT_SVA. + * + * Device drivers query whether a feature is supported using + * iommu_dev_has_feature(), and enable it using iommu_dev_enable_feature(). + */ enum iommu_dev_features { - IOMMU_DEV_FEAT_AUX, /* Aux-domain feature */ - IOMMU_DEV_FEAT_SVA, /* Shared Virtual Addresses */ + IOMMU_DEV_FEAT_AUX, + IOMMU_DEV_FEAT_SVA, + IOMMU_DEV_FEAT_IOPF, }; #define IOMMU_PASID_INVALID (-1U) @@ -203,13 +191,11 @@ struct iommu_iotlb_gather { * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain * @device_group: find iommu group for a particular device - * @domain_get_attr: Query domain attributes - * @domain_set_attr: Change domain attributes + * @enable_nesting: Enable nesting + * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @get_resv_regions: Request list of reserved regions for a device * @put_resv_regions: Free list of reserved regions for a device * @apply_resv_region: Temporary helper call-back for iova reserved ranges - * @domain_window_enable: Configure and enable a particular window for a domain - * @domain_window_disable: Disable a particular window for a domain * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) @@ -255,10 +241,9 @@ struct iommu_ops { void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); - int (*domain_get_attr)(struct iommu_domain *domain, - enum iommu_attr attr, void *data); - int (*domain_set_attr)(struct iommu_domain *domain, - enum iommu_attr attr, void *data); + int (*enable_nesting)(struct iommu_domain *domain); + int (*set_pgtable_quirks)(struct iommu_domain *domain, + unsigned long quirks); /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); @@ -267,11 +252,6 @@ struct iommu_ops { struct iommu_domain *domain, struct iommu_resv_region *region); - /* Window handling functions */ - int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size, int prot); - void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); @@ -353,6 +333,7 @@ struct iommu_fault_param { * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data + * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data @@ -363,12 +344,15 @@ struct iommu_fault_param { struct dev_iommu { struct mutex lock; struct iommu_fault_param *fault_param; + struct iopf_device_param *iopf_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; }; -int iommu_device_register(struct iommu_device *iommu); +int iommu_device_register(struct iommu_device *iommu, + const struct iommu_ops *ops, + struct device *hwdev); void iommu_device_unregister(struct iommu_device *iommu); int iommu_device_sysfs_add(struct iommu_device *iommu, struct device *parent, @@ -379,25 +363,6 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain); -static inline void __iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) -{ - iommu->ops = ops; -} - -#define iommu_device_set_ops(iommu, ops) \ -do { \ - struct iommu_ops *__ops = (struct iommu_ops *)(ops); \ - __ops->owner = THIS_MODULE; \ - __iommu_device_set_ops(iommu, __ops); \ -} while (0) - -static inline void iommu_device_set_fwnode(struct iommu_device *iommu, - struct fwnode_handle *fwnode) -{ - iommu->fwnode = fwnode; -} - static inline struct iommu_device *dev_to_iommu_device(struct device *dev) { return (struct iommu_device *)dev_get_drvdata(dev); @@ -507,15 +472,12 @@ extern int iommu_page_response(struct device *dev, extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); -extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, - void *data); -extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, - void *data); +int iommu_enable_nesting(struct iommu_domain *domain); +int iommu_set_pgtable_quirks(struct iommu_domain *domain, + unsigned long quirks); -/* Window handling function prototypes */ -extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t offset, u64 size, - int prot); +void iommu_set_dma_strict(bool val); +bool iommu_get_dma_strict(struct iommu_domain *domain); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); @@ -547,7 +509,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, * structure can be rewritten. */ if (gather->pgsize != size || - end < gather->start || start > gather->end) { + end + 1 < gather->start || start > gather->end + 1) { if (gather->pgsize) iommu_iotlb_sync(domain, gather); gather->pgsize = size; @@ -571,8 +533,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); * struct iommu_fwspec - per-device IOMMU instance data * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU - * @iommu_priv: IOMMU driver private data for this device - * @num_pasid_bits: number of PASID bits supported by this device + * @flags: IOMMU_FWSPEC_* flags * @num_ids: number of associated device IDs * @ids: IDs which this device may present to the IOMMU */ @@ -580,7 +541,6 @@ struct iommu_fwspec { const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; u32 flags; - u32 num_pasid_bits; unsigned int num_ids; u32 ids[]; }; @@ -742,13 +702,6 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain, { } -static inline int iommu_domain_window_enable(struct iommu_domain *domain, - u32 wnd_nr, phys_addr_t paddr, - u64 size, int prot) -{ - return -ENODEV; -} - static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; @@ -889,33 +842,19 @@ static inline int iommu_group_id(struct iommu_group *group) return -ENODEV; } -static inline int iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - return -EINVAL; -} - -static inline int iommu_domain_set_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) +static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, + unsigned long quirks) { - return -EINVAL; + return 0; } -static inline int iommu_device_register(struct iommu_device *iommu) +static inline int iommu_device_register(struct iommu_device *iommu, + const struct iommu_ops *ops, + struct device *hwdev) { return -ENODEV; } -static inline void iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) -{ -} - -static inline void iommu_device_set_fwnode(struct iommu_device *iommu, - struct fwnode_handle *fwnode) -{ -} - static inline struct iommu_device *dev_to_iommu_device(struct device *dev) { return NULL; diff --git a/include/linux/iova.h b/include/linux/iova.h index c834c01c0a5b..71d8a2de6635 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -95,6 +95,7 @@ struct iova_domain { flush-queues */ atomic_t fq_timer_on; /* 1 when timer is active, 0 when not */ + struct hlist_node cpuhp_dead; }; static inline unsigned long iova_size(struct iova *iova) @@ -156,7 +157,6 @@ int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); -void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #else static inline int iova_cache_get(void) { @@ -233,10 +233,6 @@ static inline void put_iova_domain(struct iova_domain *iovad) { } -static inline void free_cpu_cached_iovas(unsigned int cpu, - struct iova_domain *iovad) -{ -} #endif #endif diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7a1dd7b969b6..62a8e3d23829 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -256,11 +256,11 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); -struct irq_domain *irq_domain_add_simple(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data); +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data); struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, unsigned int size, unsigned int first_irq, @@ -325,6 +325,15 @@ static inline struct irq_domain *irq_find_host(struct device_node *node) return d; } +static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data) +{ + return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); +} + /** * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f9d15410c93..15d8bad3d2f2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -3,6 +3,7 @@ #define _LINUX_KERNEL_H #include <stdarg.h> +#include <linux/align.h> #include <linux/limits.h> #include <linux/linkage.h> #include <linux/stddef.h> @@ -30,14 +31,6 @@ */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) -/* @a is a power of 2 value */ -#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) -#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) - /* generic data direction definitions */ #define READ 0 #define WRITE 1 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1b65e7204344..8895b95b6a22 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -192,8 +192,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); @@ -218,6 +218,20 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +struct kvm_gfn_range { + struct kvm_memory_slot *slot; + gfn_t start; + gfn_t end; + pte_t pte; + bool may_block; +}; +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); +bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); +bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); +#endif + enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, @@ -640,6 +654,7 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); +bool file_is_kvm(struct file *file); void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) @@ -886,7 +901,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot); #else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, @@ -945,6 +960,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); +bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); @@ -1116,7 +1132,7 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) } static inline unsigned long -__gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) +__gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 61f04f7dc1a4..04c01794de83 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -59,6 +59,7 @@ LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) +LSM_HOOK(void, LSM_RET_VOID, sb_delete, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ba2ccd950833..5c4c5c0602cb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -108,6 +108,9 @@ * allocated. * @sb contains the super_block structure to be modified. * Return 0 if operation was successful. + * @sb_delete: + * Release objects tied to a superblock (e.g. inodes). + * @sb contains the super_block structure being released. * @sb_free_security: * Deallocate and clear the sb->s_security field. * @sb contains the super_block structure to be modified. @@ -1585,6 +1588,7 @@ struct lsm_blob_sizes { int lbs_cred; int lbs_file; int lbs_inode; + int lbs_superblock; int lbs_ipc; int lbs_msg_msg; int lbs_task; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5904716f29ba..c193be760709 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -114,12 +114,13 @@ struct batched_lruvec_stat { }; /* - * Bitmap of shrinker::id corresponding to memcg-aware shrinkers, - * which have elements charged to this memcg. + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to this memcg. */ -struct memcg_shrinker_map { +struct shrinker_info { struct rcu_head rcu; - unsigned long map[]; + atomic_long_t *nr_deferred; + unsigned long *map; }; /* @@ -145,7 +146,7 @@ struct mem_cgroup_per_node { struct mem_cgroup_reclaim_iter iter; - struct memcg_shrinker_map __rcu *shrinker_map; + struct shrinker_info __rcu *shrinker_info; struct rb_node tree_node; /* RB tree node */ unsigned long usage_in_excess;/* Set to the value by which */ @@ -1610,10 +1611,10 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -extern int memcg_expand_shrinker_maps(int new_id); - -extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg, - int nid, int shrinker_id); +int alloc_shrinker_info(struct mem_cgroup *memcg); +void free_shrinker_info(struct mem_cgroup *memcg); +void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); +void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; @@ -1623,8 +1624,8 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return false; } -static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg, - int nid, int shrinker_id) +static inline void set_shrinker_bit(struct mem_cgroup *memcg, + int nid, int shrinker_id) { } #endif diff --git a/include/linux/memory.h b/include/linux/memory.h index 4da95e684e20..97e92e8b556a 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -29,6 +29,11 @@ struct memory_block { int online_type; /* for passing data to online routine */ int nid; /* NID for this memory block */ struct device dev; + /* + * Number of vmemmap pages. These pages + * lay at the beginning of the memory block. + */ + unsigned long nr_vmemmap_pages; }; int arch_get_memory_phys_device(unsigned long start_pfn); @@ -80,7 +85,8 @@ static inline int memory_notify(unsigned long val, void *v) #else extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -int create_memory_block_devices(unsigned long start, unsigned long size); +int create_memory_block_devices(unsigned long start, unsigned long size, + unsigned long vmemmap_pages); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7288aa5ef73b..28f32fd00fe9 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -56,6 +56,14 @@ typedef int __bitwise mhp_t; #define MHP_MERGE_RESOURCE ((__force mhp_t)BIT(0)) /* + * We want memmap (struct page array) to be self contained. + * To do so, we will use the beginning of the hot-added range to build + * the page tables for the memmap array that describes the entire range. + * Only selected architectures support it with SPARSE_VMEMMAP. + */ +#define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1)) + +/* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) * pgprot: page protection flags to apply to newly created page tables @@ -99,9 +107,13 @@ static inline void zone_seqlock_init(struct zone *zone) extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); +extern void adjust_present_page_count(struct zone *zone, long nr_pages); /* VM interface that may be used by firmware interface */ +extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, + struct zone *zone); +extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, - int online_type, int nid); + struct zone *zone); extern struct zone *test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn); extern void __offline_isolated_pages(unsigned long start_pfn, @@ -359,6 +371,7 @@ extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_ extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); +extern bool mhp_supports_memmap_on_memory(unsigned long size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index f5b464daeeca..45a79da89c5f 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -17,7 +17,7 @@ struct device; * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { - const unsigned long base_pfn; + unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3a389633b68f..4bb4e519e3f5 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -27,6 +27,7 @@ enum migrate_reason { MR_MEMPOLICY_MBIND, MR_NUMA_MISPLACED, MR_CONTIG_RANGE, + MR_LONGTERM_PIN, MR_TYPES }; @@ -43,10 +44,7 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -extern void putback_movable_page(struct page *page); -extern void migrate_prep(void); -extern void migrate_prep_local(void); extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, @@ -66,9 +64,6 @@ static inline struct page *alloc_migration_target(struct page *page, static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } -static inline int migrate_prep(void) { return -ENOSYS; } -static inline int migrate_prep_local(void) { return -ENOSYS; } - static inline void migrate_page_states(struct page *newpage, struct page *page) { } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4e531c2aab52..f8e8d7e90616 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1236,7 +1236,7 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 52b7cabcde08..6d16eed6850e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -133,6 +133,7 @@ enum { MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204, MLX5_CMD_OP_ALLOC_MEMIC = 0x205, MLX5_CMD_OP_DEALLOC_MEMIC = 0x206, + MLX5_CMD_OP_MODIFY_MEMIC = 0x207, MLX5_CMD_OP_CREATE_EQ = 0x301, MLX5_CMD_OP_DESTROY_EQ = 0x302, MLX5_CMD_OP_QUERY_EQ = 0x303, @@ -1031,7 +1032,11 @@ struct mlx5_ifc_device_mem_cap_bits { u8 header_modify_sw_icm_start_address[0x40]; - u8 reserved_at_180[0x680]; + u8 reserved_at_180[0x80]; + + u8 memic_operations[0x20]; + + u8 reserved_at_220[0x5e0]; }; struct mlx5_ifc_device_event_cap_bits { @@ -10498,6 +10503,41 @@ struct mlx5_ifc_destroy_vport_lag_in_bits { u8 reserved_at_40[0x40]; }; +enum { + MLX5_MODIFY_MEMIC_OP_MOD_ALLOC, + MLX5_MODIFY_MEMIC_OP_MOD_DEALLOC, +}; + +struct mlx5_ifc_modify_memic_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x18]; + u8 memic_operation_type[0x8]; + + u8 memic_start_addr[0x40]; + + u8 reserved_at_c0[0x140]; +}; + +struct mlx5_ifc_modify_memic_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 memic_operation_addr[0x40]; + + u8 reserved_at_c0[0x140]; +}; + struct mlx5_ifc_alloc_memic_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; diff --git a/include/linux/mm.h b/include/linux/mm.h index 011f43605807..322ec61d0da7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -106,7 +106,7 @@ extern int mmap_rnd_compat_bits __read_mostly; * embedding these tags into addresses that point to these memory regions, and * checking that the memory and the pointer tags match on memory accesses) * redefine this macro to strip tags from pointers. - * It's defined as noop for arcitectures that don't support memory tagging. + * It's defined as noop for architectures that don't support memory tagging. */ #ifndef untagged_addr #define untagged_addr(addr) (addr) @@ -372,6 +372,13 @@ extern unsigned int kobjsize(const void *objp); # define VM_GROWSUP VM_NONE #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +# define VM_UFFD_MINOR_BIT 37 +# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ +#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +# define VM_UFFD_MINOR VM_NONE +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) @@ -1134,6 +1141,11 @@ static inline bool is_zone_device_page(const struct page *page) } #endif +static inline bool is_zone_movable_page(const struct page *page) +{ + return page_zonenum(page) == ZONE_MOVABLE; +} + #ifdef CONFIG_DEV_PAGEMAP_OPS void free_devmap_managed_page(struct page *page); DECLARE_STATIC_KEY_FALSE(devmap_managed_key); @@ -1543,6 +1555,20 @@ static inline unsigned long page_to_section(const struct page *page) } #endif +/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ +#ifdef CONFIG_MIGRATION +static inline bool is_pinnable_page(struct page *page) +{ + return !(is_zone_movable_page(page) || is_migrate_cma_page(page)) || + is_zero_pfn(page_to_pfn(page)); +} +#else +static inline bool is_pinnable_page(struct page *page) +{ + return true; +} +#endif + static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3b2205741048..0d53eba1c383 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -55,7 +55,7 @@ enum migratetype { * pageblocks to MIGRATE_CMA which can be done by * __free_pageblock_cma() function. What is important though * is that a range of pageblocks must be aligned to - * MAX_ORDER_NR_PAGES should biggest page be bigger then + * MAX_ORDER_NR_PAGES should biggest page be bigger than * a single pageblock. */ MIGRATE_CMA, @@ -407,8 +407,13 @@ enum zone_type { * to increase the number of THP/huge pages. Notable special cases are: * * 1. Pinned pages: (long-term) pinning of movable pages might - * essentially turn such pages unmovable. Memory offlining might - * retry a long time. + * essentially turn such pages unmovable. Therefore, we do not allow + * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and + * faulted, they come from the right zone right away. However, it is + * still possible that address space already has pages in + * ZONE_MOVABLE at the time when pages are pinned (i.e. user has + * touches that memory before pinning). In such case we migrate them + * to a different zone. When migration fails - pinning fails. * 2. memblock allocations: kernelcore/movablecore setups might create * situations where ZONE_MOVABLE contains unmovable allocations * after boot. Memory offlining and allocations fail early. @@ -427,6 +432,15 @@ enum zone_type { * techniques might use alloc_contig_range() to hide previously * exposed pages from the buddy again (e.g., to implement some sort * of memory unplug in virtio-mem). + * 6. ZERO_PAGE(0), kernelcore/movablecore setups might create + * situations where ZERO_PAGE(0) which is allocated differently + * on different platforms may end up in a movable zone. ZERO_PAGE(0) + * cannot be migrated. + * 7. Memory-hotplug: when using memmap_on_memory and onlining the + * memory to the MOVABLE zone, the vmemmap pages are also placed in + * such zone. Such pages cannot be really moved around as they are + * self-stored in the range, but they are treated as movable when + * the range they describe is about to be offlined. * * In general, no unmovable allocations that degrade memory offlining * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) @@ -1383,10 +1397,8 @@ static inline int online_section_nr(unsigned long nr) #ifdef CONFIG_MEMORY_HOTPLUG void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); -#ifdef CONFIG_MEMORY_HOTREMOVE void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); #endif -#endif static inline struct mem_section *__pfn_to_section(unsigned long pfn) { diff --git a/include/linux/msi.h b/include/linux/msi.h index aef35fd1cf11..6aff469e511d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -240,8 +240,7 @@ void pci_msi_unmask_irq(struct irq_data *data); /* * The arch hooks to setup up msi irqs. Default functions are implemented * as weak symbols so that they /can/ be overriden by architecture specific - * code if needed. These hooks must be enabled by the architecture or by - * drivers which depend on them via msi_controller based MSI handling. + * code if needed. These hooks can only be enabled by the architecture. * * If CONFIG_PCI_MSI_ARCH_FALLBACKS is not selected they are replaced by * stubs with warnings. @@ -251,7 +250,6 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); void arch_teardown_msi_irq(unsigned int irq); int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void arch_teardown_msi_irqs(struct pci_dev *dev); -void default_teardown_msi_irqs(struct pci_dev *dev); #else static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { @@ -272,19 +270,6 @@ static inline void arch_teardown_msi_irqs(struct pci_dev *dev) void arch_restore_msi_irqs(struct pci_dev *dev); void default_restore_msi_irqs(struct pci_dev *dev); -struct msi_controller { - struct module *owner; - struct device *dev; - struct device_node *of_node; - struct list_head list; - - int (*setup_irq)(struct msi_controller *chip, struct pci_dev *dev, - struct msi_desc *desc); - int (*setup_irqs)(struct msi_controller *chip, struct pci_dev *dev, - int nvec, int type); - void (*teardown_irq)(struct msi_controller *chip, unsigned int irq); -}; - #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN #include <linux/irqhandler.h> diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 2aab9612f6ab..4f9a4b3c5892 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,8 +53,7 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); -void arpt_unregister_table_pre_exit(struct net *net, const char *name, - const struct nf_hook_ops *ops); +void arpt_unregister_table_pre_exit(struct net *net, const char *name); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5b4c67c91f56..15004c469807 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -452,6 +452,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) @@ -709,6 +710,14 @@ struct nl4_server { } u; }; +enum nfs4_change_attr_type { + NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, + NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, + NFS4_CHANGE_TYPE_IS_UNDEFINED = 4, +}; + /* * Options for setxattr. These match the flags for setxattr(2). */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index eadaabd18dc7..ffba254d2098 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -246,11 +246,15 @@ struct nfs4_copy_state { BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ +#define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ +#define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ | NFS_INO_INVALID_SIZE \ + | NFS_INO_INVALID_NLINK \ + | NFS_INO_INVALID_MODE \ | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* @@ -386,7 +390,7 @@ extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct user_namespace *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_attribute_cache_expired(struct inode *inode); -extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); +extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_clear_invalid_mapping(struct address_space *mapping); extern bool nfs_mapping_need_revalidate_inode(struct inode *inode); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a28d71b45b5f..d71a0e90faeb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -156,6 +156,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 + unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ @@ -180,6 +181,9 @@ struct nfs_server { #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ + enum nfs4_change_attr_type + change_attr_type;/* Description of change attribute */ + struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ struct timespec64 time_delta; /* smallest time granularity */ @@ -265,16 +269,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_LGOPEN (1U << 5) -#define NFS_CAP_FILEID (1U << 6) -#define NFS_CAP_MODE (1U << 7) -#define NFS_CAP_NLINK (1U << 8) -#define NFS_CAP_OWNER (1U << 9) -#define NFS_CAP_OWNER_GROUP (1U << 10) -#define NFS_CAP_ATIME (1U << 11) -#define NFS_CAP_CTIME (1U << 12) -#define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3327239fa2f9..717ecc87c9e7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -15,6 +15,8 @@ #define NFS_DEF_FILE_IO_SIZE (4096U) #define NFS_MIN_FILE_IO_SIZE (1024U) +#define NFS_BITMASK_SZ 3 + struct nfs4_string { unsigned int len; char *data; @@ -150,6 +152,8 @@ struct nfs_fsinfo { __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ + enum nfs4_change_attr_type + change_attr_type; /* Info about change attr */ __u32 xattr_support; /* User xattrs supported */ }; @@ -525,7 +529,8 @@ struct nfs_closeargs { struct nfs_seqid * seqid; fmode_t fmode; u32 share_access; - u32 * bitmask; + const u32 * bitmask; + u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; }; @@ -608,7 +613,8 @@ struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; const nfs4_stateid *stateid; - u32 * bitmask; + const u32 *bitmask; + u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; }; @@ -648,7 +654,8 @@ struct nfs_pgio_args { union { unsigned int replen; /* used by read */ struct { - u32 * bitmask; /* used by write */ + const u32 * bitmask; /* used by write */ + u32 bitmask_store[NFS_BITMASK_SZ]; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; }; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 469fa7ffcf96..a4bd41128bf3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -18,6 +18,11 @@ struct pagevec; +static inline bool mapping_empty(struct address_space *mapping) +{ + return xa_empty(&mapping->i_pages); +} + /* * Bits in mapping->flags. */ diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 65d3d83015c3..fbdadd4d8377 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -85,6 +85,7 @@ extern const struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */ extern const struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern const struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ +extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */ #endif #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0fa104ee13d9..c20211e59a57 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -458,7 +458,6 @@ struct pci_dev { u32 saved_config_space[16]; /* Config space saved at suspend time */ struct hlist_head saved_cap_space; - struct bin_attribute *rom_attr; /* Attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* Display of ROM attribute enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ @@ -540,7 +539,6 @@ struct pci_host_bridge { int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); void *release_data; - struct msi_controller *msi; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ unsigned int native_aer:1; /* OS may use PCIe AER */ @@ -551,6 +549,7 @@ struct pci_host_bridge { unsigned int native_dpc:1; /* OS may use PCIe DPC */ unsigned int preserve_config:1; /* Preserve FW resource setup */ unsigned int size_windows:1; /* Enable root bus sizing */ + unsigned int msi_domain:1; /* Bridge wants MSI domain */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, @@ -621,7 +620,6 @@ struct pci_bus { struct resource busn_res; /* Bus numbers routed to this bus */ struct pci_ops *ops; /* Configuration access functions */ - struct msi_controller *msi; /* MSI controller */ void *sysdata; /* Hook for sys-specific extension */ struct proc_dir_entry *procdir; /* Directory entry in /proc/bus/pci */ @@ -1085,6 +1083,7 @@ u8 pci_find_next_ht_capability(struct pci_dev *dev, u8 pos, int ht_cap); u16 pci_find_ext_capability(struct pci_dev *dev, int cap); u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); +u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap); u64 pci_get_dsn(struct pci_dev *dev); @@ -1209,6 +1208,7 @@ int __must_check pci_set_mwi(struct pci_dev *dev); int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); +void pci_disable_parity(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); bool pci_check_and_mask_intx(struct pci_dev *dev); bool pci_check_and_unmask_intx(struct pci_dev *dev); @@ -1310,7 +1310,6 @@ void pci_unlock_rescan_remove(void); /* Vital Product Data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); -int pci_set_vpd_size(struct pci_dev *dev, size_t len); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); @@ -2319,14 +2318,13 @@ static inline u8 pci_vpd_info_field_size(const u8 *info_field) /** * pci_vpd_find_tag - Locates the Resource Data Type tag provided * @buf: Pointer to buffered vpd data - * @off: The offset into the buffer at which to begin the search * @len: The length of the vpd buffer * @rdt: The Resource Data Type to search for * * Returns the index where the Resource Data Type was found or * -ENOENT otherwise. */ -int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt); +int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt); /** * pci_vpd_find_info_keyword - Locates an information field keyword in the VPD diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a763928a0e41..f5a6a2f069ed 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -954,8 +954,6 @@ extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); -extern int perf_num_counters(void); -extern const char *perf_pmu_name(void); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); extern void __perf_event_task_sched_out(struct task_struct *prev, diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5e772392a379..46b13780c2c8 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -426,7 +426,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres /* * On some architectures hardware does not set page access bit when accessing - * memory page, it is responsibilty of software setting this bit. It brings + * memory page, it is responsibility of software setting this bit. It brings * out extra page fault penalty to track page access bit. For optimization page * access bit can be set during all page fault flow on these arches. * To be differentiate with macro pte_mkyoung, this macro is used on platforms @@ -519,7 +519,7 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); /* * This is an implementation of pmdp_establish() that is only suitable for an * architecture that doesn't have hardware dirty/accessed bits. In this case we - * can't race with CPU which sets these bits and non-atomic aproach is fine. + * can't race with CPU which sets these bits and non-atomic approach is fine. */ static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) @@ -852,7 +852,7 @@ static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma, * updates, but to prevent any updates it may make from being lost. * * This does not protect against other software modifications of the - * pte; the appropriate pte lock must be held over the transation. + * pte; the appropriate pte lock must be held over the transaction. * * Note that this interface is intended to be batchable, meaning that * ptep_modify_prot_commit may not actually update the pte, but merely @@ -1111,6 +1111,7 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, extern void untrack_pfn_moved(struct vm_area_struct *vma); #endif +#ifdef CONFIG_MMU #ifdef __HAVE_COLOR_ZERO_PAGE static inline int is_zero_pfn(unsigned long pfn) { @@ -1134,6 +1135,17 @@ static inline unsigned long my_zero_pfn(unsigned long addr) return zero_pfn; } #endif +#else +static inline int is_zero_pfn(unsigned long pfn) +{ + return 0; +} + +static inline unsigned long my_zero_pfn(unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MMU */ #ifdef CONFIG_MMU @@ -1269,13 +1281,13 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) * * The complete check uses is_pmd_migration_entry() in linux/swapops.h * But using that requires moving current function and pmd_trans_unstable() - * to linux/swapops.h to resovle dependency, which is too much code move. + * to linux/swapops.h to resolve dependency, which is too much code move. * * !pmd_present() is equivalent to is_pmd_migration_entry() currently, * because !pmd_present() pages can only be under migration not swapped * out. * - * pmd_none() is preseved for future condition checks on pmd migration + * pmd_none() is preserved for future condition checks on pmd migration * entries and not confusing with this function name, although it is * redundant with !pmd_present(). */ diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 6035d9a98fb8..45f53afc46e2 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5679,6 +5679,7 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) #define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) +#define PD_STATUS_EVENT_HARD_RESET BIT(2) struct ec_params_typec_status { uint8_t port; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 000cc0533c33..069c7fd95396 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -32,6 +32,7 @@ struct proc_ops { ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); + /* mandatory unless nonseekable_open() or equivalent is used */ loff_t (*proc_lseek)(struct file *, loff_t, int); int (*proc_release)(struct inode *, struct file *); __poll_t (*proc_poll)(struct file *, struct poll_table_struct *); diff --git a/include/linux/profile.h b/include/linux/profile.h index bad18ca43150..fd18ca96f557 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -15,7 +15,6 @@ #define KVM_PROFILING 4 struct proc_dir_entry; -struct pt_regs; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) @@ -84,8 +83,6 @@ int task_handoff_unregister(struct notifier_block * n); int profile_event_register(enum profile_type, struct notifier_block * n); int profile_event_unregister(enum profile_type, struct notifier_block * n); -struct pt_regs; - #else #define prof_on 0 diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index b801ead1e2bb..d48a7192e881 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -73,6 +73,7 @@ enum sev_cmd { SEV_CMD_SEND_UPDATE_DATA = 0x041, SEV_CMD_SEND_UPDATE_VMSA = 0x042, SEV_CMD_SEND_FINISH = 0x043, + SEV_CMD_SEND_CANCEL = 0x044, /* Guest migration commands (incoming) */ SEV_CMD_RECEIVE_START = 0x050, @@ -326,11 +327,11 @@ struct sev_data_send_start { u64 pdh_cert_address; /* In */ u32 pdh_cert_len; /* In */ u32 reserved1; - u64 plat_cert_address; /* In */ - u32 plat_cert_len; /* In */ + u64 plat_certs_address; /* In */ + u32 plat_certs_len; /* In */ u32 reserved2; - u64 amd_cert_address; /* In */ - u32 amd_cert_len; /* In */ + u64 amd_certs_address; /* In */ + u32 amd_certs_len; /* In */ u32 reserved3; u64 session_address; /* In */ u32 session_len; /* In/Out */ @@ -393,6 +394,15 @@ struct sev_data_send_finish { } __packed; /** + * struct sev_data_send_cancel - SEND_CANCEL command parameters + * + * @handle: handle of the VM to process + */ +struct sev_data_send_cancel { + u32 handle; /* In */ +} __packed; + +/** * struct sev_data_receive_start - RECEIVE_START command parameters * * @handle: handle of the VM to perform receive operation diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h new file mode 100644 index 000000000000..f960a719f0d5 --- /dev/null +++ b/include/linux/ptp_kvm.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Virtual PTP 1588 clock for use with KVM guests + * + * Copyright (C) 2017 Red Hat Inc. + */ + +#ifndef _PTP_KVM_H_ +#define _PTP_KVM_H_ + +struct timespec64; +struct clocksource; + +int kvm_arch_ptp_init(void); +int kvm_arch_ptp_get_clock(struct timespec64 *ts); +int kvm_arch_ptp_get_crosststamp(u64 *cycle, + struct timespec64 *tspec, struct clocksource **cs); + +#endif /* _PTP_KVM_H_ */ diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e4d84d4db293..5bb90af4997e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -91,6 +91,11 @@ struct pwm_device { * pwm_get_state() - retrieve the current PWM state * @pwm: PWM device * @state: state to fill with the current PWM state + * + * The returned PWM state represents the state that was applied by a previous call to + * pwm_apply_state(). Drivers may have to slightly tweak that state before programming it to + * hardware. If pwm_apply_state() was never called, this returns either the current hardware + * state (if supported) or the default settings. */ static inline void pwm_get_state(const struct pwm_device *pwm, struct pwm_state *state) @@ -392,8 +397,6 @@ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add_with_polarity(struct pwm_chip *chip, - enum pwm_polarity polarity); int pwmchip_add(struct pwm_chip *chip); int pwmchip_remove(struct pwm_chip *chip); struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index f28ee75d1005..8b795b544f75 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -315,6 +315,7 @@ struct rproc; /** * struct rproc_mem_entry - memory entry descriptor * @va: virtual address + * @is_iomem: io memory * @dma: dma address * @len: length, in bytes * @da: device address @@ -329,6 +330,7 @@ struct rproc; */ struct rproc_mem_entry { void *va; + bool is_iomem; dma_addr_t dma; size_t len; u32 da; @@ -361,6 +363,7 @@ enum rsc_handling_status { * @start: power on the device and boot it * @stop: power off the device * @attach: attach to a device that his already powered up + * @detach: detach from a device, leaving it powered up * @kick: kick a virtqueue (virtqueue id given as a parameter) * @da_to_va: optional platform hook to perform address translations * @parse_fw: parse firmware to extract information (e.g. resource table) @@ -368,7 +371,9 @@ enum rsc_handling_status { * RSC_HANDLED if resource was handled, RSC_IGNORED if not handled and a * negative value on error * @load_rsc_table: load resource table from firmware image - * @find_loaded_rsc_table: find the loaded resouce table + * @find_loaded_rsc_table: find the loaded resource table from firmware image + * @get_loaded_rsc_table: get resource table installed in memory + * by external entity * @load: load firmware to memory, where the remote processor * expects to find it * @sanity_check: sanity check the fw image @@ -383,13 +388,16 @@ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); int (*attach)(struct rproc *rproc); + int (*detach)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); - void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len); + void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); int (*parse_fw)(struct rproc *rproc, const struct firmware *fw); int (*handle_rsc)(struct rproc *rproc, u32 rsc_type, void *rsc, int offset, int avail); struct resource_table *(*find_loaded_rsc_table)( struct rproc *rproc, const struct firmware *fw); + struct resource_table *(*get_loaded_rsc_table)( + struct rproc *rproc, size_t *size); int (*load)(struct rproc *rproc, const struct firmware *fw); int (*sanity_check)(struct rproc *rproc, const struct firmware *fw); u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw); @@ -405,6 +413,8 @@ struct rproc_ops { * @RPROC_RUNNING: device is up and running * @RPROC_CRASHED: device has crashed; need to start recovery * @RPROC_DELETED: device is deleted + * @RPROC_ATTACHED: device has been booted by another entity and the core + * has attached to it * @RPROC_DETACHED: device has been booted by another entity and waiting * for the core to attach to it * @RPROC_LAST: just keep this one at the end @@ -421,8 +431,9 @@ enum rproc_state { RPROC_RUNNING = 2, RPROC_CRASHED = 3, RPROC_DELETED = 4, - RPROC_DETACHED = 5, - RPROC_LAST = 6, + RPROC_ATTACHED = 5, + RPROC_DETACHED = 6, + RPROC_LAST = 7, }; /** @@ -505,11 +516,12 @@ struct rproc_dump_segment { * @recovery_disabled: flag that state if recovery was disabled * @max_notifyid: largest allocated notify id. * @table_ptr: pointer to the resource table in effect + * @clean_table: copy of the resource table without modifications. Used + * when a remote processor is attached or detached from the core * @cached_table: copy of the resource table * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU * @auto_boot: flag to indicate if remote processor should be auto-started - * @autonomous: true if an external entity has booted the remote processor * @dump_segments: list of segments in the firmware * @nb_vdev: number of vdev currently handled by rproc * @char_dev: character device of the rproc @@ -542,11 +554,11 @@ struct rproc { bool recovery_disabled; int max_notifyid; struct resource_table *table_ptr; + struct resource_table *clean_table; struct resource_table *cached_table; size_t table_sz; bool has_iommu; bool auto_boot; - bool autonomous; struct list_head dump_segments; int nb_vdev; u8 elf_class; @@ -655,6 +667,7 @@ rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, size_t len, int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); +int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); void rproc_coredump_using_sections(struct rproc *rproc); diff --git a/include/linux/reset.h b/include/linux/reset.h index 46e6372cb431..db0e6115a2f6 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -76,6 +76,11 @@ static inline int reset_control_reset(struct reset_control *rstc) return 0; } +static inline int reset_control_rearm(struct reset_control *rstc) +{ + return 0; +} + static inline int reset_control_assert(struct reset_control *rstc) { return 0; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 136ea0997e6d..dac53fd3afea 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -61,7 +61,8 @@ enum ring_buffer_type { unsigned ring_buffer_event_length(struct ring_buffer_event *event); void *ring_buffer_event_data(struct ring_buffer_event *event); -u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); +u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, + struct ring_buffer_event *event); /* * ring_buffer_discard_commit will remove an event that has not @@ -180,7 +181,7 @@ unsigned long ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cp unsigned long ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu); unsigned long ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu); -u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu); +u64 ring_buffer_time_stamp(struct trace_buffer *buffer); void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts); void ring_buffer_set_clock(struct trace_buffer *buffer, diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index a5db828b2420..d97dcd049f18 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -18,8 +18,7 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/rpmsg/byteorder.h> - -#define RPMSG_ADDR_ANY 0xFFFFFFFF +#include <uapi/linux/rpmsg.h> struct rpmsg_device; struct rpmsg_endpoint; diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c25c8e67030..d2c881384517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1583,7 +1583,7 @@ extern struct pid *cad_pid; #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ +#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 90b2a0bce11c..e24b1fe348e3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -151,12 +151,13 @@ static inline bool in_vfork(struct task_struct *tsk) * Applies per-task gfp context to the given allocation flags. * PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS + * PF_MEMALLOC_PIN implies !GFP_MOVABLE */ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence @@ -165,6 +166,9 @@ static inline gfp_t current_gfp_context(gfp_t flags) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; + + if (pflags & PF_MEMALLOC_PIN) + flags &= ~__GFP_MOVABLE; } return flags; } @@ -271,29 +275,18 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } -#ifdef CONFIG_CMA -static inline unsigned int memalloc_nocma_save(void) +static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOCMA; + unsigned int flags = current->flags & PF_MEMALLOC_PIN; - current->flags |= PF_MEMALLOC_NOCMA; + current->flags |= PF_MEMALLOC_PIN; return flags; } -static inline void memalloc_nocma_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags; -} -#else -static inline unsigned int memalloc_nocma_save(void) -{ - return 0; -} - -static inline void memalloc_nocma_restore(unsigned int flags) +static inline void memalloc_pin_restore(unsigned int flags) { + current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; } -#endif #ifdef CONFIG_MEMCG DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg); diff --git a/include/linux/security.h b/include/linux/security.h index 9aeda3f9e838..06f7c50ce77f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,6 +291,7 @@ void security_bprm_committed_creds(struct linux_binprm *bprm); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); int security_sb_alloc(struct super_block *sb); +void security_sb_delete(struct super_block *sb); void security_sb_free(struct super_block *sb); void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); @@ -633,6 +634,9 @@ static inline int security_sb_alloc(struct super_block *sb) return 0; } +static inline void security_sb_delete(struct super_block *sb) +{ } + static inline void security_sb_free(struct super_block *sb) { } diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9d6c28cc4d8f..5b31c5147969 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -72,6 +72,31 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** + * seq_buf_terminate - Make sure buffer is nul terminated + * @s: the seq_buf descriptor to terminate. + * + * This makes sure that the buffer in @s is nul terminated and + * safe to read as a string. + * + * Note, if this is called when the buffer has overflowed, then + * the last byte of the buffer is zeroed, and the len will still + * point passed it. + * + * After this function is called, s->buffer is safe to use + * in string operations. + */ +static inline void seq_buf_terminate(struct seq_buf *s) +{ + if (WARN_ON(s->size == 0)) + return; + + if (seq_buf_buffer_left(s)) + s->buffer[s->len] = 0; + else + s->buffer[s->size - 1] = 0; +} + +/** * seq_buf_get_buf - get buffer to write arbitrary data to * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 0f80123650e2..1eac79ce57d4 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -79,13 +79,14 @@ struct shrinker { #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ /* Flags */ -#define SHRINKER_NUMA_AWARE (1 << 0) -#define SHRINKER_MEMCG_AWARE (1 << 1) +#define SHRINKER_REGISTERED (1 << 0) +#define SHRINKER_NUMA_AWARE (1 << 1) +#define SHRINKER_MEMCG_AWARE (1 << 2) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 2) +#define SHRINKER_NONSLAB (1 << 3) extern int prealloc_shrinker(struct shrinker *shrinker); extern void register_shrinker_prepared(struct shrinker *shrinker); diff --git a/include/linux/smp.h b/include/linux/smp.h index 84a0b4828f66..669e35c03be2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -56,6 +56,14 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* + * Cpus stopping functions in panic. All have default weak definitions. + * Architecture-dependent code may override them. + */ +void panic_smp_self_stop(void); +void nmi_panic_self_stop(struct pt_regs *regs); +void crash_smp_send_stop(void); + +/* * Call a function on all processors */ static inline void on_each_cpu(smp_call_func_t func, void *info, int wait) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d2e97ee802af..d81fe8b364d0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -247,6 +247,7 @@ struct rpc_xprt { struct rpc_task * snd_task; /* Task blocked in send */ struct list_head xmit_queue; /* Send queue */ + atomic_long_t xmit_queuelen; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/include/linux/swap.h b/include/linux/swap.h index 4cc6ec3bf0ab..144727041e78 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,8 +10,10 @@ #include <linux/sched.h> #include <linux/node.h> #include <linux/fs.h> +#include <linux/pagemap.h> #include <linux/atomic.h> #include <linux/page-flags.h> +#include <uapi/linux/mempolicy.h> #include <asm/page.h> struct notifier_block; @@ -339,6 +341,20 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file, extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); extern void mark_page_accessed(struct page *); + +extern atomic_t lru_disable_count; + +static inline bool lru_cache_disabled(void) +{ + return atomic_read(&lru_disable_count); +} + +static inline void lru_cache_enable(void) +{ + atomic_dec(&lru_disable_count); +} + +extern void lru_cache_disable(void); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); @@ -378,6 +394,12 @@ extern int sysctl_min_slab_ratio; #define node_reclaim_mode 0 #endif +static inline bool node_reclaim_enabled(void) +{ + /* Is any node_reclaim_mode bit set? */ + return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); +} + extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5857a937c637..216854a5e513 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -6,6 +6,7 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/limits.h> +#include <linux/spinlock.h> struct device; struct page; @@ -36,20 +37,11 @@ enum swiotlb_force { extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); -extern unsigned long swiotlb_nr_tbl(void); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); extern int swiotlb_late_init_with_default_size(size_t default_size); extern void __init swiotlb_update_mem_attributes(void); -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, size_t mapping_size, size_t alloc_size, enum dma_data_direction dir, unsigned long attrs); @@ -57,32 +49,70 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, size_t mapping_size, - size_t alloc_size, enum dma_data_direction dir, unsigned long attrs); -extern void swiotlb_tbl_sync_single(struct device *hwdev, - phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target); - +void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir); +void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir); dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); #ifdef CONFIG_SWIOTLB extern enum swiotlb_force swiotlb_force; -extern phys_addr_t io_tlb_start, io_tlb_end; + +/** + * struct io_tlb_mem - IO TLB Memory Pool Descriptor + * + * @start: The start address of the swiotlb memory pool. Used to do a quick + * range check to see if the memory was in fact allocated by this + * API. + * @end: The end address of the swiotlb memory pool. Used to do a quick + * range check to see if the memory was in fact allocated by this + * API. + * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and + * @end. This is command line adjustable via setup_io_tlb_npages. + * @used: The number of used IO TLB block. + * @list: The free list describing the number of free entries available + * from each index. + * @index: The index to start searching in the next round. + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @lock: The lock to protect the above data structures in the map and + * unmap calls. + * @debugfs: The dentry to debugfs. + * @late_alloc: %true if allocated using the page allocator + */ +struct io_tlb_mem { + phys_addr_t start; + phys_addr_t end; + unsigned long nslabs; + unsigned long used; + unsigned int index; + spinlock_t lock; + struct dentry *debugfs; + bool late_alloc; + struct io_tlb_slot { + phys_addr_t orig_addr; + size_t alloc_size; + unsigned int list; + } slots[]; +}; +extern struct io_tlb_mem *io_tlb_default_mem; static inline bool is_swiotlb_buffer(phys_addr_t paddr) { - return paddr >= io_tlb_start && paddr < io_tlb_end; + struct io_tlb_mem *mem = io_tlb_default_mem; + + return mem && paddr >= mem->start && paddr < mem->end; } void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(void); -void __init swiotlb_adjust_size(unsigned long new_size); +void __init swiotlb_adjust_size(unsigned long size); #else #define swiotlb_force SWIOTLB_NO_FORCE static inline bool is_swiotlb_buffer(phys_addr_t paddr) @@ -106,7 +136,7 @@ static inline bool is_swiotlb_active(void) return false; } -static inline void swiotlb_adjust_size(unsigned long new_size) +static inline void swiotlb_adjust_size(unsigned long size) { } #endif /* CONFIG_SWIOTLB */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a672bbe28577..050511e8f1f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -69,6 +69,8 @@ struct io_uring_params; struct clone_args; struct open_how; struct mount_attr; +struct landlock_ruleset_attr; +enum landlock_rule_type; #include <linux/types.h> #include <linux/aio_abi.h> @@ -1043,6 +1045,11 @@ asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, siginfo_t __user *info, unsigned int flags); asmlinkage long sys_pidfd_getfd(int pidfd, int fd, unsigned int flags); +asmlinkage long sys_landlock_create_ruleset(const struct landlock_ruleset_attr __user *attr, + size_t size, __u32 flags); +asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type rule_type, + const void __user *rule_attr, __u32 flags); +asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags); /* * Architecture-specific system calls diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6ac7bb1d2b1f..d296f3b88fb9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -91,7 +91,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; - char type[THERMAL_NAME_LENGTH]; + char *type; struct device device; struct device_node *np; void *devdata; @@ -390,7 +390,6 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int thermal_zone_get_slope(struct thermal_zone_device *tz); int thermal_zone_get_offset(struct thermal_zone_device *tz); -void thermal_notify_framework(struct thermal_zone_device *, int); int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); @@ -436,10 +435,6 @@ static inline int thermal_zone_get_offset( struct thermal_zone_device *tz) { return -ENODEV; } -static inline void thermal_notify_framework(struct thermal_zone_device *tz, - int trip) -{ } - static inline int thermal_zone_device_enable(struct thermal_zone_device *tz) { return -ENODEV; } diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index c6792cf01bc7..78a98bdff76d 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -3,6 +3,7 @@ #define _LINUX_TIMEKEEPING_H #include <linux/errno.h> +#include <linux/clocksource_ids.h> /* Included from linux/ktime.h */ @@ -243,11 +244,12 @@ struct ktime_timestamps { * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { - u64 cycles; - ktime_t real; - ktime_t raw; - unsigned int clock_was_set_seq; - u8 cs_was_changed_seq; + u64 cycles; + ktime_t real; + ktime_t raw; + enum clocksource_ids cs_id; + unsigned int clock_was_set_seq; + u8 cs_was_changed_seq; }; /** diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 28e7af1406f2..ad413b382a3c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -206,7 +206,7 @@ static inline unsigned int tracing_gen_ctx_dec(void) trace_ctx = tracing_gen_ctx(); /* - * Subtract one from the preeption counter if preemption is enabled, + * Subtract one from the preemption counter if preemption is enabled, * see trace_event_buffer_reserve()for details. */ if (IS_ENABLED(CONFIG_PREEMPTION)) @@ -404,7 +404,6 @@ trace_get_fields(struct trace_event_call *event_call) return event_call->class->get_fields(event_call); } -struct trace_array; struct trace_subsystem_dir; enum { @@ -640,7 +639,8 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); extern enum event_trigger_type -event_triggers_call(struct trace_event_file *file, void *rec, +event_triggers_call(struct trace_event_file *file, + struct trace_buffer *buffer, void *rec, struct ring_buffer_event *event); extern void event_triggers_post_call(struct trace_event_file *file, @@ -664,7 +664,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file) if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL); + event_triggers_call(file, NULL, NULL, NULL); if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; if (eflags & EVENT_FILE_FL_PID_FILTER) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 9cfb099da58f..13f65420f188 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -465,7 +465,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * * * * The declared 'local variable' is called '__entry' * * - * * __field(pid_t, prev_prid) is equivalent to a standard declariton: + * * __field(pid_t, prev_prid) is equivalent to a standard declaration: * * * * pid_t prev_pid; * * diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index a8e5f3ea9bb2..794d1538b8ba 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -17,6 +17,9 @@ #include <linux/mm.h> #include <asm-generic/pgtable_uffd.h> +/* The set of all possible UFFD-related VM flags. */ +#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR) + /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want @@ -34,6 +37,22 @@ extern int sysctl_unprivileged_userfaultfd; extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); +/* + * The mode of operation for __mcopy_atomic and its helpers. + * + * This is almost an implementation detail (mcopy_atomic below doesn't take this + * as a parameter), but it's exposed here because memory-kind-specific + * implementations (e.g. hugetlbfs) need to know the mode of operation. + */ +enum mcopy_atomic_mode { + /* A normal copy_from_user into the destination range. */ + MCOPY_ATOMIC_NORMAL, + /* Don't copy; map the destination range to the zero page. */ + MCOPY_ATOMIC_ZEROPAGE, + /* Just install pte(s) with the existing page(s) in the page cache. */ + MCOPY_ATOMIC_CONTINUE, +}; + extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, bool *mmap_changing, __u64 mode); @@ -41,6 +60,8 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, bool *mmap_changing); +extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, + unsigned long len, bool *mmap_changing); extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, bool *mmap_changing); @@ -52,6 +73,22 @@ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx; } +/* + * Never enable huge pmd sharing on some uffd registered vmas: + * + * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry. + * + * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for + * VMAs which share huge pmds. (If you have two mappings to the same + * underlying pages, and fault in the non-UFFD-registered one with a write, + * with huge pmd sharing this would *also* setup the second UFFD-registered + * mapping, and we'd not get minor faults.) + */ +static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); +} + static inline bool userfaultfd_missing(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_MISSING; @@ -62,6 +99,11 @@ static inline bool userfaultfd_wp(struct vm_area_struct *vma) return vma->vm_flags & VM_UFFD_WP; } +static inline bool userfaultfd_minor(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_UFFD_MINOR; +} + static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { @@ -76,7 +118,7 @@ static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, static inline bool userfaultfd_armed(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); + return vma->vm_flags & __VM_UFFD_FLAGS; } extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); @@ -123,6 +165,11 @@ static inline bool userfaultfd_wp(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_minor(struct vm_area_struct *vma) +{ + return false; +} + static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 15fa085fab05..f311d227aa1b 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -8,7 +8,7 @@ #include <linux/vhost_iotlb.h> /** - * vDPA callback definition. + * struct vdpa_calllback - vDPA callback definition. * @callback: interrupt callback function * @private: the data passed to the callback function */ @@ -18,7 +18,7 @@ struct vdpa_callback { }; /** - * vDPA notification area + * struct vdpa_notification_area - vDPA notification area * @addr: base address of the notification area * @size: size of the notification area */ @@ -28,7 +28,7 @@ struct vdpa_notification_area { }; /** - * vDPA vq_state definition + * struct vdpa_vq_state - vDPA vq_state definition * @avail_index: available index */ struct vdpa_vq_state { @@ -38,7 +38,7 @@ struct vdpa_vq_state { struct vdpa_mgmt_dev; /** - * vDPA device - representation of a vDPA device + * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. @@ -59,7 +59,7 @@ struct vdpa_device { }; /** - * vDPA IOVA range - the IOVA range support by the device + * struct vdpa_iova_range - the IOVA range support by the device * @first: start of the IOVA range * @last: end of the IOVA range */ @@ -69,7 +69,7 @@ struct vdpa_iova_range { }; /** - * vDPA_config_ops - operations for configuring a vDPA device. + * struct vdpa_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the * operations unless it is mentioned to be optional in the following * list. @@ -150,6 +150,9 @@ struct vdpa_iova_range { * @set_status: Set the device status * @vdev: vdpa device * @status: virtio device status + * @get_config_size: Get the size of the configuration space + * @vdev: vdpa device + * Returns size_t: configuration size * @get_config: Read from device specific configuration space * @vdev: vdpa device * @offset: offset from the beginning of @@ -231,6 +234,7 @@ struct vdpa_config_ops { u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); + size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); void (*set_config)(struct vdpa_device *vdev, unsigned int offset, @@ -267,7 +271,7 @@ int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** - * vdpa_driver - operations for a vDPA driver + * struct vdpa_driver - operations for a vDPA driver * @driver: underlying device driver * @probe: the function to call when a device is found. Returns 0 or -errno. * @remove: the function to call when a device is removed. @@ -344,18 +348,18 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset, } /** - * vdpa_mgmtdev_ops - vdpa device ops - * @dev_add: Add a vdpa device using alloc and register - * @mdev: parent device to use for device addition - * @name: name of the new vdpa device - * Driver need to add a new device using _vdpa_register_device() - * after fully initializing the vdpa device. Driver must return 0 - * on success or appropriate error code. - * @dev_del: Remove a vdpa device using unregister - * @mdev: parent device to use for device removal - * @dev: vdpa device to remove - * Driver need to remove the specified device by calling - * _vdpa_unregister_device(). + * struct vdpa_mgmtdev_ops - vdpa device ops + * @dev_add: Add a vdpa device using alloc and register + * @mdev: parent device to use for device addition + * @name: name of the new vdpa device + * Driver need to add a new device using _vdpa_register_device() + * after fully initializing the vdpa device. Driver must return 0 + * on success or appropriate error code. + * @dev_del: Remove a vdpa device using unregister + * @mdev: parent device to use for device removal + * @dev: vdpa device to remove + * Driver need to remove the specified device by calling + * _vdpa_unregister_device(). */ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index f26acbeec965..6a95b58fd0f4 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -13,6 +13,8 @@ struct virtio_pci_modern_device { void __iomem *device; /* Base of vq notifications (non-legacy mode). */ void __iomem *notify_base; + /* Physical base of vq notifications */ + resource_size_t notify_pa; /* Where to read and clear interrupt */ u8 __iomem *isr; @@ -99,13 +101,8 @@ void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev, u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev, u16 idx); u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev); -u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev, - u16 idx); -void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, - size_t minlen, - u32 align, - u32 start, u32 size, - size_t *len); +void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, + u16 index, resource_size_t *pa); int vp_modern_probe(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev); #endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 18e75974d4e3..ae0dd1948c2b 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -71,6 +71,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, #endif +#ifdef CONFIG_CMA + CMA_ALLOC_SUCCESS, + CMA_ALLOC_FAIL, +#endif UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ @@ -121,6 +125,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, SWAP_RA, SWAP_RA_HIT, #endif +#ifdef CONFIG_X86 + DIRECT_MAP_LEVEL2_SPLIT, + DIRECT_MAP_LEVEL3_SPLIT, +#endif NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 394d03cc0e92..4d668abb6391 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -33,7 +33,7 @@ struct notifier_block; /* in notifier.h */ * * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poision shadow on free if it was never allocated. + * we don't try to poison shadow on free if it was never allocated. * * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to * determine which allocations need the module shadow freed. @@ -43,7 +43,7 @@ struct notifier_block; /* in notifier.h */ /* * Maximum alignment for ioremap() regions. - * Can be overriden by arch-specific value. + * Can be overridden by arch-specific value. */ #ifndef IOREMAP_MAX_ORDER #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ @@ -227,9 +227,8 @@ static inline void set_vm_flush_reset_perms(void *addr) } #endif -/* for /dev/kmem */ +/* for /proc/kcore */ extern long vread(char *buf, char *addr, unsigned long count); -extern long vwrite(char *buf, char *addr, unsigned long count); /* * Internals. Dont't use.. diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 59bd50f99291..84db7b8f912f 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -46,6 +46,9 @@ struct vringh { /* IOTLB for this vring */ struct vhost_iotlb *iotlb; + /* spinlock to synchronize IOTLB accesses */ + spinlock_t *iotlb_lock; + /* The function to call to notify the guest about added buffers */ void (*notify)(struct vringh *); }; @@ -196,6 +199,19 @@ static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov) kiov->iov = NULL; } +static inline size_t vringh_kiov_length(struct vringh_kiov *kiov) +{ + size_t len = 0; + int i; + + for (i = kiov->i; i < kiov->used; i++) + len += kiov->iov[i].iov_len; + + return len; +} + +void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len); + int vringh_getdesc_kern(struct vringh *vrh, struct vringh_kiov *riov, struct vringh_kiov *wiov, @@ -258,7 +274,8 @@ static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) #if IS_REACHABLE(CONFIG_VHOST_IOTLB) -void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb); +void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb, + spinlock_t *iotlb_lock); int vringh_init_iotlb(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index e8df72e1627a..5e848884ff61 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -68,7 +68,6 @@ enum sctp_verb { SCTP_CMD_ASSOC_FAILED, /* Handle association failure. */ SCTP_CMD_DISCARD_PACKET, /* Discard the whole packet. */ SCTP_CMD_GEN_SHUTDOWN, /* Generate a SHUTDOWN chunk. */ - SCTP_CMD_UPDATE_ASSOC, /* Update association information. */ SCTP_CMD_PURGE_OUTQUEUE, /* Purge all data waiting to be sent. */ SCTP_CMD_SETUP_T2, /* Hi-level, setup T2-shutdown parms. */ SCTP_CMD_RTO_PENDING, /* Set transport's rto_pending. */ diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index bae29f50adff..226ae3702d8a 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -10,7 +10,7 @@ #include <rdma/ib_verbs.h> -int rdma_query_gid(struct ib_device *device, u8 port_num, int index, +int rdma_query_gid(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr); const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, @@ -20,10 +20,10 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, - u8 port, + u32 port, struct net_device *ndev); const struct ib_gid_attr *rdma_find_gid_by_filter( - struct ib_device *device, const union ib_gid *gid, u8 port_num, + struct ib_device *device, const union ib_gid *gid, u32 port_num, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, void *), void *context); @@ -43,7 +43,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); * the local software cache. */ int ib_get_cached_pkey(struct ib_device *device_handle, - u8 port_num, + u32 port_num, int index, u16 *pkey); @@ -59,7 +59,7 @@ int ib_get_cached_pkey(struct ib_device *device_handle, * the local software cache. */ int ib_find_cached_pkey(struct ib_device *device, - u8 port_num, + u32 port_num, u16 pkey, u16 *index); @@ -75,7 +75,7 @@ int ib_find_cached_pkey(struct ib_device *device, * the local software cache. */ int ib_find_exact_cached_pkey(struct ib_device *device, - u8 port_num, + u32 port_num, u16 pkey, u16 *index); @@ -89,7 +89,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, * the local software cache. */ int ib_get_cached_lmc(struct ib_device *device, - u8 port_num, + u32 port_num, u8 *lmc); /** @@ -102,12 +102,12 @@ int ib_get_cached_lmc(struct ib_device *device, * the local software cache. */ int ib_get_cached_port_state(struct ib_device *device, - u8 port_num, + u32 port_num, enum ib_port_state *port_active); bool rdma_is_zero_gid(const union ib_gid *gid); const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, - u8 port_num, int index); + u32 port_num, int index); void rdma_put_gid_attr(const struct ib_gid_attr *attr); void rdma_hold_gid_attr(const struct ib_gid_attr *attr); ssize_t rdma_query_gid_table(struct ib_device *device, diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 8dfb1ddf345a..f1d34f06a68b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -668,7 +668,7 @@ struct ib_mad_reg_req { * @registration_flags: Registration flags to set for this agent */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, - u8 port_num, + u32 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 4c52c2fd22a1..ba3c808a3789 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -423,7 +423,7 @@ struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, - u8 port_num, struct sa_path_rec *rec, + u32 port_num, struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, @@ -431,7 +431,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, void *context, struct ib_sa_query **query); int ib_sa_service_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, u8 method, + struct ib_device *device, u32 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, @@ -477,7 +477,8 @@ struct ib_sa_multicast { * group, and the user must rejoin the group to continue using it. */ struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, + struct ib_device *device, + u32 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, gfp_t gfp_mask, int (*callback)(int status, @@ -506,20 +507,20 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast); * @mgid: MGID of multicast group. * @rec: Location to copy SA multicast member record. */ -int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, +int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num, union ib_gid *mgid, struct ib_sa_mcmember_rec *rec); /** * ib_init_ah_from_mcmember - Initialize address handle attributes based on * an SA multicast member record. */ -int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, +int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr); -int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, +int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num, struct sa_path_rec *rec, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *sgid_attr); @@ -538,7 +539,7 @@ void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec); /* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, + struct ib_device *device, u32 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, unsigned long timeout_ms, gfp_t gfp_mask, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ca28fca5736b..7e2f3699b898 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -152,7 +152,7 @@ struct ib_gid_attr { union ib_gid gid; enum ib_gid_type gid_type; u16 index; - u8 port_num; + u32 port_num; }; enum { @@ -736,7 +736,7 @@ struct ib_event { struct ib_qp *qp; struct ib_srq *srq; struct ib_wq *wq; - u8 port_num; + u32 port_num; } element; enum ib_event_type event; }; @@ -919,7 +919,7 @@ struct rdma_ah_attr { struct ib_global_route grh; u8 sl; u8 static_rate; - u8 port_num; + u32 port_num; u8 ah_flags; enum rdma_ah_attr_type type; union { @@ -1006,7 +1006,7 @@ struct ib_wc { u16 pkey_index; u8 sl; u8 dlid_path_bits; - u8 port_num; /* valid only for DR SMPs on switches */ + u32 port_num; /* valid only for DR SMPs on switches */ u8 smac[ETH_ALEN]; u16 vlan_id; u8 network_hdr_type; @@ -1161,7 +1161,7 @@ struct ib_qp_init_attr { /* * Only needed for special QP types, or when using the RW API. */ - u8 port_num; + u32 port_num; struct ib_rwq_ind_table *rwq_ind_tbl; u32 source_qpn; }; @@ -1280,11 +1280,11 @@ struct ib_qp_attr { u8 max_rd_atomic; u8 max_dest_rd_atomic; u8 min_rnr_timer; - u8 port_num; + u32 port_num; u8 timeout; u8 retry_cnt; u8 rnr_retry; - u8 alt_port_num; + u32 alt_port_num; u8 alt_timeout; u32 rate_limit; struct net_device *xmit_slave; @@ -1401,7 +1401,7 @@ struct ib_ud_wr { u32 remote_qpn; u32 remote_qkey; u16 pkey_index; /* valid for GSI only */ - u8 port_num; /* valid for DR SMPs on switch only */ + u32 port_num; /* valid for DR SMPs on switch only */ }; static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr) @@ -1610,6 +1610,11 @@ struct ib_srq { } xrc; }; } ext; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; enum ib_raw_packet_caps { @@ -1708,7 +1713,7 @@ struct ib_qp_security; struct ib_port_pkey { enum port_pkey_state state; u16 pkey_index; - u8 port_num; + u32 port_num; struct list_head qp_list; struct list_head to_error_list; struct ib_qp_security *sec; @@ -1769,7 +1774,7 @@ struct ib_qp { enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_qp_security *qp_sec; - u8 port; + u32 port; bool integrity_en; /* @@ -2065,7 +2070,7 @@ struct ib_flow_attr { u16 priority; u32 flags; u8 num_of_specs; - u8 port; + u32 port; union ib_flow_spec flows[]; }; @@ -2194,7 +2199,7 @@ enum rdma_netdev_t { struct rdma_netdev { void *clnt_priv; struct ib_device *hca; - u8 port_num; + u32 port_num; int mtu; /* @@ -2215,6 +2220,8 @@ struct rdma_netdev { int set_qkey, u32 qkey); int (*detach_mcast)(struct net_device *dev, struct ib_device *hca, union ib_gid *gid, u16 mlid); + /* timeout */ + void (*tx_timeout)(struct net_device *dev, unsigned int txqueue); }; struct rdma_netdev_alloc_params { @@ -2223,7 +2230,7 @@ struct rdma_netdev_alloc_params { unsigned int rxqs; void *param; - int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num, + int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num, struct net_device *netdev, void *param); }; @@ -2301,12 +2308,11 @@ struct ib_device_ops { int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); - int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); int (*post_srq_recv)(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int (*process_mad)(struct ib_device *device, int process_mad_flags, - u8 port_num, const struct ib_wc *in_wc, + u32 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in_mad, struct ib_mad *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index); @@ -2318,9 +2324,9 @@ struct ib_device_ops { void (*get_dev_fw_str)(struct ib_device *device, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - int (*query_port)(struct ib_device *device, u8 port_num, + int (*query_port)(struct ib_device *device, u32 port_num, struct ib_port_attr *port_attr); - int (*modify_port)(struct ib_device *device, u8 port_num, + int (*modify_port)(struct ib_device *device, u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); /** @@ -2329,10 +2335,10 @@ struct ib_device_ops { * structure to avoid cache line misses when accessing struct ib_device * in fast paths. */ - int (*get_port_immutable)(struct ib_device *device, u8 port_num, + int (*get_port_immutable)(struct ib_device *device, u32 port_num, struct ib_port_immutable *immutable); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, - u8 port_num); + u32 port_num); /** * When calling get_netdev, the HW vendor's driver should return the * net device of device @device at port @port_num or NULL if such @@ -2341,7 +2347,8 @@ struct ib_device_ops { * that this function returns NULL before the net device has finished * NETDEV_UNREGISTER state. */ - struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); + struct net_device *(*get_netdev)(struct ib_device *device, + u32 port_num); /** * rdma netdev operation * @@ -2349,11 +2356,11 @@ struct ib_device_ops { * must return -EOPNOTSUPP if it doesn't support the specified type. */ struct net_device *(*alloc_rdma_netdev)( - struct ib_device *device, u8 port_num, enum rdma_netdev_t type, + struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); - int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, + int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params); /** @@ -2361,7 +2368,7 @@ struct ib_device_ops { * link layer is either IB or iWarp. It is no-op if @port_num port * is RoCE link layer. */ - int (*query_gid)(struct ib_device *device, u8 port_num, int index, + int (*query_gid)(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); /** * When calling add_gid, the HW vendor's driver should add the gid @@ -2386,7 +2393,7 @@ struct ib_device_ops { * This function is only called when roce_gid_table is used. */ int (*del_gid)(const struct ib_gid_attr *attr, void **context); - int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, + int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index, u16 *pkey); int (*alloc_ucontext)(struct ib_ucontext *context, struct ib_udata *udata); @@ -2475,16 +2482,16 @@ struct ib_device_ops { struct ib_flow_action *action, const struct ib_flow_action_attrs_esp *attr, struct uverbs_attr_bundle *attrs); - int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port, int state); - int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + int (*get_vf_config)(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *ivf); - int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + int (*get_vf_stats)(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); - int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + int (*get_vf_guid)(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); - int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, @@ -2522,7 +2529,7 @@ struct ib_device_ops { * struct tells the core to set a default lifespan. */ struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, - u8 port_num); + u32 port_num); /** * get_hw_stats - Fill in the counter value(s) in the stats struct. * @index - The index in the value array we wish to have updated, or @@ -2536,12 +2543,12 @@ struct ib_device_ops { * one given in index at their option */ int (*get_hw_stats)(struct ib_device *device, - struct rdma_hw_stats *stats, u8 port, int index); + struct rdma_hw_stats *stats, u32 port, int index); /* * This function is called once for each port when a ib device is * registered. */ - int (*init_port)(struct ib_device *device, u8 port_num, + int (*init_port)(struct ib_device *device, u32 port_num, struct kobject *port_sysfs); /** * Allows rdma drivers to add their own restrack attributes. @@ -2685,7 +2692,7 @@ struct ib_device { /* CQ adaptive moderation (RDMA DIM) */ u16 use_cq_dim:1; u8 node_type; - u8 phys_port_cnt; + u32 phys_port_cnt; struct ib_device_attr attrs; struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; @@ -2751,7 +2758,7 @@ struct ib_client { * netdev. */ struct net_device *(*get_net_dev_by_params)( struct ib_device *dev, - u8 port, + u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, @@ -2932,10 +2939,10 @@ void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, - u8 port_num, struct ib_port_attr *port_attr); + u32 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, - u8 port_num); + u32 port_num); /** * rdma_cap_ib_switch - Check if the device is IB switch @@ -2959,7 +2966,7 @@ static inline bool rdma_cap_ib_switch(const struct ib_device *device) * * Return start port number */ -static inline u8 rdma_start_port(const struct ib_device *device) +static inline u32 rdma_start_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : 1; } @@ -2970,9 +2977,10 @@ static inline u8 rdma_start_port(const struct ib_device *device) * @iter - The unsigned int to store the port number */ #define rdma_for_each_port(device, iter) \ - for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \ - unsigned int, iter))); \ - iter <= rdma_end_port(device); (iter)++) + for (iter = rdma_start_port(device + \ + BUILD_BUG_ON_ZERO(!__same_type(u32, \ + iter))); \ + iter <= rdma_end_port(device); iter++) /** * rdma_end_port - Return the last valid port number for the device @@ -2982,7 +2990,7 @@ static inline u8 rdma_start_port(const struct ib_device *device) * * Return last port number */ -static inline u8 rdma_end_port(const struct ib_device *device) +static inline u32 rdma_end_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } @@ -2995,55 +3003,63 @@ static inline int rdma_is_port_valid(const struct ib_device *device, } static inline bool rdma_is_grh_required(const struct ib_device *device, - u8 port_num) + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_PORT_IB_GRH_REQUIRED; } -static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_ib(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IB; } -static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); } -static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; } -static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } -static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_iwarp(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; } -static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) +static inline bool rdma_ib_or_roce(const struct ib_device *device, + u32 port_num) { return rdma_protocol_ib(device, port_num) || rdma_protocol_roce(device, port_num); } -static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_raw_packet(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET; } -static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_usnic(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_USNIC; @@ -3061,7 +3077,7 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n * * Return: true if the port supports sending/receiving of MAD packets. */ -static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_MAD; @@ -3086,7 +3102,7 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) * * Return: true if the port supports OPA MAD packet formats. */ -static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) +static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_MAD; @@ -3112,7 +3128,7 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) * * Return: true if the port provides an SMI. */ -static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SMI; @@ -3133,7 +3149,7 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) * Return: true if the port supports an IB CM (this does not guarantee that * a CM is actually running however). */ -static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_CM; @@ -3151,7 +3167,7 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) * Return: true if the port supports an iWARP CM (this does not guarantee that * a CM is actually running however). */ -static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IW_CM; @@ -3172,7 +3188,7 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) * Administration interface. This does not imply that the SA service is * running locally. */ -static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SA; @@ -3195,7 +3211,8 @@ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) * overhead of registering/unregistering with the SM and tracking of the * total number of queue pairs attached to the multicast group. */ -static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_mcast(const struct ib_device *device, + u32 port_num) { return rdma_cap_ib_sa(device, port_num); } @@ -3213,7 +3230,7 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num * Return: true if the port uses a GID address to identify devices on the * network. */ -static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_AF_IB; @@ -3235,7 +3252,7 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) * addition of a Global Route Header built from our Ethernet Address * Handle into our header list for connectionless packets. */ -static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_ETH_AH; @@ -3250,7 +3267,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) * Return: true if we are running on an OPA device which supports * the extended OPA addressing. */ -static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) +static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num) { return (device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH; @@ -3268,7 +3285,8 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) * Return the max MAD size required by the Port. Will return 0 if the port * does not support MADs */ -static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num) +static inline size_t rdma_max_mad_size(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.max_mad_size; } @@ -3287,7 +3305,7 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n * its GIDs. */ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, - u8 port_num) + u32 port_num) { return rdma_protocol_roce(device, port_num) && device->ops.add_gid && device->ops.del_gid; @@ -3328,7 +3346,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device, * Return the MTU size supported by the port as an integer value. Will return * -1 if enum value of mtu is not supported. */ -static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port, int mtu) { if (rdma_core_cap_opa_port(device, port)) @@ -3345,7 +3363,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, * * Return the MTU size supported by the port as an integer value. */ -static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, +static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port, struct ib_port_attr *attr) { if (rdma_core_cap_opa_port(device, port)) @@ -3354,34 +3372,34 @@ static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, return ib_mtu_enum_to_int(attr->max_mtu); } -int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, +int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, int state); -int ib_get_vf_config(struct ib_device *device, int vf, u8 port, +int ib_get_vf_config(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *info); -int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, +int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); -int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, +int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); -int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, +int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, int type); int ib_query_pkey(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey); + u32 port_num, u16 index, u16 *pkey); int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int ib_modify_port(struct ib_device *device, - u8 port_num, int port_modify_mask, + u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index); + u32 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, - u8 port_num, u16 pkey, u16 *index); + u32 port_num, u16 pkey, u16 *index); enum ib_pd_flags { /* @@ -3496,7 +3514,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * attributes which are initialized using ib_init_ah_attr_from_wc(). * */ -int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, +int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr); @@ -3513,7 +3531,7 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, * in all UD QP post sends. */ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, - const struct ib_grh *grh, u8 port_num); + const struct ib_grh *grh, u32 port_num); /** * rdma_modify_ah - Modifies the address vector associated with an address @@ -3915,20 +3933,6 @@ struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe); -/** - * ib_req_ncomp_notif - Request completion notification when there are - * at least the specified number of unreaped completions on the CQ. - * @cq: The CQ to generate an event for. - * @wc_cnt: The number of unreaped completions that should be on the - * CQ before an event is generated. - */ -static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -{ - return cq->device->ops.req_ncomp_notif ? - cq->device->ops.req_ncomp_notif(cq, wc_cnt) : - -ENOSYS; -} - /* * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to * NULL. This causes the ib_dma* helpers to just stash the kernel virtual @@ -4272,12 +4276,12 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, enum rdma_driver_id driver_id); struct ib_device *ib_device_get_by_name(const char *name, enum rdma_driver_id driver_id); -struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, +struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned int port); -struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); +struct net_device *ib_device_netdev(struct ib_device *dev, u32 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); @@ -4311,7 +4315,8 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width); +int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, + u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { @@ -4379,12 +4384,12 @@ static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) return false; } -static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) +static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num) { attr->port_num = port_num; } -static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) +static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) { return attr->port_num; } @@ -4482,7 +4487,7 @@ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src); * @port_num: Port number */ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, - u8 port_num) + u32 port_num) { if (rdma_protocol_roce(dev, port_num)) return RDMA_AH_ATTR_TYPE_ROCE; @@ -4554,12 +4559,12 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); -struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, +struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); -int rdma_init_netdev(struct ib_device *device, u8 port_num, +int rdma_init_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 91975400e1b3..03abd30e6c8c 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -70,6 +70,7 @@ struct iw_cm_id { u8 tos; bool tos_set:1; bool mapped:1; + bool afonly:1; }; struct iw_cm_conn_param { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 32a67af18415..d989f030fae0 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -107,7 +107,7 @@ struct rdma_cm_id { struct rdma_route route; enum rdma_ucm_port_space ps; enum ib_qp_type qp_type; - u8 port_num; + u32 port_num; }; struct rdma_cm_id * @@ -331,6 +331,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); int rdma_set_afonly(struct rdma_cm_id *id, int afonly); int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout); + +int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer); /** * rdma_get_service_id - Return the IB service ID for a specified address. * @id: Communication identifier associated with the address. diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index e75cf9742e04..0295b22cd1cd 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -40,26 +40,26 @@ struct rdma_counter { struct rdma_counter_mode mode; struct mutex lock; struct rdma_hw_stats *stats; - u8 port; + u32 port; }; void rdma_counter_init(struct ib_device *dev); void rdma_counter_release(struct ib_device *dev); -int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, +int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mask mask, struct netlink_ext_ack *extack); -int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); +int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port); int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); int rdma_counter_query_stats(struct rdma_counter *counter); -u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index); -int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, +u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index); +int rdma_counter_bind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id); -int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, +int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port, u32 qp_num, u32 *counter_id); -int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, +int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id); -int rdma_counter_get_mode(struct ib_device *dev, u8 port, +int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9fd217b24916..2dafd7dbe893 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -92,7 +92,7 @@ struct rvt_ibport { /* * The pkey table is allocated and maintained by the driver. Drivers * need to have access to this before registering with rdmav. However - * rdmavt will need access to it so drivers need to proviee this during + * rdmavt will need access to it so drivers need to provide this during * the attach port API call. */ u16 *pkey_table; @@ -230,7 +230,7 @@ struct rvt_driver_provided { void (*do_send)(struct rvt_qp *qp); /* - * Returns a pointer to the undelying hardware's PCI device. This is + * Returns a pointer to the underlying hardware's PCI device. This is * used to display information as to what hardware is being referenced * in an output message */ @@ -245,7 +245,7 @@ struct rvt_driver_provided { void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* - * Init a struture allocated with qp_priv_alloc(). This should be + * Init a structure allocated with qp_priv_alloc(). This should be * called after all qp fields have been initialized in rdmavt. */ int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp, @@ -257,7 +257,7 @@ struct rvt_driver_provided { void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* - * Inform the driver the particular qp in quesiton has been reset so + * Inform the driver the particular qp in question has been reset so * that it can clean up anything it needs to. */ void (*notify_qp_reset)(struct rvt_qp *qp); @@ -281,7 +281,7 @@ struct rvt_driver_provided { void (*stop_send_queue)(struct rvt_qp *qp); /* - * Have the drivr drain any in progress operations + * Have the driver drain any in progress operations */ void (*quiesce_qp)(struct rvt_qp *qp); @@ -309,16 +309,16 @@ struct rvt_driver_provided { /* * Query driver for the state of the port. */ - int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, + int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num, struct ib_port_attr *props); /* * Tell driver to shutdown a port */ - int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num); /* Tell driver to send a trap for changed port capabilities */ - void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); + void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num); /* * The following functions can be safely ignored completely. Any use of @@ -338,7 +338,7 @@ struct rvt_driver_provided { /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num); + enum ib_qp_type type, u32 port_num); /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 05e18839eaff..79d109c47242 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -50,6 +50,10 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_COUNTER, /** + * @RDMA_RESTRACK_SRQ: Shared receive queue (SRQ) + */ + RDMA_RESTRACK_SRQ, + /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 6ad9dc836c10..d606cac48233 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -42,29 +42,29 @@ struct rdma_rw_ctx { }; }; -int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, +int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 sg_offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir); -void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, - struct scatterlist *sg, u32 sg_cnt, - enum dma_data_direction dir); +void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, + u32 port_num, struct scatterlist *sg, u32 sg_cnt, + enum dma_data_direction dir); int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, - u8 port_num, struct scatterlist *sg, u32 sg_cnt, + u32 port_num, struct scatterlist *sg, u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt, struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey, enum dma_data_direction dir); void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, - u8 port_num, struct scatterlist *sg, u32 sg_cnt, + u32 port_num, struct scatterlist *sg, u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt, enum dma_data_direction dir); struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, - u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); -int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, + u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); +int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); -unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, +unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num, unsigned int maxpages); void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 39ef204753ec..23bb404aba12 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -875,9 +875,14 @@ static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, return ERR_PTR(-EOVERFLOW); return uverbs_zalloc(bundle, bytes); } -int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, - size_t idx, s64 lower_bound, u64 upper_bound, - s64 *def_val); + +int _uverbs_get_const_signed(s64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val); +int _uverbs_get_const_unsigned(u64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 upper_bound, u64 *def_val); int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size); #else @@ -921,27 +926,77 @@ uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { return -EINVAL; } +static inline int +_uverbs_get_const_signed(s64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val) +{ + return -EINVAL; +} +static inline int +_uverbs_get_const_unsigned(u64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 upper_bound, u64 *def_val) +{ + return -EINVAL; +} #endif -#define uverbs_get_const(_to, _attrs_bundle, _idx) \ +#define uverbs_get_const_signed(_to, _attrs_bundle, _idx) \ ({ \ s64 _val; \ - int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \ - type_min(typeof(*_to)), \ - type_max(typeof(*_to)), NULL); \ - (*_to) = _val; \ + int _ret = \ + _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*(_to))), \ + type_max(typeof(*(_to))), NULL); \ + (*(_to)) = _val; \ _ret; \ }) -#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ +#define uverbs_get_const_unsigned(_to, _attrs_bundle, _idx) \ + ({ \ + u64 _val; \ + int _ret = \ + _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ + type_max(typeof(*(_to))), NULL); \ + (*(_to)) = _val; \ + _ret; \ + }) + +#define uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, _default) \ ({ \ s64 _val; \ s64 _def_val = _default; \ int _ret = \ - _uverbs_get_const(&_val, _attrs_bundle, _idx, \ - type_min(typeof(*_to)), \ - type_max(typeof(*_to)), &_def_val); \ - (*_to) = _val; \ + _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*(_to))), \ + type_max(typeof(*(_to))), &_def_val); \ + (*(_to)) = _val; \ _ret; \ }) + +#define uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, _default) \ + ({ \ + u64 _val; \ + u64 _def_val = _default; \ + int _ret = \ + _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ + type_max(typeof(*(_to))), &_def_val); \ + (*(_to)) = _val; \ + _ret; \ + }) + +#define uverbs_get_const(_to, _attrs_bundle, _idx) \ + (is_signed_type(typeof(*(_to))) ? \ + uverbs_get_const_signed(_to, _attrs_bundle, _idx) : \ + uverbs_get_const_unsigned(_to, _attrs_bundle, _idx)) \ + +#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ + (is_signed_type(typeof(*(_to))) ? \ + uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, \ + _default) : \ + uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \ + _default)) + #endif diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index f04f5126f61e..ee7873f872c3 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -20,7 +20,7 @@ /* These are static so they do not need to be qualified */ #define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id -#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id +#define UVERBS_OBJECT_METHODS(object_id) _UVERBS_NAME(_object_methods_##object_id, __LINE__) #define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h index 5017a8829270..c3d354702cb0 100644 --- a/include/trace/events/cma.h +++ b/include/trace/events/cma.h @@ -8,28 +8,31 @@ #include <linux/types.h> #include <linux/tracepoint.h> -TRACE_EVENT(cma_alloc, +DECLARE_EVENT_CLASS(cma_alloc_class, - TP_PROTO(unsigned long pfn, const struct page *page, - unsigned int count, unsigned int align), + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, + unsigned long count, unsigned int align), - TP_ARGS(pfn, page, count, align), + TP_ARGS(name, pfn, page, count, align), TP_STRUCT__entry( + __string(name, name) __field(unsigned long, pfn) __field(const struct page *, page) - __field(unsigned int, count) + __field(unsigned long, count) __field(unsigned int, align) ), TP_fast_assign( + __assign_str(name, name); __entry->pfn = pfn; __entry->page = page; __entry->count = count; __entry->align = align; ), - TP_printk("pfn=%lx page=%p count=%u align=%u", + TP_printk("name=%s pfn=%lx page=%p count=%lu align=%u", + __get_str(name), __entry->pfn, __entry->page, __entry->count, @@ -38,29 +41,72 @@ TRACE_EVENT(cma_alloc, TRACE_EVENT(cma_release, - TP_PROTO(unsigned long pfn, const struct page *page, - unsigned int count), + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, + unsigned long count), - TP_ARGS(pfn, page, count), + TP_ARGS(name, pfn, page, count), TP_STRUCT__entry( + __string(name, name) __field(unsigned long, pfn) __field(const struct page *, page) - __field(unsigned int, count) + __field(unsigned long, count) ), TP_fast_assign( + __assign_str(name, name); __entry->pfn = pfn; __entry->page = page; __entry->count = count; ), - TP_printk("pfn=%lx page=%p count=%u", + TP_printk("name=%s pfn=%lx page=%p count=%lu", + __get_str(name), __entry->pfn, __entry->page, __entry->count) ); +TRACE_EVENT(cma_alloc_start, + + TP_PROTO(const char *name, unsigned long count, unsigned int align), + + TP_ARGS(name, count, align), + + TP_STRUCT__entry( + __string(name, name) + __field(unsigned long, count) + __field(unsigned int, align) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->count = count; + __entry->align = align; + ), + + TP_printk("name=%s count=%lu align=%u", + __get_str(name), + __entry->count, + __entry->align) +); + +DEFINE_EVENT(cma_alloc_class, cma_alloc_finish, + + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, + unsigned long count, unsigned int align), + + TP_ARGS(name, pfn, page, count, align) +); + +DEFINE_EVENT(cma_alloc_class, cma_alloc_busy_retry, + + TP_PROTO(const char *name, unsigned long pfn, const struct page *page, + unsigned long count, unsigned int align), + + TP_ARGS(name, pfn, page, count, align) +); + #endif /* _TRACE_CMA_H */ /* This part must be outside protection */ diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index e801f4910522..d233f2916584 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -15,126 +15,6 @@ #include <linux/tracepoint.h> #include <linux/intel-iommu.h> -DECLARE_EVENT_CLASS(dma_map, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, - size_t size), - - TP_ARGS(dev, dev_addr, phys_addr, size), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(dma_addr_t, dev_addr) - __field(phys_addr_t, phys_addr) - __field(size_t, size) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dev_addr = dev_addr; - __entry->phys_addr = phys_addr; - __entry->size = size; - ), - - TP_printk("dev=%s dev_addr=0x%llx phys_addr=0x%llx size=%zu", - __get_str(dev_name), - (unsigned long long)__entry->dev_addr, - (unsigned long long)__entry->phys_addr, - __entry->size) -); - -DEFINE_EVENT(dma_map, map_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, - size_t size), - TP_ARGS(dev, dev_addr, phys_addr, size) -); - -DEFINE_EVENT(dma_map, bounce_map_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr, - size_t size), - TP_ARGS(dev, dev_addr, phys_addr, size) -); - -DECLARE_EVENT_CLASS(dma_unmap, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - - TP_ARGS(dev, dev_addr, size), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(dma_addr_t, dev_addr) - __field(size_t, size) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dev_addr = dev_addr; - __entry->size = size; - ), - - TP_printk("dev=%s dev_addr=0x%llx size=%zu", - __get_str(dev_name), - (unsigned long long)__entry->dev_addr, - __entry->size) -); - -DEFINE_EVENT(dma_unmap, unmap_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - TP_ARGS(dev, dev_addr, size) -); - -DEFINE_EVENT(dma_unmap, unmap_sg, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - TP_ARGS(dev, dev_addr, size) -); - -DEFINE_EVENT(dma_unmap, bounce_unmap_single, - TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), - TP_ARGS(dev, dev_addr, size) -); - -DECLARE_EVENT_CLASS(dma_map_sg, - TP_PROTO(struct device *dev, int index, int total, - struct scatterlist *sg), - - TP_ARGS(dev, index, total, sg), - - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(dma_addr_t, dev_addr) - __field(phys_addr_t, phys_addr) - __field(size_t, size) - __field(int, index) - __field(int, total) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->dev_addr = sg->dma_address; - __entry->phys_addr = sg_phys(sg); - __entry->size = sg->dma_length; - __entry->index = index; - __entry->total = total; - ), - - TP_printk("dev=%s [%d/%d] dev_addr=0x%llx phys_addr=0x%llx size=%zu", - __get_str(dev_name), __entry->index, __entry->total, - (unsigned long long)__entry->dev_addr, - (unsigned long long)__entry->phys_addr, - __entry->size) -); - -DEFINE_EVENT(dma_map_sg, map_sg, - TP_PROTO(struct device *dev, int index, int total, - struct scatterlist *sg), - TP_ARGS(dev, index, total, sg) -); - -DEFINE_EVENT(dma_map_sg, bounce_map_sg, - TP_PROTO(struct device *dev, int index, int total, - struct scatterlist *sg), - TP_ARGS(dev, index, total, sg) -); - TRACE_EVENT(qi_submit, TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index bd528176a3d5..abb8b24744fd 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -49,7 +49,7 @@ TRACE_EVENT(io_uring_create, ); /** - * io_uring_register - called after a buffer/file/eventfd was succesfully + * io_uring_register - called after a buffer/file/eventfd was successfully * registered for a ring * * @ctx: pointer to a ring context structure diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 49d7d0fe29f6..37e1e1a2d67d 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -255,30 +255,6 @@ TRACE_EVENT(kvm_fpu, TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol)) ); -TRACE_EVENT(kvm_age_page, - TP_PROTO(ulong gfn, int level, struct kvm_memory_slot *slot, int ref), - TP_ARGS(gfn, level, slot, ref), - - TP_STRUCT__entry( - __field( u64, hva ) - __field( u64, gfn ) - __field( u8, level ) - __field( u8, referenced ) - ), - - TP_fast_assign( - __entry->gfn = gfn; - __entry->level = level; - __entry->hva = ((gfn - slot->base_gfn) << - PAGE_SHIFT) + slot->userspace_addr; - __entry->referenced = ref; - ), - - TP_printk("hva %llx gfn %llx level %u %s", - __entry->hva, __entry->gfn, __entry->level, - __entry->referenced ? "YOUNG" : "OLD") -); - #ifdef CONFIG_KVM_ASYNC_PF DECLARE_EVENT_CLASS(kvm_async_get_page_class, @@ -462,6 +438,72 @@ TRACE_EVENT(kvm_dirty_ring_exit, TP_printk("vcpu %d", __entry->vcpu_id) ); +TRACE_EVENT(kvm_unmap_hva_range, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier unmap range: %#016lx -- %#016lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_set_spte_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva) +); + +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#016lx -- %#016lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#016lx", __entry->hva) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 4d434398d64d..9fb2a3bbcdfb 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -20,7 +20,8 @@ EM( MR_SYSCALL, "syscall_or_cpuset") \ EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ EM( MR_NUMA_MISPLACED, "numa_misplaced") \ - EMe(MR_CONTIG_RANGE, "contig_range") + EM( MR_CONTIG_RANGE, "contig_range") \ + EMe(MR_LONGTERM_PIN, "longterm_pin") /* * First define the enums in the above macros to be exported to userspace @@ -81,6 +82,28 @@ TRACE_EVENT(mm_migrate_pages, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); + +TRACE_EVENT(mm_migrate_pages_start, + + TP_PROTO(enum migrate_mode mode, int reason), + + TP_ARGS(mode, reason), + + TP_STRUCT__entry( + __field(enum migrate_mode, mode) + __field(int, reason) + ), + + TP_fast_assign( + __entry->mode = mode; + __entry->reason = reason; + ), + + TP_printk("mode=%s reason=%s", + __print_symbolic(__entry->mode, MIGRATE_MODE), + __print_symbolic(__entry->reason, MIGRATE_REASON)) +); + #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 67018d367b9f..629c7a0eaff2 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -137,6 +137,12 @@ IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) #define IF_HAVE_VM_SOFTDIRTY(flag,name) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +# define IF_HAVE_UFFD_MINOR(flag, name) {flag, name}, +#else +# define IF_HAVE_UFFD_MINOR(flag, name) +#endif + #define __def_vmaflag_names \ {VM_READ, "read" }, \ {VM_WRITE, "write" }, \ @@ -148,6 +154,7 @@ IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) {VM_MAYSHARE, "mayshare" }, \ {VM_GROWSDOWN, "growsdown" }, \ {VM_UFFD_MISSING, "uffd_missing" }, \ +IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \ {VM_PFNMAP, "pfnmap" }, \ {VM_DENYWRITE, "denywrite" }, \ {VM_UFFD_WP, "uffd_wp" }, \ diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c7711e9b6900..6768b64bc738 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -48,7 +48,7 @@ TRACE_EVENT(rcu_utilization, * RCU flavor, the grace-period number, and a string identifying the * grace-period-related event as follows: * - * "AccReadyCB": CPU acclerates new callbacks to RCU_NEXT_READY_TAIL. + * "AccReadyCB": CPU accelerates new callbacks to RCU_NEXT_READY_TAIL. * "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL. * "newreq": Request a new grace period. * "start": Start a grace period. diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index c838e7ac1c2d..bd55908c1bef 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -60,6 +60,46 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class, ), \ TP_ARGS(wc, cid)) +DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + if (wc->status) + __entry->vendor_err = wc->vendor_err; + else + __entry->vendor_err = 0; + ), + + TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_MR_COMPLETION_EVENT(name) \ + DEFINE_EVENT(rpcrdma_mr_completion_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class, TP_PROTO( const struct ib_wc *wc, @@ -150,19 +190,17 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt, TP_ARGS(r_xprt), TP_STRUCT__entry( - __field(const void *, r_xprt) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p", - __get_str(addr), __get_str(port), __entry->r_xprt + TP_printk("peer=[%s]:%s", + __get_str(addr), __get_str(port) ) ); @@ -182,7 +220,6 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, TP_ARGS(r_xprt, rc), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(int, rc) __field(int, connect_status) __string(addr, rpcrdma_addrstr(r_xprt)) @@ -190,15 +227,14 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->rc = rc; __entry->connect_status = r_xprt->rx_ep->re_connect_status; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s rc=%d connection status=%d", + __get_str(addr), __get_str(port), __entry->rc, __entry->connect_status ) ); @@ -343,7 +379,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -384,7 +420,7 @@ DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -495,22 +531,19 @@ TRACE_EVENT(xprtrdma_op_connect, TP_ARGS(r_xprt, delay), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned long, delay) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->delay = delay; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", - __get_str(addr), __get_str(port), __entry->r_xprt, - __entry->delay + TP_printk("peer=[%s]:%s delay=%lu", + __get_str(addr), __get_str(port), __entry->delay ) ); @@ -525,7 +558,6 @@ TRACE_EVENT(xprtrdma_op_set_cto, TP_ARGS(r_xprt, connect, reconnect), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned long, connect) __field(unsigned long, reconnect) __string(addr, rpcrdma_addrstr(r_xprt)) @@ -533,51 +565,18 @@ TRACE_EVENT(xprtrdma_op_set_cto, ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->connect = connect; __entry->reconnect = reconnect; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu", + __get_str(addr), __get_str(port), __entry->connect / HZ, __entry->reconnect / HZ ) ); -TRACE_EVENT(xprtrdma_qp_event, - TP_PROTO( - const struct rpcrdma_ep *ep, - const struct ib_event *event - ), - - TP_ARGS(ep, event), - - TP_STRUCT__entry( - __field(unsigned long, event) - __string(name, event->device->name) - __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) - __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) - ), - - TP_fast_assign( - const struct rdma_cm_id *id = ep->re_id; - - __entry->event = event->event; - __assign_str(name, event->device->name); - memcpy(__entry->srcaddr, &id->route.addr.src_addr, - sizeof(struct sockaddr_in6)); - memcpy(__entry->dstaddr, &id->route.addr.dst_addr, - sizeof(struct sockaddr_in6)); - ), - - TP_printk("%pISpc -> %pISpc device=%s %s (%lu)", - __entry->srcaddr, __entry->dstaddr, __get_str(name), - rdma_show_ib_event(__entry->event), __entry->event - ) -); - /** ** Call events **/ @@ -591,22 +590,19 @@ TRACE_EVENT(xprtrdma_createmrs, TP_ARGS(r_xprt, count), TP_STRUCT__entry( - __field(const void *, r_xprt) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) __field(unsigned int, count) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->count = count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs", - __get_str(addr), __get_str(port), __entry->r_xprt, - __entry->count + TP_printk("peer=[%s]:%s created %u MRs", + __get_str(addr), __get_str(port), __entry->count ) ); @@ -829,7 +825,7 @@ TRACE_EVENT(xprtrdma_post_recvs, TP_ARGS(r_xprt, count, status), TP_STRUCT__entry( - __field(const void *, r_xprt) + __field(u32, cq_id) __field(unsigned int, count) __field(int, status) __field(int, posted) @@ -838,16 +834,18 @@ TRACE_EVENT(xprtrdma_post_recvs, ), TP_fast_assign( - __entry->r_xprt = r_xprt; + const struct rpcrdma_ep *ep = r_xprt->rx_ep; + + __entry->cq_id = ep->re_attr.recv_cq->res.id; __entry->count = count; __entry->status = status; - __entry->posted = r_xprt->rx_ep->re_receive_count; + __entry->posted = ep->re_receive_count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)", + __get_str(addr), __get_str(port), __entry->cq_id, __entry->count, __entry->posted, __entry->status ) ); @@ -886,10 +884,10 @@ TRACE_EVENT(xprtrdma_post_linv_err, DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive); DEFINE_COMPLETION_EVENT(xprtrdma_wc_send); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake); -DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake); +DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done); TRACE_EVENT(xprtrdma_frwr_alloc, TP_PROTO( @@ -905,7 +903,7 @@ TRACE_EVENT(xprtrdma_frwr_alloc, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->rc = rc; ), @@ -933,7 +931,7 @@ TRACE_EVENT(xprtrdma_frwr_dereg, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->nents = mr->mr_nents; __entry->handle = mr->mr_handle; __entry->length = mr->mr_length; @@ -966,7 +964,7 @@ TRACE_EVENT(xprtrdma_frwr_sgerr, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->addr = mr->mr_sg->dma_address; __entry->dir = mr->mr_dir; __entry->nents = sg_nents; @@ -996,7 +994,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr, ), TP_fast_assign( - __entry->mr_id = mr->frwr.fr_mr->res.id; + __entry->mr_id = mr->mr_ibmr->res.id; __entry->addr = mr->mr_sg->dma_address; __entry->dir = mr->mr_dir; __entry->num_mapped = num_mapped; @@ -1010,11 +1008,12 @@ TRACE_EVENT(xprtrdma_frwr_maperr, ) ); +DEFINE_MR_EVENT(fastreg); DEFINE_MR_EVENT(localinv); +DEFINE_MR_EVENT(reminv); DEFINE_MR_EVENT(map); DEFINE_ANON_MR_EVENT(unmap); -DEFINE_ANON_MR_EVENT(recycle); TRACE_EVENT(xprtrdma_dma_maperr, TP_PROTO( @@ -1248,22 +1247,19 @@ TRACE_EVENT(xprtrdma_cb_setup, TP_ARGS(r_xprt, reqs), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned int, reqs) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; __entry->reqs = reqs; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs", - __get_str(addr), __get_str(port), - __entry->r_xprt, __entry->reqs + TP_printk("peer=[%s]:%s %u reqs", + __get_str(addr), __get_str(port), __entry->reqs ) ); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index cbe3e152d24c..1eca2305ca42 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -174,7 +174,7 @@ DEFINE_EVENT(sched_wakeup_template, sched_waking, TP_ARGS(p)); /* - * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING. * It is not always called from the waking context. */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index bda16e9e6ba7..d02e01a27b69 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1079,6 +1079,46 @@ TRACE_EVENT(xprt_transmit, __entry->seqno, __entry->status) ); +TRACE_EVENT(xprt_retransmit, + TP_PROTO( + const struct rpc_rqst *rqst + ), + + TP_ARGS(rqst), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(int, ntrans) + __field(int, version) + __string(progname, + rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + rqst->rq_task->tk_msg.rpc_proc->p_name) + ), + + TP_fast_assign( + struct rpc_task *task = rqst->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->ntrans = rqst->rq_ntrans; + __assign_str(progname, + task->tk_client->cl_program->name) + __entry->version = task->tk_client->cl_vers; + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + ), + + TP_printk( + "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __get_str(progname), __entry->version, __get_str(procedure), + __entry->ntrans) +); + TRACE_EVENT(xprt_ping, TP_PROTO(const struct rpc_xprt *xprt, int status), @@ -1141,7 +1181,6 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, DEFINE_WRITELOCK_EVENT(reserve_xprt); DEFINE_WRITELOCK_EVENT(release_xprt); -DEFINE_WRITELOCK_EVENT(transmit_queued); DECLARE_EVENT_CLASS(xprt_cong_event, TP_PROTO( diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 19abb6c3eb73..6ad031c71be7 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -119,7 +119,7 @@ TRACE_EVENT(timer_expire_entry, * When used in combination with the timer_expire_entry tracepoint we can * determine the runtime of the timer callback function. * - * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might * be invalid. We solely track the pointer. */ DEFINE_EVENT(timer_class, timer_expire_exit, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 739c839d28fe..6de5a7fc066b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -866,8 +866,15 @@ __SYSCALL(__NR_mount_setattr, sys_mount_setattr) #define __NR_quotactl_path 443 __SYSCALL(__NR_quotactl_path, sys_quotactl_path) +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) + #undef __NR_syscalls -#define __NR_syscalls 444 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index fcff6669137b..e5c6e458bdf7 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -193,8 +193,22 @@ struct dm_name_list { __u32 next; /* offset to the next record from the _start_ of this */ char name[0]; + + /* + * The following members can be accessed by taking a pointer that + * points immediately after the terminating zero character in "name" + * and aligning this pointer to next 8-byte boundary. + * Uuid is present if the flag DM_NAME_LIST_FLAG_HAS_UUID is set. + * + * __u32 event_nr; + * __u32 flags; + * char uuid[0]; + */ }; +#define DM_NAME_LIST_FLAG_HAS_UUID 1 +#define DM_NAME_LIST_FLAG_DOESNT_HAVE_UUID 2 + /* * Used to retrieve the target versions */ @@ -272,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 44 +#define DM_VERSION_MINOR 45 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2021-02-01)" +#define DM_VERSION_EXTRA "-ioctl (2021-03-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index e8eb4ad03cf1..d174914a837d 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -153,14 +153,3 @@ enum { #define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1) #endif /* _LINUX_IF_BONDING_H */ - -/* - * Local variables: - * version-control: t - * kept-new-versions: 5 - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ - diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index e1d9e75f2c94..59178fc229ca 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -288,7 +288,8 @@ struct iommu_gpasid_bind_data_vtd { #define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ #define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ #define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_LAST (1 << 6) +#define IOMMU_SVA_VTD_GPASID_WPE (1 << 6) /* Write protect enable */ +#define IOMMU_SVA_VTD_GPASID_LAST (1 << 7) __u64 flags; __u32 pat; __u32 emt; diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 05669c87a0af..778dc191c265 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -42,6 +42,7 @@ #define KEXEC_ARCH_MIPS_LE (10 << 16) #define KEXEC_ARCH_MIPS ( 8 << 16) #define KEXEC_ARCH_AARCH64 (183 << 16) +#define KEXEC_ARCH_RISCV (243 << 16) /* The artificial cap on the number of segments passed to kexec_load. */ #define KEXEC_SEGMENT_MAX 16 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f6afee209620..3fd9a7e9d90c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 #ifdef KVM_CAP_IRQ_ROUTING @@ -1671,6 +1675,8 @@ enum sev_cmd_id { KVM_SEV_CERT_EXPORT, /* Attestation report */ KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report { __u32 len; }; +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h new file mode 100644 index 000000000000..b3d952067f59 --- /dev/null +++ b/include/uapi/linux/landlock.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Landlock - User space API + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2018-2020 ANSSI + */ + +#ifndef _UAPI_LINUX_LANDLOCK_H +#define _UAPI_LINUX_LANDLOCK_H + +#include <linux/types.h> + +/** + * struct landlock_ruleset_attr - Ruleset definition + * + * Argument of sys_landlock_create_ruleset(). This structure can grow in + * future versions. + */ +struct landlock_ruleset_attr { + /** + * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_) + * that is handled by this ruleset and should then be forbidden if no + * rule explicitly allow them. This is needed for backward + * compatibility reasons. + */ + __u64 handled_access_fs; +}; + +/* + * sys_landlock_create_ruleset() flags: + * + * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI + * version. + */ +#define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) + +/** + * enum landlock_rule_type - Landlock rule type + * + * Argument of sys_landlock_add_rule(). + */ +enum landlock_rule_type { + /** + * @LANDLOCK_RULE_PATH_BENEATH: Type of a &struct + * landlock_path_beneath_attr . + */ + LANDLOCK_RULE_PATH_BENEATH = 1, +}; + +/** + * struct landlock_path_beneath_attr - Path hierarchy definition + * + * Argument of sys_landlock_add_rule(). + */ +struct landlock_path_beneath_attr { + /** + * @allowed_access: Bitmask of allowed actions for this file hierarchy + * (cf. `Filesystem flags`_). + */ + __u64 allowed_access; + /** + * @parent_fd: File descriptor, open with ``O_PATH``, which identifies + * the parent directory of a file hierarchy, or just a file. + */ + __s32 parent_fd; + /* + * This struct is packed to avoid trailing reserved members. + * Cf. security/landlock/syscalls.c:build_check_abi() + */ +} __attribute__((packed)); + +/** + * DOC: fs_access + * + * A set of actions on kernel objects may be defined by an attribute (e.g. + * &struct landlock_path_beneath_attr) including a bitmask of access. + * + * Filesystem flags + * ~~~~~~~~~~~~~~~~ + * + * These flags enable to restrict a sandboxed process to a set of actions on + * files and directories. Files or directories opened before the sandboxing + * are not subject to these restrictions. + * + * A file can only receive these access rights: + * + * - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file. + * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. + * - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access. + * + * A directory can receive access rights related to files or directories. The + * following access right is applied to the directory itself, and the + * directories beneath it: + * + * - %LANDLOCK_ACCESS_FS_READ_DIR: Open a directory or list its content. + * + * However, the following access rights only apply to the content of a + * directory, not the directory itself: + * + * - %LANDLOCK_ACCESS_FS_REMOVE_DIR: Remove an empty directory or rename one. + * - %LANDLOCK_ACCESS_FS_REMOVE_FILE: Unlink (or rename) a file. + * - %LANDLOCK_ACCESS_FS_MAKE_CHAR: Create (or rename or link) a character + * device. + * - %LANDLOCK_ACCESS_FS_MAKE_DIR: Create (or rename) a directory. + * - %LANDLOCK_ACCESS_FS_MAKE_REG: Create (or rename or link) a regular file. + * - %LANDLOCK_ACCESS_FS_MAKE_SOCK: Create (or rename or link) a UNIX domain + * socket. + * - %LANDLOCK_ACCESS_FS_MAKE_FIFO: Create (or rename or link) a named pipe. + * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device. + * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link. + * + * .. warning:: + * + * It is currently not possible to restrict some file-related actions + * accessible through these syscall families: :manpage:`chdir(2)`, + * :manpage:`truncate(2)`, :manpage:`stat(2)`, :manpage:`flock(2)`, + * :manpage:`chmod(2)`, :manpage:`chown(2)`, :manpage:`setxattr(2)`, + * :manpage:`utime(2)`, :manpage:`ioctl(2)`, :manpage:`fcntl(2)`, + * :manpage:`access(2)`. + * Future Landlock evolutions will enable to restrict them. + */ +#define LANDLOCK_ACCESS_FS_EXECUTE (1ULL << 0) +#define LANDLOCK_ACCESS_FS_WRITE_FILE (1ULL << 1) +#define LANDLOCK_ACCESS_FS_READ_FILE (1ULL << 2) +#define LANDLOCK_ACCESS_FS_READ_DIR (1ULL << 3) +#define LANDLOCK_ACCESS_FS_REMOVE_DIR (1ULL << 4) +#define LANDLOCK_ACCESS_FS_REMOVE_FILE (1ULL << 5) +#define LANDLOCK_ACCESS_FS_MAKE_CHAR (1ULL << 6) +#define LANDLOCK_ACCESS_FS_MAKE_DIR (1ULL << 7) +#define LANDLOCK_ACCESS_FS_MAKE_REG (1ULL << 8) +#define LANDLOCK_ACCESS_FS_MAKE_SOCK (1ULL << 9) +#define LANDLOCK_ACCESS_FS_MAKE_FIFO (1ULL << 10) +#define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11) +#define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12) + +#endif /* _UAPI_LINUX_LANDLOCK_H */ diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 8948467b3992..4832fd0b5642 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -64,5 +64,12 @@ enum { #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ +/* + * These bit locations are exposed in the vm.zone_reclaim_mode sysctl + * ABI. New bits are OK, but existing bits can never change. + */ +#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */ +#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ +#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/include/uapi/linux/netfilter/xt_SECMARK.h b/include/uapi/linux/netfilter/xt_SECMARK.h index 1f2a708413f5..beb2cadba8a9 100644 --- a/include/uapi/linux/netfilter/xt_SECMARK.h +++ b/include/uapi/linux/netfilter/xt_SECMARK.h @@ -20,4 +20,10 @@ struct xt_secmark_target_info { char secctx[SECMARK_SECCTX_MAX]; }; +struct xt_secmark_target_info_v1 { + __u8 mode; + char secctx[SECMARK_SECCTX_MAX]; + __u32 secid; +}; + #endif /*_XT_SECMARK_H_target */ diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index ed5415e0f1c1..800bb0ffa6e6 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -178,9 +178,3 @@ #define NFS4_MAX_BACK_CHANNEL_OPS 2 #endif /* _UAPI_LINUX_NFS4_H */ - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..bf8143505c49 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1182,10 +1182,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/include/uapi/linux/rpmsg.h b/include/uapi/linux/rpmsg.h index e14c6dab4223..f5ca8740f3fb 100644 --- a/include/uapi/linux/rpmsg.h +++ b/include/uapi/linux/rpmsg.h @@ -9,11 +9,13 @@ #include <linux/ioctl.h> #include <linux/types.h> +#define RPMSG_ADDR_ANY 0xFFFFFFFF + /** * struct rpmsg_endpoint_info - endpoint info representation * @name: name of service - * @src: local address - * @dst: destination address + * @src: local address. To set to RPMSG_ADDR_ANY if not used. + * @dst: destination address. To set to RPMSG_ADDR_ANY if not used. */ struct rpmsg_endpoint_info { char name[32]; @@ -21,7 +23,14 @@ struct rpmsg_endpoint_info { __u32 dst; }; +/** + * Instantiate a new rmpsg char device endpoint. + */ #define RPMSG_CREATE_EPT_IOCTL _IOW(0xb5, 0x1, struct rpmsg_endpoint_info) + +/** + * Destroy a rpmsg char device endpoint created by the RPMSG_CREATE_EPT_IOCTL. + */ #define RPMSG_DESTROY_EPT_IOCTL _IO(0xb5, 0x2) #endif diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 3b39ef1dbb46..5ae3ace84de0 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -27,6 +27,7 @@ enum { SEG6_LOCAL_OIF, SEG6_LOCAL_BPF, SEG6_LOCAL_VRFTABLE, + SEG6_LOCAL_COUNTERS, __SEG6_LOCAL_MAX, }; #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) @@ -78,4 +79,33 @@ enum { #define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1) +/* SRv6 Behavior counters are encoded as netlink attributes guaranteeing the + * correct alignment. + * Each counter is identified by a different attribute type (i.e. + * SEG6_LOCAL_CNT_PACKETS). + * + * - SEG6_LOCAL_CNT_PACKETS: identifies a counter that counts the number of + * packets that have been CORRECTLY processed by an SRv6 Behavior instance + * (i.e., packets that generate errors or are dropped are NOT counted). + * + * - SEG6_LOCAL_CNT_BYTES: identifies a counter that counts the total amount + * of traffic in bytes of all packets that have been CORRECTLY processed by + * an SRv6 Behavior instance (i.e., packets that generate errors or are + * dropped are NOT counted). + * + * - SEG6_LOCAL_CNT_ERRORS: identifies a counter that counts the number of + * packets that have NOT been properly processed by an SRv6 Behavior instance + * (i.e., packets that generate errors or are dropped). + */ +enum { + SEG6_LOCAL_CNT_UNSPEC, + SEG6_LOCAL_CNT_PAD, /* pad for 64 bits values */ + SEG6_LOCAL_CNT_PACKETS, + SEG6_LOCAL_CNT_BYTES, + SEG6_LOCAL_CNT_ERRORS, + __SEG6_LOCAL_CNT_MAX, +}; + +#define SEG6_LOCAL_CNT_MAX (__SEG6_LOCAL_CNT_MAX - 1) + #endif diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index c105054cbb57..9aa2fedfa309 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -60,7 +60,7 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_UNSPEC, THERMAL_GENL_EVENT_TZ_CREATE, /* Thermal zone creation */ THERMAL_GENL_EVENT_TZ_DELETE, /* Thermal zone deletion */ - THERMAL_GENL_EVENT_TZ_DISABLE, /* Thermal zone disabed */ + THERMAL_GENL_EVENT_TZ_DISABLE, /* Thermal zone disabled */ THERMAL_GENL_EVENT_TZ_ENABLE, /* Thermal zone enabled */ THERMAL_GENL_EVENT_TZ_TRIP_UP, /* Trip point crossed the way up */ THERMAL_GENL_EVENT_TZ_TRIP_DOWN, /* Trip point crossed the way down */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 5f2d88212f7c..bafbeb1a2624 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -19,15 +19,19 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) +#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ + UFFDIO_REGISTER_MODE_WP | \ + UFFDIO_REGISTER_MODE_MINOR) #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ - UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ - UFFD_FEATURE_THREAD_ID) + UFFD_FEATURE_THREAD_ID | \ + UFFD_FEATURE_MINOR_HUGETLBFS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -36,10 +40,12 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ - (__u64)1 << _UFFDIO_WRITEPROTECT) + (__u64)1 << _UFFDIO_WRITEPROTECT | \ + (__u64)1 << _UFFDIO_CONTINUE) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ - (__u64)1 << _UFFDIO_COPY) + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_CONTINUE) /* * Valid ioctl command number range with this API is from 0x00 to @@ -55,6 +61,7 @@ #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) #define _UFFDIO_WRITEPROTECT (0x06) +#define _UFFDIO_CONTINUE (0x07) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -73,6 +80,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) +#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { @@ -127,6 +136,7 @@ struct uffd_msg { /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ +#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ struct uffdio_api { /* userland asks for an API number and the features to enable */ @@ -171,6 +181,10 @@ struct uffdio_api { * * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will * be returned, if feature is not requested 0 will be returned. + * + * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults + * can be intercepted (via REGISTER_MODE_MINOR) for + * hugetlbfs-backed pages. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -181,6 +195,7 @@ struct uffdio_api { #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) +#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) __u64 features; __u64 ioctls; @@ -195,6 +210,7 @@ struct uffdio_register { struct uffdio_range range; #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) +#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) __u64 mode; /* @@ -257,6 +273,18 @@ struct uffdio_writeprotect { __u64 mode; }; +struct uffdio_continue { + struct uffdio_range range; +#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * Fields below here are written by the ioctl and must be at the end: + * the copy_from_user will not read past here. + */ + __s64 mapped; +}; + /* * Flags for the userfaultfd(2) system call itself. */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 34b1f53a3901..ef33ea002b0b 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -333,10 +333,21 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) /* 10de vendor PCI sub-types */ -/* subtype 1 was VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM, don't use */ +/* + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. + * + * Deprecated, region no longer provided + */ +#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) /* 1014 vendor PCI sub-types */ -/* subtype 1 was VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD, don't use */ +/* + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU + * to do TLB invalidation on a GPU. + * + * Deprecated, region no longer provided + */ +#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) /* sub-types for VFIO_REGION_TYPE_GFX */ #define VFIO_REGION_SUBTYPE_GFX_EDID (1) @@ -630,9 +641,36 @@ struct vfio_device_migration_info { */ #define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 -/* subtype 4 was VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, don't use */ +/* + * Capability with compressed real address (aka SSA - small system address) + * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing + * and by the userspace to associate a NVLink bridge with a GPU. + * + * Deprecated, capability no longer provided + */ +#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 + +struct vfio_region_info_cap_nvlink2_ssatgt { + struct vfio_info_cap_header header; + __u64 tgt; +}; -/* subtype 5 was VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD, don't use */ +/* + * Capability with an NVLink link speed. The value is read by + * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed" + * property in the device tree. The value is fixed in the hardware + * and failing to provide the correct value results in the link + * not working with no indication from the driver why. + * + * Deprecated, capability no longer provided + */ +#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 + +struct vfio_region_info_cap_nvlink2_lnkspd { + struct vfio_info_cap_header header; + __u32 link_speed; + __u32 __pad; +}; /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 90b739d05adf..42b177655560 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -86,6 +86,8 @@ struct hns_roce_ib_create_qp_resp { struct hns_roce_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 cqe_size; + __u32 srq_tab_size; + __u32 reserved; }; struct hns_roce_ib_alloc_pd_resp { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 3fd9b380a091..ca2372864b70 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -41,6 +41,25 @@ enum mlx5_ib_create_flow_action_attrs { MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT), }; +enum mlx5_ib_dm_methods { + MLX5_IB_METHOD_DM_MAP_OP_ADDR = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_METHOD_DM_QUERY, +}; + +enum mlx5_ib_dm_map_op_addr_attrs { + MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP, + MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET, + MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX, +}; + +enum mlx5_ib_query_dm_attrs { + MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET, + MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX, + MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH, +}; + enum mlx5_ib_alloc_dm_attrs { MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, @@ -154,6 +173,7 @@ enum mlx5_ib_devx_umem_reg_attrs { MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, }; enum mlx5_ib_devx_umem_dereg_attrs { @@ -300,4 +320,13 @@ enum mlx5_ib_pd_methods { }; +enum mlx5_ib_device_methods { + MLX5_IB_METHOD_QUERY_PORT = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_query_port_attrs { + MLX5_IB_ATTR_QUERY_PORT_PORT_NUM = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_QUERY_PORT, +}; + #endif diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 56b26eaea083..a21ca8ece8db 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -83,5 +83,30 @@ enum mlx5_ib_uapi_uar_alloc_type { MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC = 0x1, }; +enum mlx5_ib_uapi_query_port_flags { + MLX5_IB_UAPI_QUERY_PORT_VPORT = 1 << 0, + MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID = 1 << 1, + MLX5_IB_UAPI_QUERY_PORT_VPORT_STEERING_ICM_RX = 1 << 2, + MLX5_IB_UAPI_QUERY_PORT_VPORT_STEERING_ICM_TX = 1 << 3, + MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0 = 1 << 4, + MLX5_IB_UAPI_QUERY_PORT_ESW_OWNER_VHCA_ID = 1 << 5, +}; + +struct mlx5_ib_uapi_reg { + __u32 value; + __u32 mask; +}; + +struct mlx5_ib_uapi_query_port { + __aligned_u64 flags; + __u16 vport; + __u16 vport_vhca_id; + __u16 esw_owner_vhca_id; + __u16 rsvd0; + __aligned_u64 vport_steering_icm_rx; + __aligned_u64 vport_steering_icm_tx; + struct mlx5_ib_uapi_reg reg_c0; +}; + #endif diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index d2f5b8396243..75a1ae2311d8 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -293,6 +293,10 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_MR_GET_RAW, + RDMA_NLDEV_CMD_RES_CTX_GET, /* can dump */ + + RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */ + RDMA_NLDEV_NUM_OPS }; @@ -533,6 +537,18 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_RES_RAW, /* binary */ + RDMA_NLDEV_ATTR_RES_CTX, /* nested table */ + RDMA_NLDEV_ATTR_RES_CTX_ENTRY, /* nested table */ + + RDMA_NLDEV_ATTR_RES_SRQ, /* nested table */ + RDMA_NLDEV_ATTR_RES_SRQ_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_RES_SRQN, /* u32 */ + + RDMA_NLDEV_ATTR_MIN_RANGE, /* u32 */ + RDMA_NLDEV_ATTR_MAX_RANGE, /* u32 */ + + RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* u8 */ + /* * Always the end */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 9e9f9bf7c66d..449bd383cb76 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -208,13 +208,3 @@ #define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/interface/hvm/hvm_vcpu.h b/include/xen/interface/hvm/hvm_vcpu.h index 32ca83edd44d..bfc2138e0bf5 100644 --- a/include/xen/interface/hvm/hvm_vcpu.h +++ b/include/xen/interface/hvm/hvm_vcpu.h @@ -131,13 +131,3 @@ struct vcpu_hvm_context { typedef struct vcpu_hvm_context vcpu_hvm_context_t; #endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h index aaf2951b1cce..fb8716112251 100644 --- a/include/xen/interface/io/xenbus.h +++ b/include/xen/interface/io/xenbus.h @@ -39,13 +39,3 @@ enum xenbus_state }; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ - -/* - * Local variables: - * c-file-style: "linux" - * indent-tabs-mode: t - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index dbc4a4b785f6..b3e647f86e3e 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -10,7 +10,8 @@ void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir); -extern int xen_swiotlb_init(int verbose, bool early); +int xen_swiotlb_init(void); +void __init xen_swiotlb_init_early(void); extern const struct dma_map_ops xen_swiotlb_dma_ops; #endif /* __LINUX_SWIOTLB_XEN_H */ |