diff options
Diffstat (limited to 'include/linux')
331 files changed, 7605 insertions, 2900 deletions
diff --git a/include/linux/a.out.h b/include/linux/a.out.h deleted file mode 100644 index 600cf45645c6..000000000000 --- a/include/linux/a.out.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __A_OUT_GNU_H__ -#define __A_OUT_GNU_H__ - -#include <uapi/linux/a.out.h> - -#ifndef __ASSEMBLY__ -#ifdef linux -#include <asm/page.h> -#if defined(__i386__) || defined(__mc68000__) -#else -#ifndef SEGMENT_SIZE -#define SEGMENT_SIZE PAGE_SIZE -#endif -#endif -#endif -#endif /*__ASSEMBLY__ */ -#endif /* __A_OUT_GNU_H__ */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6f64b2f3dc54..3015235d65e3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -279,14 +279,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); +#if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) +void acpi_arch_dma_setup(struct device *dev); +#else +static inline void acpi_arch_dma_setup(struct device *dev) { } +#endif + #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); -void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } -static inline void -acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); @@ -495,7 +498,7 @@ bool acpi_dev_resource_address_space(struct acpi_resource *ares, struct resource_win *win); bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, struct resource_win *win); -unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); +unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable, u8 wake_capable); unsigned int acpi_dev_get_irq_type(int triggering, int polarity); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); @@ -506,6 +509,7 @@ int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, void *preproc_data); int acpi_dev_get_dma_resources(struct acpi_device *adev, struct list_head *list); +int acpi_dev_get_memory_resources(struct acpi_device *adev, struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); @@ -798,6 +802,11 @@ acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *u return false; } +static inline int acpi_dev_uid_to_integer(struct acpi_device *adev, u64 *integer) +{ + return -ENODEV; +} + static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { @@ -977,8 +986,7 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, - u64 *offset, u64 *size) +static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { return -ENODEV; } @@ -1075,6 +1083,7 @@ acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); + void (*check)(void); void (*restore)(void); }; int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); @@ -1202,7 +1211,8 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); +int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index, + bool *wake_capable); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) @@ -1214,16 +1224,28 @@ static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares, { return false; } -static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, - const char *name, int index) +static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, + int index, bool *wake_capable) { return -ENXIO; } #endif +static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index, + bool *wake_capable) +{ + return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable); +} + +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, + int index) +{ + return acpi_dev_gpio_irq_wake_get_by(adev, name, index, NULL); +} + static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { - return acpi_dev_gpio_irq_get_by(adev, NULL, index); + return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, NULL); } /* Device properties */ diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 49e5383d4222..17fa26215292 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -13,6 +13,7 @@ #include <linux/compiler.h> +struct clk; struct device; struct ata_port_info; struct ahci_host_priv; @@ -21,8 +22,12 @@ struct scsi_host_template; int ahci_platform_enable_phys(struct ahci_host_priv *hpriv); void ahci_platform_disable_phys(struct ahci_host_priv *hpriv); +struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, + const char *con_id); int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); +int ahci_platform_deassert_rsts(struct ahci_host_priv *hpriv); +int ahci_platform_assert_rsts(struct ahci_host_priv *hpriv); int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); @@ -41,6 +46,7 @@ int ahci_platform_resume_host(struct device *dev); int ahci_platform_suspend(struct device *dev); int ahci_platform_resume(struct device *dev); -#define AHCI_PLATFORM_GET_RESETS 0x01 +#define AHCI_PLATFORM_GET_RESETS BIT(0) +#define AHCI_PLATFORM_RST_TRIGGER BIT(1) #endif /* _AHCI_PLATFORM_H */ diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h new file mode 100644 index 000000000000..1c4b8659f171 --- /dev/null +++ b/include/linux/amd-pstate.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/include/linux/amd-pstate.h + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Author: Meng Li <li.meng@amd.com> + */ + +#ifndef _LINUX_AMD_PSTATE_H +#define _LINUX_AMD_PSTATE_H + +#include <linux/pm_qos.h> + +/********************************************************************* + * AMD P-state INTERFACE * + *********************************************************************/ +/** + * struct amd_aperf_mperf + * @aperf: actual performance frequency clock count + * @mperf: maximum performance frequency clock count + * @tsc: time stamp counter + */ +struct amd_aperf_mperf { + u64 aperf; + u64 mperf; + u64 tsc; +}; + +/** + * struct amd_cpudata - private CPU data for AMD P-State + * @cpu: CPU number + * @req: constraint request to apply + * @cppc_req_cached: cached performance request hints + * @highest_perf: the maximum performance an individual processor may reach, + * assuming ideal conditions + * @nominal_perf: the maximum sustained performance level of the processor, + * assuming ideal operating conditions + * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power + * savings are achieved + * @lowest_perf: the absolute lowest performance level of the processor + * @max_freq: the frequency that mapped to highest_perf + * @min_freq: the frequency that mapped to lowest_perf + * @nominal_freq: the frequency that mapped to nominal_perf + * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf + * @cur: Difference of Aperf/Mperf/tsc count between last and current sample + * @prev: Last Aperf/Mperf/tsc count value read from register + * @freq: current cpu frequency value + * @boost_supported: check whether the Processor or SBIOS supports boost mode + * + * The amd_cpudata is key private data for each CPU thread in AMD P-State, and + * represents all the attributes and goals that AMD P-State requests at runtime. + */ +struct amd_cpudata { + int cpu; + + struct freq_qos_request req[2]; + u64 cppc_req_cached; + + u32 highest_perf; + u32 nominal_perf; + u32 lowest_nonlinear_perf; + u32 lowest_perf; + + u32 max_freq; + u32 min_freq; + u32 nominal_freq; + u32 lowest_nonlinear_freq; + + struct amd_aperf_mperf cur; + struct amd_aperf_mperf prev; + + u64 freq; + bool boost_supported; +}; + +#endif /* _LINUX_AMD_PSTATE_H */ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index e5c76c1ef9ed..5f02d2e6b9d9 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -17,6 +17,7 @@ struct ffa_device { bool mode_32bit; uuid_t uuid; struct device dev; + const struct ffa_ops *ops; }; #define to_ffa_dev(d) container_of(d, struct ffa_device, dev) @@ -47,17 +48,18 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev) } #if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT) -struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id); +struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, + const struct ffa_ops *ops); void ffa_device_unregister(struct ffa_device *ffa_dev); int ffa_driver_register(struct ffa_driver *driver, struct module *owner, const char *mod_name); void ffa_driver_unregister(struct ffa_driver *driver); bool ffa_device_is_valid(struct ffa_device *ffa_dev); -const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev); #else static inline -struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id) +struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, + const struct ffa_ops *ops) { return NULL; } @@ -76,11 +78,6 @@ static inline void ffa_driver_unregister(struct ffa_driver *driver) {} static inline bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } -static inline -const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev) -{ - return NULL; -} #endif /* CONFIG_ARM_FFA_TRANSPORT */ #define ffa_register(driver) \ @@ -109,7 +106,10 @@ struct ffa_partition_info { #define FFA_PARTITION_DIRECT_SEND BIT(1) /* partition can send and receive indirect messages. */ #define FFA_PARTITION_INDIRECT_MSG BIT(2) +/* partition runs in the AArch64 execution state. */ +#define FFA_PARTITION_AARCH64_EXEC BIT(8) u32 properties; + u32 uuid[4]; }; /* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP} which pass data via registers */ @@ -257,18 +257,28 @@ struct ffa_mem_ops_args { struct ffa_mem_region_attributes *attrs; }; -struct ffa_dev_ops { +struct ffa_info_ops { u32 (*api_version_get)(void); int (*partition_info_get)(const char *uuid_str, struct ffa_partition_info *buffer); +}; + +struct ffa_msg_ops { void (*mode_32bit_set)(struct ffa_device *dev); int (*sync_send_receive)(struct ffa_device *dev, struct ffa_send_direct_data *data); +}; + +struct ffa_mem_ops { int (*memory_reclaim)(u64 g_handle, u32 flags); - int (*memory_share)(struct ffa_device *dev, - struct ffa_mem_ops_args *args); - int (*memory_lend)(struct ffa_device *dev, - struct ffa_mem_ops_args *args); + int (*memory_share)(struct ffa_mem_ops_args *args); + int (*memory_lend)(struct ffa_mem_ops_args *args); +}; + +struct ffa_ops { + const struct ffa_info_ops *info_ops; + const struct ffa_msg_ops *msg_ops; + const struct ffa_mem_ops *mem_ops; }; #endif /* _LINUX_ARM_FFA_H */ diff --git a/include/linux/ata.h b/include/linux/ata.h index 21292b5bbb55..e3050e153a71 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -566,6 +566,18 @@ struct ata_bmdma_prd { ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2))) +#define ata_id_has_devslp(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))) +#define ata_id_has_ncq_autosense(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))) +#define ata_id_has_dipm(id) \ + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3))) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ @@ -578,9 +590,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) -#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) -#define ata_id_has_ncq_autosense(id) \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { @@ -592,17 +601,6 @@ static inline bool ata_id_has_hipm(const u16 *id) return val & (1 << 9); } -static inline bool ata_id_has_dipm(const u16 *id) -{ - u16 val = id[ATA_ID_FEATURE_SUPP]; - - if (val == 0 || val == 0xffff) - return false; - - return val & (1 << 3); -} - - static inline bool ata_id_has_fua(const u16 *id) { if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) @@ -771,16 +769,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) static inline bool ata_id_has_sense_reporting(const u16 *id) { - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15))) + return false; + if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14)) return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 6); + return id[ATA_ID_COMMAND_SET_3] & BIT(6); } static inline bool ata_id_sense_reporting_enabled(const u16 *id) { - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + if (!ata_id_has_sense_reporting(id)) + return false; + /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */ + if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14)) return false; - return id[ATA_ID_COMMAND_SET_4] & (1 << 6); + return id[ATA_ID_COMMAND_SET_4] & BIT(6); } /** diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index e3314f746bfa..2d94c30ed439 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -271,6 +271,7 @@ #define BCMA_CC_SROM_CONTROL_OP_WRDIS 0x40000000 #define BCMA_CC_SROM_CONTROL_OP_WREN 0x60000000 #define BCMA_CC_SROM_CONTROL_OTPSEL 0x00000010 +#define BCMA_CC_SROM_CONTROL_OTP_PRESENT 0x00000020 #define BCMA_CC_SROM_CONTROL_LOCK 0x00000008 #define BCMA_CC_SROM_CONTROL_SIZE_MASK 0x00000006 #define BCMA_CC_SROM_CONTROL_SIZE_1K 0x00000000 diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 3dc20c4f394c..8d51f69f9f5e 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -43,9 +43,6 @@ struct linux_binprm { * original userspace. */ point_of_no_return:1; -#ifdef __alpha__ - unsigned int taso:1; -#endif struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; diff --git a/include/linux/bio.h b/include/linux/bio.h index ca22b06700a9..2c5806997bbf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -509,7 +509,7 @@ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) { bio_clear_flag(bio, BIO_REMAPPED); if (bio->bi_bdev != bdev) - bio_clear_flag(bio, BIO_THROTTLED); + bio_clear_flag(bio, BIO_BPS_THROTTLED); bio->bi_bdev = bdev; bio_associate_blkg(bio); } diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index f65410a49fda..7d6d73b78147 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -51,6 +51,7 @@ struct device; * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -164,6 +165,8 @@ bool __bitmap_intersects(const unsigned long *bitmap1, bool __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); +unsigned int __bitmap_weight_and(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -222,7 +225,6 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n #else #define bitmap_copy_le bitmap_copy #endif -unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); @@ -439,6 +441,15 @@ unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits) return __bitmap_weight(src, nbits); } +static __always_inline +unsigned long bitmap_weight_and(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight_and(src1, src2, nbits); +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 3b89c64bcfd8..2ba557e067fe 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -248,6 +248,25 @@ static inline unsigned long __ffs64(u64 word) } /** + * fns - find N'th set bit in a word + * @word: The word to search + * @n: Bit to find + */ +static inline unsigned long fns(unsigned long word, unsigned int n) +{ + unsigned int bit; + + while (word) { + bit = __ffs(word); + if (n-- == 0) + return bit; + __clear_bit(bit, &word); + } + + return BITS_PER_LONG; +} + +/** * assign_bit - Assign value to a bit in memory * @nr: the bit to set * @addr: the address to start counting from @@ -328,10 +347,10 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ + old__ = READ_ONCE(*(ptr)); \ do { \ - old__ = READ_ONCE(*(ptr)); \ new__ = (old__ & ~mask__) | bits__; \ - } while (cmpxchg(ptr, old__, new__) != old__); \ + } while (!try_cmpxchg(ptr, &old__, new__)); \ \ old__; \ }) @@ -343,11 +362,12 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ + old__ = READ_ONCE(*(ptr)); \ do { \ - old__ = READ_ONCE(*(ptr)); \ + if (old__ & test__) \ + break; \ new__ = old__ & ~clear__; \ - } while (!(old__ & test__) && \ - cmpxchg(ptr, old__, new__) != old__); \ + } while (!try_cmpxchg(ptr, &old__, new__)); \ \ !(old__ & test__); \ }) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 9f40dbc65f82..dd5841a42c33 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -18,14 +18,14 @@ struct bio; struct cgroup_subsys_state; -struct request_queue; +struct gendisk; #define FC_APPID_LEN 129 #ifdef CONFIG_BLK_CGROUP extern struct cgroup_subsys_state * const blkcg_root_css; -void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay); void blkcg_maybe_throttle_current(void); bool blk_cgroup_congested(void); void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css); @@ -39,7 +39,6 @@ struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio); static inline void blkcg_maybe_throttle_current(void) { } static inline bool blk_cgroup_congested(void) { return false; } -static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { } static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio) { return NULL; diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 0b1f45c62623..ca544e1d3508 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -5,7 +5,7 @@ struct blk_mq_queue_map; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset); +void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, + int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index 5cc5f0f36218..53b58c610e76 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h @@ -5,7 +5,7 @@ struct blk_mq_tag_set; struct ib_device; -int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, +void blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, struct ib_device *dev, int first_vec); #endif /* _LINUX_BLK_MQ_RDMA_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h index 687ae287e1dc..13226e9b22dd 100644 --- a/include/linux/blk-mq-virtio.h +++ b/include/linux/blk-mq-virtio.h @@ -5,7 +5,7 @@ struct blk_mq_queue_map; struct virtio_device; -int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, +void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec); #endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 92294a5fb083..ba18e9bdb799 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -14,7 +14,12 @@ struct blk_flush_queue; #define BLKDEV_MIN_RQ 4 #define BLKDEV_DEFAULT_RQ 128 -typedef void (rq_end_io_fn)(struct request *, blk_status_t); +enum rq_end_io_ret { + RQ_END_IO_NONE, + RQ_END_IO_FREE, +}; + +typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); /* * request flags */ @@ -268,9 +273,16 @@ static inline void rq_list_move(struct request **src, struct request **dst, rq_list_add(dst, rq); } +/** + * enum blk_eh_timer_return - How the timeout handler should proceed + * @BLK_EH_DONE: The block driver completed the command or will complete it at + * a later time. + * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the + * request to complete. + */ enum blk_eh_timer_return { - BLK_EH_DONE, /* drivers has completed the command */ - BLK_EH_RESET_TIMER, /* reset timer and try again */ + BLK_EH_DONE, + BLK_EH_RESET_TIMER, }; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ @@ -630,7 +642,7 @@ struct blk_mq_ops { * @map_queues: This allows drivers specify their own queue mapping by * overriding the setup-time function that builds the mq_map. */ - int (*map_queues)(struct blk_mq_tag_set *set); + void (*map_queues)(struct blk_mq_tag_set *set); #ifdef CONFIG_BLK_DEBUG_FS /** @@ -841,8 +853,9 @@ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { - if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) + if (!iob || (req->rq_flags & RQF_ELV) || ioerror) return false; + if (!iob->complete) iob->complete = complete; else if (iob->complete != complete) @@ -880,7 +893,7 @@ void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_map_queues(struct blk_mq_queue_map *qmap); +void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); @@ -963,15 +976,17 @@ blk_status_t blk_insert_cloned_request(struct request *rq); struct rq_map_data { struct page **pages; - int page_order; - int nr_entries; unsigned long offset; - int null_mapped; - int from_user; + unsigned short page_order; + unsigned short nr_entries; + bool null_mapped; + bool from_user; }; int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); +int blk_rq_map_user_io(struct request *, struct rq_map_data *, + void __user *, unsigned long, gfp_t, bool, int, bool, int); int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); @@ -980,6 +995,7 @@ int blk_rq_map_kern(struct request_queue *, struct request *, void *, int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); +bool blk_rq_is_poll(struct request *rq); struct req_iterator { struct bvec_iter iter; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1ef99790f6ed..e0b098089ef2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -321,11 +321,10 @@ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ - BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ - BIO_THROTTLED, /* This bio has already been subjected to + BIO_BPS_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b13fdd34a7..50e358a19d98 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -580,9 +580,9 @@ struct request_queue { #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ -#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_NOWAIT)) +#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ + (1UL << QUEUE_FLAG_SAME_COMP) | \ + (1UL << QUEUE_FLAG_NOWAIT)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); @@ -618,7 +618,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) -#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); @@ -1280,6 +1279,11 @@ static inline bool bdev_fua(struct block_device *bdev) return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); } +static inline bool bdev_nowait(struct block_device *bdev) +{ + return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); +} + static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1300,6 +1304,15 @@ static inline bool bdev_is_zoned(struct block_device *bdev) return false; } +static inline bool bdev_op_is_zoned_write(struct block_device *bdev, + blk_opf_t op) +{ + if (!bdev_is_zoned(bdev)) + return false; + + return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES; +} + static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1498,6 +1511,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); +void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); void printk_all_partitions(void); #else static inline void invalidate_bdev(struct block_device *bdev) @@ -1514,6 +1528,9 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } +static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +{ +} static inline void printk_all_partitions(void) { } diff --git a/include/linux/bma150.h b/include/linux/bma150.h index 31c9e323a391..4d4a62d49341 100644 --- a/include/linux/bma150.h +++ b/include/linux/bma150.h @@ -33,8 +33,8 @@ struct bma150_cfg { unsigned char lg_hyst; /* Low-G hysterisis */ unsigned char lg_dur; /* Low-G duration */ unsigned char lg_thres; /* Low-G threshold */ - unsigned char range; /* one of BMA0150_RANGE_xxx */ - unsigned char bandwidth; /* one of BMA0150_BW_xxx */ + unsigned char range; /* one of BMA150_RANGE_xxx */ + unsigned char bandwidth; /* one of BMA150_BW_xxx */ }; struct bma150_platform_data { diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 2bd1b5f8de9b..57e9e109257e 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -414,6 +414,11 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); + +const struct bpf_func_proto * +cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); +const struct bpf_func_proto * +cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } @@ -444,6 +449,18 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } +static inline const struct bpf_func_proto * +cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return NULL; +} + +static inline const struct bpf_func_proto * +cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return NULL; +} + static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 20c26aed7896..9e7d46d16032 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -48,6 +48,7 @@ struct mem_cgroup; struct module; struct bpf_func_state; struct ftrace_ops; +struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -279,14 +280,33 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) } } -/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ -static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} + +/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ +static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy) { u32 curr_off = 0; int i; if (likely(!map->off_arr)) { - memcpy(dst, src, map->value_size); + if (long_memcpy) + bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); + else + memcpy(dst, src, map->value_size); return; } @@ -298,6 +318,36 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } + +static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +{ + __copy_map_value(map, dst, src, false); +} + +static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) +{ + __copy_map_value(map, dst, src, true); +} + +static inline void zero_map_value(struct bpf_map *map, void *dst) +{ + u32 curr_off = 0; + int i; + + if (likely(!map->off_arr)) { + memset(dst, 0, map->value_size); + return; + } + + for (i = 0; i < map->off_arr->cnt; i++) { + u32 next_off = map->off_arr->field_off[i]; + + memset(dst + curr_off, 0, next_off - curr_off); + curr_off += map->off_arr->field_sz[i]; + } + memset(dst + curr_off, 0, map->value_size - curr_off); +} + void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); @@ -401,7 +451,7 @@ enum bpf_type_flag { /* DYNPTR points to memory local to the bpf program. */ DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), - /* DYNPTR points to a ringbuf record. */ + /* DYNPTR points to a kernel-produced ringbuf record. */ DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), /* Size is known at compile time. */ @@ -606,6 +656,7 @@ enum bpf_reg_type { PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_DYNPTR, /* reg points to a dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. */ @@ -726,10 +777,14 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_REG_ARGS 5 +/* The argument is a structure. */ +#define BTF_FMODEL_STRUCT_ARG BIT(0) + struct btf_func_model { u8 ret_size; u8 nr_args; u8 arg_size[MAX_BPF_FUNC_ARGS]; + u8 arg_flags[MAX_BPF_FUNC_ARGS]; }; /* Restore arguments before returning from trampoline to let original function @@ -809,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -891,6 +950,7 @@ struct bpf_dispatcher { struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; int num_progs; void *image; + void *rw_image; u32 image_off; struct bpf_ksym ksym; }; @@ -909,7 +969,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); -int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); +int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -923,7 +983,14 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); }, \ } +#ifdef CONFIG_X86_64 +#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) +#else +#define BPF_DISPATCHER_ATTRIBUTES +#endif + #define DEFINE_BPF_DISPATCHER(name) \ + notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -945,7 +1012,6 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void *bpf_jit_alloc_exec_page(void); void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); @@ -1333,6 +1399,11 @@ struct bpf_array { #define BPF_MAP_CAN_READ BIT(0) #define BPF_MAP_CAN_WRITE BIT(1) +/* Maximum number of user-producer ring buffer samples that can be drained in + * a call to bpf_user_ringbuf_drain(). + */ +#define BPF_MAX_USER_RINGBUF_SAMPLES (128 * 1024) + static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) { u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); @@ -1729,8 +1800,40 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } +/* + * The task type of iterators. + * + * For BPF task iterators, they can be parameterized with various + * parameters to visit only some of tasks. + * + * BPF_TASK_ITER_ALL (default) + * Iterate over resources of every task. + * + * BPF_TASK_ITER_TID + * Iterate over resources of a task/tid. + * + * BPF_TASK_ITER_TGID + * Iterate over resources of every task of a process / task group. + */ +enum bpf_iter_task_type { + BPF_TASK_ITER_ALL = 0, + BPF_TASK_ITER_TID, + BPF_TASK_ITER_TGID, +}; + struct bpf_iter_aux_info { + /* for map_elem iter */ struct bpf_map *map; + + /* for cgroup iter */ + struct { + struct cgroup *start; /* starting cgroup */ + enum bpf_cgroup_iter_order order; + } cgroup; + struct { + enum bpf_iter_task_type type; + u32 pid; + } task; }; typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, @@ -1815,22 +1918,6 @@ int bpf_get_file_flag(int flags); int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); -/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and - * forced to use 'long' read/writes to try to atomically copy long counters. - * Best-effort only. No barriers here, since it _will_ race with concurrent - * updates from BPF programs. Called from bpf syscall and mostly used with - * size 8 or 16 bytes, so ask compiler to inline it. - */ -static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) -{ - const long *lsrc = src; - long *ldst = dst; - - size /= sizeof(long); - while (size--) - *ldst++ = *lsrc++; -} - /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr); @@ -1932,13 +2019,22 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); +struct bpf_kfunc_arg_meta { + u64 r0_size; + bool r0_rdonly; + int ref_obj_id; + u32 flags; +}; + struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); +int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - u32 kfunc_flags); + struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@ -1966,6 +2062,17 @@ static inline bool unprivileged_ebpf_enabled(void) return !sysctl_unprivileged_bpf_disabled; } +/* Not all bpf prog type has the bpf_ctx. + * For the bpf prog type that has initialized the bpf_ctx, + * this function can be used to decide if a kernel function + * is called by a bpf program. + */ +static inline bool has_current_bpf_ctx(void) +{ + return !!current->bpf_ctx; +} + +void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2148,6 +2255,15 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) return ERR_PTR(-ENOTSUPP); } +static inline int btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) +{ + return -EACCES; +} + static inline const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2175,6 +2291,14 @@ static inline bool unprivileged_ebpf_enabled(void) return false; } +static inline bool has_current_bpf_ctx(void) +{ + return false; +} + +static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2349,6 +2473,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto; +extern const struct bpf_func_proto bpf_ktime_get_tai_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; @@ -2361,6 +2486,7 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; +extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; @@ -2410,6 +2536,7 @@ extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; +extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2554,7 +2681,7 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_INVALID, /* Points to memory that is local to the bpf program */ BPF_DYNPTR_TYPE_LOCAL, - /* Underlying data is a ringbuf record */ + /* Underlying data is a kernel-produced ringbuf record */ BPF_DYNPTR_TYPE_RINGBUF, }; @@ -2562,6 +2689,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); +u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); @@ -2571,4 +2699,12 @@ static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {} static inline void bpf_cgroup_atype_put(int cgroup_atype) {} #endif /* CONFIG_BPF_LSM */ +struct key; + +#ifdef CONFIG_KEYS +struct bpf_key { + struct key *key; + bool has_ref; +}; +#endif /* CONFIG_KEYS */ #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h new file mode 100644 index 000000000000..3e164b8efaa9 --- /dev/null +++ b/include/linux/bpf_mem_alloc.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#ifndef _BPF_MEM_ALLOC_H +#define _BPF_MEM_ALLOC_H +#include <linux/compiler_types.h> +#include <linux/workqueue.h> + +struct bpf_mem_cache; +struct bpf_mem_caches; + +struct bpf_mem_alloc { + struct bpf_mem_caches __percpu *caches; + struct bpf_mem_cache __percpu *cache; + struct work_struct work; +}; + +int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu); +void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); + +/* kmalloc/kfree equivalent: */ +void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size); +void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr); + +/* kmem_cache_alloc/free equivalent: */ +void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma); +void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr); + +#endif /* _BPF_MEM_ALLOC_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2b9112b80171..2c6a4f2562a7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -126,6 +126,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2e3bad8640dc..9e1e6965f407 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -212,6 +212,17 @@ struct bpf_reference_state { * is used purely to inform the user of a reference leak. */ int insn_idx; + /* There can be a case like: + * main (frame 0) + * cb (frame 1) + * func (frame 3) + * cb (frame 4) + * Hence for frame 4, if callback_ref just stored boolean, it would be + * impossible to distinguish nested callback refs. Hence store the + * frameno and compare that to callback_ref in check_reference_leak when + * exiting a callback function. + */ + int callback_ref; }; /* state of the program: @@ -237,6 +248,7 @@ struct bpf_func_state { */ u32 async_entry_cnt; bool in_callback_fn; + struct tnum callback_ret_range; bool in_async_callback_fn; /* The following fields should be last. See copy_func_state() */ @@ -337,6 +349,27 @@ struct bpf_verifier_state { iter < frame->allocated_stack / BPF_REG_SIZE; \ iter++, reg = bpf_get_spilled_reg(iter, frame)) +/* Invoke __expr over regsiters in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + ({ \ + struct bpf_verifier_state *___vstate = __vst; \ + int ___i, ___j; \ + for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \ + struct bpf_reg_state *___regs; \ + __state = ___vstate->frame[___i]; \ + ___regs = __state->regs; \ + for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ + __reg = &___regs[___j]; \ + (void)(__expr); \ + } \ + bpf_for_each_spilled_reg(___j, __state, __reg) { \ + if (!__reg) \ + continue; \ + (void)(__expr); \ + } \ + } \ + }) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; @@ -560,6 +593,11 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); +bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, + struct bpf_reg_state *reg); +bool is_dynptr_type_expected(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + enum bpf_arg_type arg_type); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, @@ -587,6 +625,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, struct bpf_attach_target_info *tgt_info); void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); +int mark_chain_precision(struct bpf_verifier_env *env, int regno); + #define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) /* extract base type from bpf_{arg, return, reg}_type. */ diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6ff567ece34a..9e77165f3ef6 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -293,6 +293,7 @@ #define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ #define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ +#define MII_BRCM_FET_SHDW_AM4_STANDBY 0x0008 /* Standby enable */ #define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 #define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 diff --git a/include/linux/btf.h b/include/linux/btf.h index cdb376d53238..f9aababc5d78 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -49,6 +49,17 @@ * for this case. */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ +#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ +#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ + +/* + * Return the name of the passed struct, if exists, or halt the build if for + * example the structure gets renamed. In this way, developers have to revisit + * the code using that structure name, and update it accordingly. + */ +#define stringify_struct(x) \ + ({ BUILD_BUG_ON(sizeof(struct x) < 0); \ + __stringify(x); }) struct btf; struct btf_member; @@ -439,4 +450,14 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt } #endif +static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) +{ + if (!btf_type_is_ptr(t)) + return false; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + + return btf_type_is_struct(t); +} + #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index df518c429667..33fa5e94aa80 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,8 +225,6 @@ struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size, - gfp_t gfp); struct buffer_head *__bread_gfp(struct block_device *, sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); @@ -236,15 +234,16 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -int submit_bh(blk_opf_t, struct buffer_head *); +void submit_bh(blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -int bh_submit_read(struct buffer_head *bh); +int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait); +void __bh_read_batch(int nr, struct buffer_head *bhs[], + blk_opf_t op_flags, bool force_lock); extern int buffer_heads_over_limit; @@ -351,12 +350,6 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } -static inline void -sb_breadahead_unmovable(struct super_block *sb, sector_t block) -{ - __breadahead_gfp(sb->s_bdev, block, sb->s_blocksize, 0); -} - static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { @@ -418,6 +411,41 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); } +static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) +{ + if (!buffer_uptodate(bh) && trylock_buffer(bh)) { + if (!buffer_uptodate(bh)) + __bh_read(bh, op_flags, false); + else + unlock_buffer(bh); + } +} + +static inline void bh_read_nowait(struct buffer_head *bh, blk_opf_t op_flags) +{ + if (!bh_uptodate_or_lock(bh)) + __bh_read(bh, op_flags, false); +} + +/* Returns 1 if buffer uptodated, 0 on success, and -EIO on error. */ +static inline int bh_read(struct buffer_head *bh, blk_opf_t op_flags) +{ + if (bh_uptodate_or_lock(bh)) + return 1; + return __bh_read(bh, op_flags, true); +} + +static inline void bh_read_batch(int nr, struct buffer_head *bhs[]) +{ + __bh_read_batch(nr, bhs, 0, true); +} + +static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[], + blk_opf_t op_flags) +{ + __bh_read_batch(nr, bhs, op_flags, false); +} + /** * __bread() - reads a specified block and returns the bh * @bdev: the block_device to read from diff --git a/include/linux/cache.h b/include/linux/cache.h index d742c57eaee5..5da1bbd96154 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -85,4 +85,17 @@ #define cache_line_size() L1_CACHE_BYTES #endif +/* + * Helper to add padding within a struct to ensure data fall into separate + * cachelines. + */ +#if defined(CONFIG_SMP) +struct cacheline_padding { + char x[0]; +} ____cacheline_internodealigned_in_smp; +#define CACHELINE_PADDING(name) struct cacheline_padding name +#else +#define CACHELINE_PADDING(name) +#endif + #endif /* __LINUX_CACHE_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c3e50e537e39..58f5431a5559 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -147,6 +147,11 @@ static inline u32 can_get_static_ctrlmode(struct can_priv *priv) return priv->ctrlmode & ~priv->ctrlmode_supported; } +static inline bool can_is_canxl_dev_mtu(unsigned int mtu) +{ + return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU); +} + void can_setup(struct net_device *dev); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 182749e858b3..1abc25a8d144 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -20,7 +20,8 @@ void can_flush_echo_skb(struct net_device *dev); int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, - u8 *len_ptr, unsigned int *frame_len_ptr); + unsigned int *len_ptr, + unsigned int *frame_len_ptr); unsigned int __must_check can_get_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); @@ -29,6 +30,9 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx, struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); +struct sk_buff *alloc_canxl_skb(struct net_device *dev, + struct canxl_frame **cxl, + unsigned int data_len); struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf); bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb); @@ -97,10 +101,59 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return nskb; } +static inline bool can_is_can_skb(const struct sk_buff *skb) +{ + struct can_frame *cf = (struct can_frame *)skb->data; + + /* the CAN specific type of skb is identified by its data length */ + return (skb->len == CAN_MTU && cf->len <= CAN_MAX_DLEN); +} + static inline bool can_is_canfd_skb(const struct sk_buff *skb) { + struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + /* the CAN specific type of skb is identified by its data length */ - return skb->len == CANFD_MTU; + return (skb->len == CANFD_MTU && cfd->len <= CANFD_MAX_DLEN); +} + +static inline bool can_is_canxl_skb(const struct sk_buff *skb) +{ + const struct canxl_frame *cxl = (struct canxl_frame *)skb->data; + + if (skb->len < CANXL_HDR_SIZE + CANXL_MIN_DLEN || skb->len > CANXL_MTU) + return false; + + /* this also checks valid CAN XL data length boundaries */ + if (skb->len != CANXL_HDR_SIZE + cxl->len) + return false; + + return cxl->flags & CANXL_XLF; +} + +/* get length element value from can[|fd|xl]_frame structure */ +static inline unsigned int can_skb_get_len_val(struct sk_buff *skb) +{ + const struct canxl_frame *cxl = (struct canxl_frame *)skb->data; + const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + + if (can_is_canxl_skb(skb)) + return cxl->len; + + return cfd->len; +} + +/* get needed data length inside CAN frame for all frame types (RTR aware) */ +static inline unsigned int can_skb_get_data_len(struct sk_buff *skb) +{ + unsigned int len = can_skb_get_len_val(skb); + const struct can_frame *cf = (struct can_frame *)skb->data; + + /* RTR frames have an actual length of zero */ + if (can_is_can_skb(skb) && cf->can_id & CAN_RTR_FLAG) + return 0; + + return len; } #endif /* !_CAN_SKB_H */ diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e7f2fb2fc207..99c1726be6ee 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -207,7 +207,6 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ - bool last_piece; /* current is last piece */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -498,8 +497,7 @@ void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq); void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, struct ceph_msg *msg, size_t length); struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, - size_t *page_offset, size_t *length, - bool *last_piece); + size_t *page_offset, size_t *length); void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes); u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset, diff --git a/include/linux/cfi.h b/include/linux/cfi.h index c6dfc1ed0626..5e134f4ce8b7 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -2,49 +2,38 @@ /* * Clang Control Flow Integrity (CFI) support. * - * Copyright (C) 2021 Google LLC + * Copyright (C) 2022 Google LLC */ #ifndef _LINUX_CFI_H #define _LINUX_CFI_H -#ifdef CONFIG_CFI_CLANG -typedef void (*cfi_check_fn)(uint64_t id, void *ptr, void *diag); - -/* Compiler-generated function in each module, and the kernel */ -extern void __cfi_check(uint64_t id, void *ptr, void *diag); - -/* - * Force the compiler to generate a CFI jump table entry for a function - * and store the jump table address to __cfi_jt_<function>. - */ -#define __CFI_ADDRESSABLE(fn, __attr) \ - const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn - -#ifdef CONFIG_CFI_CLANG_SHADOW - -extern void cfi_module_add(struct module *mod, unsigned long base_addr); -extern void cfi_module_remove(struct module *mod, unsigned long base_addr); - -#else +#include <linux/bug.h> +#include <linux/module.h> -static inline void cfi_module_add(struct module *mod, unsigned long base_addr) {} -static inline void cfi_module_remove(struct module *mod, unsigned long base_addr) {} - -#endif /* CONFIG_CFI_CLANG_SHADOW */ - -#else /* !CONFIG_CFI_CLANG */ - -#ifdef CONFIG_X86_KERNEL_IBT - -#define __CFI_ADDRESSABLE(fn, __attr) \ - const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn +#ifdef CONFIG_CFI_CLANG +enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, + unsigned long *target, u32 type); -#endif /* CONFIG_X86_KERNEL_IBT */ +static inline enum bug_trap_type report_cfi_failure_noaddr(struct pt_regs *regs, + unsigned long addr) +{ + return report_cfi_failure(regs, addr, NULL, 0); +} +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +bool is_cfi_trap(unsigned long addr); +#endif #endif /* CONFIG_CFI_CLANG */ -#ifndef __CFI_ADDRESSABLE -#define __CFI_ADDRESSABLE(fn, __attr) -#endif +#ifdef CONFIG_MODULES +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod); +#else +static inline void module_cfi_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) {} +#endif /* CONFIG_ARCH_USES_CFI_TRAPS */ +#endif /* CONFIG_MODULES */ #endif /* _LINUX_CFI_H */ diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4bcf56b3491c..6e01f10f0d88 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -126,11 +126,11 @@ enum { CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ - CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ + __CFTYPE_ADDED = (1 << 18), }; /* @@ -384,7 +384,7 @@ struct cgroup { /* * The depth this cgroup is at. The root is at depth zero and each * step down the hierarchy increments the level. This along with - * ancestor_ids[] can determine whether a given cgroup is a + * ancestors[] can determine whether a given cgroup is a * descendant of another without traversing the hierarchy. */ int level; @@ -428,6 +428,9 @@ struct cgroup { struct cgroup_file procs_file; /* handle for "cgroup.procs" */ struct cgroup_file events_file; /* handle for "cgroup.events" */ + /* handles for "{cpu,memory,io,irq}.pressure" */ + struct cgroup_file psi_files[NR_PSI_RESOURCES]; + /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through @@ -504,8 +507,8 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; - /* ids of the ancestors at each level including self */ - u64 ancestor_ids[]; + /* All ancestors including self */ + struct cgroup *ancestors[]; }; /* @@ -522,11 +525,15 @@ struct cgroup_root { /* Unique id for this hierarchy. */ int hierarchy_id; - /* The root cgroup. Root is destroyed on its release. */ + /* + * The root cgroup. The containing cgroup_root will be destroyed on its + * release. cgrp->ancestors[0] will be used overflowing into the + * following field. cgrp_ancestor_storage must immediately follow. + */ struct cgroup cgrp; - /* for cgrp->ancestor_ids[0] */ - u64 cgrp_ancestor_id_storage; + /* must follow cgrp for cgrp->ancestors[0], see above */ + struct cgroup *cgrp_ancestor_storage; /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ac5d0515680e..f2a9f2274c3b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -114,6 +114,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); +void cgroup_file_show(struct cgroup_file *cfile, bool show); int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); @@ -432,6 +433,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -446,7 +459,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -574,7 +586,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; - return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor); + return cgrp->ancestors[ancestor->level] == ancestor; } /** @@ -591,11 +603,9 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, int ancestor_level) { - if (cgrp->level < ancestor_level) + if (ancestor_level < 0 || ancestor_level > cgrp->level) return NULL; - while (cgrp && cgrp->level > ancestor_level) - cgrp = cgroup_parent(cgrp); - return cgrp; + return cgrp->ancestors[ancestor_level]; } /** @@ -672,11 +682,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) -{ - return cgrp->psi; -} - bool cgroup_psi_enabled(void); static inline void cgroup_init_kthreadd(void) @@ -708,6 +713,8 @@ struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, @@ -747,11 +754,6 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} - -static inline struct cgroup *cgroup_get_from_id(u64 id) -{ - return NULL; -} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 1615010aa0ec..267cd06b54a0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -42,6 +42,8 @@ struct dentry; * struct clk_rate_request - Structure encoding the clk constraints that * a clock user might require. * + * Should be initialized by calling clk_hw_init_rate_request(). + * * @rate: Requested clock rate. This field will be adjusted by * clock drivers according to hardware capabilities. * @min_rate: Minimum rate imposed by clk users. @@ -60,6 +62,15 @@ struct clk_rate_request { struct clk_hw *best_parent_hw; }; +void clk_hw_init_rate_request(const struct clk_hw *hw, + struct clk_rate_request *req, + unsigned long rate); +void clk_hw_forward_rate_request(const struct clk_hw *core, + const struct clk_rate_request *old_req, + const struct clk_hw *parent, + struct clk_rate_request *req, + unsigned long parent_rate); + /** * struct clk_duty - Struture encoding the duty cycle ratio of a clock * @@ -118,8 +129,9 @@ struct clk_duty { * * @recalc_rate Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to - * ensure that the prepare_mutex is held across this call. - * Returns the calculated rate. Optional, but recommended - if + * ensure that the prepare_mutex is held across this call. If the + * driver cannot figure out a rate for this clock, it must return + * 0. Returns the calculated rate. Optional, but recommended - if * this op is not set then clock rate will be initialized to 0. * * @round_rate: Given a target rate as input, returns the closest rate actually @@ -350,7 +362,7 @@ struct clk_hw *__clk_hw_register_fixed_rate(struct device *dev, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, unsigned long fixed_rate, unsigned long fixed_accuracy, - unsigned long clk_fixed_flags); + unsigned long clk_fixed_flags, bool devm); struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate); @@ -365,7 +377,20 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, */ #define clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ - NULL, (flags), (fixed_rate), 0, 0) + NULL, (flags), (fixed_rate), 0, 0, false) + +/** + * devm_clk_hw_register_fixed_rate - register fixed-rate clock with the clock + * framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define devm_clk_hw_register_fixed_rate(dev, name, parent_name, flags, fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ + NULL, (flags), (fixed_rate), 0, 0, true) /** * clk_hw_register_fixed_rate_parent_hw - register fixed-rate clock with * the clock framework @@ -378,7 +403,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, #define clk_hw_register_fixed_rate_parent_hw(dev, name, parent_hw, flags, \ fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw), \ - NULL, (flags), (fixed_rate), 0, 0) + NULL, (flags), (fixed_rate), 0, 0, false) /** * clk_hw_register_fixed_rate_parent_data - register fixed-rate clock with * the clock framework @@ -392,7 +417,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, fixed_rate) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ (parent_data), (flags), (fixed_rate), 0, \ - 0) + 0, false) /** * clk_hw_register_fixed_rate_with_accuracy - register fixed-rate clock with * the clock framework @@ -408,7 +433,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, fixed_accuracy) \ __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), \ NULL, NULL, (flags), (fixed_rate), \ - (fixed_accuracy), 0) + (fixed_accuracy), 0, false) /** * clk_hw_register_fixed_rate_with_accuracy_parent_hw - register fixed-rate * clock with the clock framework @@ -423,7 +448,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, parent_hw, flags, fixed_rate, fixed_accuracy) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, (parent_hw) \ NULL, NULL, (flags), (fixed_rate), \ - (fixed_accuracy), 0) + (fixed_accuracy), 0, false) /** * clk_hw_register_fixed_rate_with_accuracy_parent_data - register fixed-rate * clock with the clock framework @@ -438,7 +463,21 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, parent_data, flags, fixed_rate, fixed_accuracy) \ __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ (parent_data), NULL, (flags), \ - (fixed_rate), (fixed_accuracy), 0) + (fixed_rate), (fixed_accuracy), 0, false) +/** + * clk_hw_register_fixed_rate_parent_accuracy - register fixed-rate clock with + * the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define clk_hw_register_fixed_rate_parent_accuracy(dev, name, parent_data, \ + flags, fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ + (parent_data), (flags), (fixed_rate), 0, \ + CLK_FIXED_RATE_PARENT_ACCURACY, false) void clk_unregister_fixed_rate(struct clk *clk); void clk_hw_unregister_fixed_rate(struct clk_hw *hw); @@ -957,6 +996,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, (parent_names), NULL, NULL, (flags), (reg), \ (shift), (mask), (clk_mux_flags), (table), \ (lock)) +#define clk_hw_register_mux_table_parent_data(dev, name, parent_data, \ + num_parents, flags, reg, shift, mask, \ + clk_mux_flags, table, lock) \ + __clk_hw_register_mux((dev), NULL, (name), (num_parents), \ + NULL, NULL, (parent_data), (flags), (reg), \ + (shift), (mask), (clk_mux_flags), (table), \ + (lock)) #define clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \ shift, width, clk_mux_flags, lock) \ __clk_hw_register_mux((dev), NULL, (name), (num_parents), \ @@ -974,6 +1020,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, __clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \ (parent_data), (flags), (reg), (shift), \ BIT((width)) - 1, (clk_mux_flags), NULL, (lock)) +#define clk_hw_register_mux_parent_data_table(dev, name, parent_data, \ + num_parents, flags, reg, shift, \ + width, clk_mux_flags, table, \ + lock) \ + __clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, NULL, \ + (parent_data), (flags), (reg), (shift), \ + BIT((width)) - 1, (clk_mux_flags), table, (lock)) #define devm_clk_hw_register_mux(dev, name, parent_names, num_parents, flags, reg, \ shift, width, clk_mux_flags, lock) \ __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), \ @@ -987,6 +1040,13 @@ struct clk *clk_register_mux_table(struct device *dev, const char *name, (parent_hws), NULL, (flags), (reg), \ (shift), BIT((width)) - 1, \ (clk_mux_flags), NULL, (lock)) +#define devm_clk_hw_register_mux_parent_data_table(dev, name, parent_data, \ + num_parents, flags, reg, shift, \ + width, clk_mux_flags, table, \ + lock) \ + __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, \ + NULL, (parent_data), (flags), (reg), (shift), \ + BIT((width)) - 1, (clk_mux_flags), table, (lock)) int clk_mux_val_to_index(struct clk_hw *hw, const u32 *table, unsigned int flags, unsigned int val); @@ -1255,6 +1315,8 @@ int clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req, unsigned long flags); void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent); +void clk_hw_get_rate_range(struct clk_hw *hw, unsigned long *min_rate, + unsigned long *max_rate); void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, unsigned long max_rate); @@ -1454,7 +1516,7 @@ int devm_of_clk_add_hw_provider(struct device *dev, void *data), void *data); void of_clk_del_provider(struct device_node *np); -void devm_of_clk_del_provider(struct device *dev); + struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, @@ -1491,7 +1553,7 @@ static inline int devm_of_clk_add_hw_provider(struct device *dev, return 0; } static inline void of_clk_del_provider(struct device_node *np) {} -static inline void devm_of_clk_del_provider(struct device *dev) {} + static inline struct clk *of_clk_src_simple_get( struct of_phandle_args *clkspec, void *data) { diff --git a/include/linux/clk.h b/include/linux/clk.h index c13061cabdfc..1ef013324237 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -799,7 +799,7 @@ int clk_set_rate_exclusive(struct clk *clk, unsigned long rate); * * Returns true if @parent is a possible parent for @clk, false otherwise. */ -bool clk_has_parent(struct clk *clk, struct clk *parent); +bool clk_has_parent(const struct clk *clk, const struct clk *parent); /** * clk_set_rate_range - set a rate range for a clock source diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 3484309b59bf..7af499bdbecb 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -12,6 +12,8 @@ #ifndef AT91_PMC_H #define AT91_PMC_H +#include <linux/bits.h> + #define AT91_PMC_V1 (1) /* PMC version 1 */ #define AT91_PMC_V2 (2) /* PMC version 2 [SAM9X60] */ @@ -45,8 +47,8 @@ #define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */ #define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */ -#define AT91_PMC_PLL_ACR_DEFAULT_UPLL 0x12020010UL /* Default PLL ACR value for UPLL */ -#define AT91_PMC_PLL_ACR_DEFAULT_PLLA 0x00020010UL /* Default PLL ACR value for PLLA */ +#define AT91_PMC_PLL_ACR_DEFAULT_UPLL UL(0x12020010) /* Default PLL ACR value for UPLL */ +#define AT91_PMC_PLL_ACR_DEFAULT_PLLA UL(0x00020010) /* Default PLL ACR value for PLLA */ #define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */ #define AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */ diff --git a/include/linux/clk/davinci.h b/include/linux/clk/davinci.h index 8a7b5cd7eac0..f6ebab6228c2 100644 --- a/include/linux/clk/davinci.h +++ b/include/linux/clk/davinci.h @@ -28,13 +28,5 @@ int dm365_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgch int dm365_pll2_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); int dm365_psc_init(struct device *dev, void __iomem *base); #endif -#ifdef CONFIG_ARCH_DAVINCI_DM644x -int dm644x_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm644x_psc_init(struct device *dev, void __iomem *base); -#endif -#ifdef CONFIG_ARCH_DAVINCI_DM646x -int dm646x_pll1_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -int dm646x_psc_init(struct device *dev, void __iomem *base); -#endif #endif /* __LINUX_CLK_DAVINCI_PLL_H___ */ diff --git a/include/linux/clk/spear.h b/include/linux/clk/spear.h index a64d034ceddd..eaf95ca656f8 100644 --- a/include/linux/clk/spear.h +++ b/include/linux/clk/spear.h @@ -8,6 +8,20 @@ #ifndef __LINUX_CLK_SPEAR_H #define __LINUX_CLK_SPEAR_H +#ifdef CONFIG_ARCH_SPEAR3XX +void __init spear3xx_clk_init(void __iomem *misc_base, + void __iomem *soc_config_base); +#else +static inline void __init spear3xx_clk_init(void __iomem *misc_base, + void __iomem *soc_config_base) {} +#endif + +#ifdef CONFIG_ARCH_SPEAR6XX +void __init spear6xx_clk_init(void __iomem *misc_base); +#else +static inline void __init spear6xx_clk_init(void __iomem *misc_base) {} +#endif + #ifdef CONFIG_MACH_SPEAR1310 void __init spear1310_clk_init(void __iomem *misc_base, void __iomem *ras_base); #else diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 8a8423eb8e9a..45570bc21a43 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -46,6 +46,4 @@ int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *); int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw, const char *con_id, const char *dev_id); -void devm_clk_release_clkdev(struct device *dev, const char *con_id, - const char *dev_id); #endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index c84fec767445..6cfd6902bd5b 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -51,6 +51,29 @@ #define __no_sanitize_undefined #endif +#if __has_feature(memory_sanitizer) +#define __SANITIZE_MEMORY__ +/* + * Unlike other sanitizers, KMSAN still inserts code into functions marked with + * no_sanitize("kernel-memory"). Using disable_sanitizer_instrumentation + * provides the behavior consistent with other __no_sanitize_ attributes, + * guaranteeing that __no_sanitize_memory functions remain uninstrumented. + */ +#define __no_sanitize_memory __disable_sanitizer_instrumentation + +/* + * The __no_kmsan_checks attribute ensures that a function does not produce + * false positive reports by: + * - initializing all local variables and memory stores in this function; + * - skipping all shadow checks; + * - passing initialized arguments to this function's callees. + */ +#define __no_kmsan_checks __attribute__((no_sanitize("kernel-memory"))) +#else +#define __no_sanitize_memory +#define __no_kmsan_checks +#endif + /* * Support for __has_feature(coverage_sanitizer) was added in Clang 13 together * with no_sanitize("coverage"). Prior versions of Clang support coverage @@ -66,17 +89,9 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#define __nocfi __attribute__((__no_sanitize__("cfi"))) -#define __cficanonical __attribute__((__cfi_canonical_jump_table__)) - -#if defined(CONFIG_CFI_CLANG) -/* - * With CONFIG_CFI_CLANG, the compiler replaces function address - * references with the address of the function's CFI jump table - * entry. The function_nocfi macro always returns the address of the - * actual function instead. - */ -#define function_nocfi(x) __builtin_function_start(x) +#if __has_feature(kcfi) +/* Disable CFI checking inside a function. */ +#define __nocfi __attribute__((__no_sanitize__("kcfi"))) #endif /* diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 9b157b71036f..f55a37efdb97 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -115,6 +115,12 @@ #endif /* + * GCC does not support KMSAN. + */ +#define __no_sanitize_memory +#define __no_kmsan_checks + +/* * Turn individual warnings and errors on and off locally, depending * on version. */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7713d7bcdaea..973a1bfd7ef5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -203,16 +203,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) -/* - * With CONFIG_CFI_CLANG, the compiler replaces function addresses in - * instrumented C code with jump table addresses. Architectures that - * support CFI can define this macro to return the actual function address - * when needed. - */ -#ifndef function_nocfi -#define function_nocfi(x) (x) -#endif - #endif /* __KERNEL__ */ /* @@ -221,9 +211,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * otherwise, or eliminated entirely due to lack of references that are * visible to the compiler. */ -#define __ADDRESSABLE(sym) \ - static void * __section(".discard.addressable") __used \ +#define ___ADDRESSABLE(sym, __attrs) \ + static void * __used __attrs \ __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; +#define __ADDRESSABLE(sym) \ + ___ADDRESSABLE(sym, __section(".discard.addressable")) /** * offset_to_ptr - convert a relative memory offset to an absolute pointer diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 445e80517cab..898b3458b24a 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -35,7 +35,8 @@ /* * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally - * available and includes other attributes. + * available and includes other attributes. For GCC < 9.1, __alloc_size__ gets undefined + * in compiler-gcc.h, due to misbehaviors. * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size @@ -371,4 +372,11 @@ */ #define __weak __attribute__((__weak__)) +/* + * Used by functions that use '__builtin_return_address'. These function + * don't want to be splited or made inline, which can make + * the '__builtin_return_address' get unexpected address. + */ +#define __fix_address noinline __noclone + #endif /* __LINUX_COMPILER_ATTRIBUTES_H */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4f2a819fd60a..eb0466236661 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -4,8 +4,12 @@ #ifndef __ASSEMBLY__ +/* + * Skipped when running bindgen due to a libclang issue; + * see https://github.com/rust-lang/rust-bindgen/issues/2244. + */ #if defined(CONFIG_DEBUG_INFO_BTF) && defined(CONFIG_PAHOLE_HAS_BTF_TAG) && \ - __has_attribute(btf_type_tag) + __has_attribute(btf_type_tag) && !defined(__BINDGEN__) # define BTF_TYPE_TAG(value) __attribute__((btf_type_tag(#value))) #else # define BTF_TYPE_TAG(value) /* nothing */ @@ -229,7 +233,8 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ noinline notrace __attribute((__section__(".noinstr.text"))) \ - __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage + __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ + __no_sanitize_memory #endif /* __KERNEL__ */ @@ -265,20 +270,18 @@ struct ftrace_likely_data { # define __nocfi #endif -#ifndef __cficanonical -# define __cficanonical -#endif - /* * Any place that could be marked with the "alloc_size" attribute is also - * a place to be marked with the "malloc" attribute. Do this as part of the - * __alloc_size macro to avoid redundant attributes and to avoid missing a - * __malloc marking. + * a place to be marked with the "malloc" attribute, except those that may + * be performing a _reallocation_, as that may alias the existing pointer. + * For these, use __realloc_size(). */ #ifdef __alloc_size__ # define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc +# define __realloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) #else # define __alloc_size(x, ...) __malloc +# define __realloc_size(x, ...) #endif #ifndef asm_volatile_goto diff --git a/include/linux/completion.h b/include/linux/completion.h index 51d9ab079629..62b32b19e0a8 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -103,6 +103,7 @@ extern void wait_for_completion(struct completion *); extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); +extern int wait_for_completion_state(struct completion *x, unsigned int state); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); extern unsigned long wait_for_completion_io_timeout(struct completion *x, diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 9f445f09fcfe..1554021231f9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -372,6 +372,29 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, return csa->read(offset, true, false); } +static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa, + u32 lo_offset, u32 hi_offset) +{ + if (likely(csa->io_mem)) { + return readl_relaxed(csa->base + lo_offset) | + ((u64)readl_relaxed(csa->base + hi_offset) << 32); + } + + return csa->read(lo_offset, true, false) | (csa->read(hi_offset, true, false) << 32); +} + +static inline void csdev_access_relaxed_write_pair(struct csdev_access *csa, u64 val, + u32 lo_offset, u32 hi_offset) +{ + if (likely(csa->io_mem)) { + writel_relaxed((u32)val, csa->base + lo_offset); + writel_relaxed((u32)(val >> 32), csa->base + hi_offset); + } else { + csa->write((u32)val, lo_offset, true, false); + csa->write((u32)(val >> 32), hi_offset, true, false); + } +} + static inline u32 csdev_access_read32(struct csdev_access *csa, u32 offset) { if (likely(csa->io_mem)) diff --git a/include/linux/counter.h b/include/linux/counter.h index 1fe17f5adb09..c41fa602ed28 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -31,6 +31,8 @@ enum counter_comp_type { COUNTER_COMP_ENUM, COUNTER_COMP_COUNT_DIRECTION, COUNTER_COMP_COUNT_MODE, + COUNTER_COMP_SIGNAL_POLARITY, + COUNTER_COMP_ARRAY, }; /** @@ -38,66 +40,114 @@ enum counter_comp_type { * @type: Counter component data type * @name: device-specific component name * @priv: component-relevant data - * @action_read: Synapse action mode read callback. The read value of the + * @action_read: Synapse action mode read callback. The read value of the * respective Synapse action mode should be passed back via * the action parameter. - * @device_u8_read: Device u8 component read callback. The read value of the + * @device_u8_read: Device u8 component read callback. The read value of the * respective Device u8 component should be passed back via * the val parameter. - * @count_u8_read: Count u8 component read callback. The read value of the + * @count_u8_read: Count u8 component read callback. The read value of the * respective Count u8 component should be passed back via * the val parameter. - * @signal_u8_read: Signal u8 component read callback. The read value of the + * @signal_u8_read: Signal u8 component read callback. The read value of the * respective Signal u8 component should be passed back via * the val parameter. - * @device_u32_read: Device u32 component read callback. The read value of + * @device_u32_read: Device u32 component read callback. The read value of * the respective Device u32 component should be passed * back via the val parameter. - * @count_u32_read: Count u32 component read callback. The read value of the + * @count_u32_read: Count u32 component read callback. The read value of the * respective Count u32 component should be passed back via * the val parameter. - * @signal_u32_read: Signal u32 component read callback. The read value of + * @signal_u32_read: Signal u32 component read callback. The read value of * the respective Signal u32 component should be passed * back via the val parameter. - * @device_u64_read: Device u64 component read callback. The read value of + * @device_u64_read: Device u64 component read callback. The read value of * the respective Device u64 component should be passed * back via the val parameter. - * @count_u64_read: Count u64 component read callback. The read value of the + * @count_u64_read: Count u64 component read callback. The read value of the * respective Count u64 component should be passed back via * the val parameter. - * @signal_u64_read: Signal u64 component read callback. The read value of + * @signal_u64_read: Signal u64 component read callback. The read value of * the respective Signal u64 component should be passed * back via the val parameter. - * @action_write: Synapse action mode write callback. The write value of + * @signal_array_u32_read: Signal u32 array component read callback. The + * index of the respective Count u32 array + * component element is passed via the idx + * parameter. The read value of the respective + * Count u32 array component element should be + * passed back via the val parameter. + * @device_array_u64_read: Device u64 array component read callback. The + * index of the respective Device u64 array + * component element is passed via the idx + * parameter. The read value of the respective + * Device u64 array component element should be + * passed back via the val parameter. + * @count_array_u64_read: Count u64 array component read callback. The + * index of the respective Count u64 array + * component element is passed via the idx + * parameter. The read value of the respective + * Count u64 array component element should be + * passed back via the val parameter. + * @signal_array_u64_read: Signal u64 array component read callback. The + * index of the respective Count u64 array + * component element is passed via the idx + * parameter. The read value of the respective + * Count u64 array component element should be + * passed back via the val parameter. + * @action_write: Synapse action mode write callback. The write value of * the respective Synapse action mode is passed via the * action parameter. - * @device_u8_write: Device u8 component write callback. The write value of + * @device_u8_write: Device u8 component write callback. The write value of * the respective Device u8 component is passed via the val * parameter. - * @count_u8_write: Count u8 component write callback. The write value of + * @count_u8_write: Count u8 component write callback. The write value of * the respective Count u8 component is passed via the val * parameter. - * @signal_u8_write: Signal u8 component write callback. The write value of + * @signal_u8_write: Signal u8 component write callback. The write value of * the respective Signal u8 component is passed via the val * parameter. - * @device_u32_write: Device u32 component write callback. The write value of + * @device_u32_write: Device u32 component write callback. The write value of * the respective Device u32 component is passed via the * val parameter. - * @count_u32_write: Count u32 component write callback. The write value of + * @count_u32_write: Count u32 component write callback. The write value of * the respective Count u32 component is passed via the val * parameter. - * @signal_u32_write: Signal u32 component write callback. The write value of + * @signal_u32_write: Signal u32 component write callback. The write value of * the respective Signal u32 component is passed via the * val parameter. - * @device_u64_write: Device u64 component write callback. The write value of + * @device_u64_write: Device u64 component write callback. The write value of * the respective Device u64 component is passed via the * val parameter. - * @count_u64_write: Count u64 component write callback. The write value of + * @count_u64_write: Count u64 component write callback. The write value of * the respective Count u64 component is passed via the val * parameter. - * @signal_u64_write: Signal u64 component write callback. The write value of + * @signal_u64_write: Signal u64 component write callback. The write value of * the respective Signal u64 component is passed via the * val parameter. + * @signal_array_u32_write: Signal u32 array component write callback. The + * index of the respective Signal u32 array + * component element is passed via the idx + * parameter. The write value of the respective + * Signal u32 array component element is passed via + * the val parameter. + * @device_array_u64_write: Device u64 array component write callback. The + * index of the respective Device u64 array + * component element is passed via the idx + * parameter. The write value of the respective + * Device u64 array component element is passed via + * the val parameter. + * @count_array_u64_write: Count u64 array component write callback. The + * index of the respective Count u64 array + * component element is passed via the idx + * parameter. The write value of the respective + * Count u64 array component element is passed via + * the val parameter. + * @signal_array_u64_write: Signal u64 array component write callback. The + * index of the respective Signal u64 array + * component element is passed via the idx + * parameter. The write value of the respective + * Signal u64 array component element is passed via + * the val parameter. */ struct counter_comp { enum counter_comp_type type; @@ -125,6 +175,17 @@ struct counter_comp { struct counter_count *count, u64 *val); int (*signal_u64_read)(struct counter_device *counter, struct counter_signal *signal, u64 *val); + int (*signal_array_u32_read)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u32 *val); + int (*device_array_u64_read)(struct counter_device *counter, + size_t idx, u64 *val); + int (*count_array_u64_read)(struct counter_device *counter, + struct counter_count *count, + size_t idx, u64 *val); + int (*signal_array_u64_read)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u64 *val); }; union { int (*action_write)(struct counter_device *counter, @@ -148,6 +209,17 @@ struct counter_comp { struct counter_count *count, u64 val); int (*signal_u64_write)(struct counter_device *counter, struct counter_signal *signal, u64 val); + int (*signal_array_u32_write)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u32 val); + int (*device_array_u64_write)(struct counter_device *counter, + size_t idx, u64 val); + int (*count_array_u64_write)(struct counter_device *counter, + struct counter_count *count, + size_t idx, u64 val); + int (*signal_array_u64_write)(struct counter_device *counter, + struct counter_signal *signal, + size_t idx, u64 val); }; }; @@ -452,6 +524,60 @@ struct counter_available { .priv = &(_available), \ } +struct counter_array { + enum counter_comp_type type; + const struct counter_available *avail; + union { + size_t length; + size_t idx; + }; +}; + +#define DEFINE_COUNTER_ARRAY_U64(_name, _length) \ + struct counter_array _name = { \ + .type = COUNTER_COMP_U64, \ + .length = (_length), \ + } + +#define DEFINE_COUNTER_ARRAY_CAPTURE(_name, _length) \ + DEFINE_COUNTER_ARRAY_U64(_name, _length) + +#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _enums, _length) \ + DEFINE_COUNTER_AVAILABLE(_name##_available, _enums); \ + struct counter_array _name = { \ + .type = COUNTER_COMP_SIGNAL_POLARITY, \ + .avail = &(_name##_available), \ + .length = (_length), \ + } + +#define COUNTER_COMP_DEVICE_ARRAY_U64(_name, _read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = (_name), \ + .device_array_u64_read = (_read), \ + .device_array_u64_write = (_write), \ + .priv = &(_array), \ +} +#define COUNTER_COMP_COUNT_ARRAY_U64(_name, _read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = (_name), \ + .count_array_u64_read = (_read), \ + .count_array_u64_write = (_write), \ + .priv = &(_array), \ +} +#define COUNTER_COMP_SIGNAL_ARRAY_U64(_name, _read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = (_name), \ + .signal_array_u64_read = (_read), \ + .signal_array_u64_write = (_write), \ + .priv = &(_array), \ +} + +#define COUNTER_COMP_CAPTURE(_read, _write) \ + COUNTER_COMP_COUNT_U64("capture", _read, _write) + #define COUNTER_COMP_CEILING(_read, _write) \ COUNTER_COMP_COUNT_U64("ceiling", _read, _write) @@ -477,10 +603,31 @@ struct counter_available { #define COUNTER_COMP_FLOOR(_read, _write) \ COUNTER_COMP_COUNT_U64("floor", _read, _write) +#define COUNTER_COMP_POLARITY(_read, _write, _available) \ +{ \ + .type = COUNTER_COMP_SIGNAL_POLARITY, \ + .name = "polarity", \ + .signal_u32_read = (_read), \ + .signal_u32_write = (_write), \ + .priv = &(_available), \ +} + #define COUNTER_COMP_PRESET(_read, _write) \ COUNTER_COMP_COUNT_U64("preset", _read, _write) #define COUNTER_COMP_PRESET_ENABLE(_read, _write) \ COUNTER_COMP_COUNT_BOOL("preset_enable", _read, _write) +#define COUNTER_COMP_ARRAY_CAPTURE(_read, _write, _array) \ + COUNTER_COMP_COUNT_ARRAY_U64("capture", _read, _write, _array) + +#define COUNTER_COMP_ARRAY_POLARITY(_read, _write, _array) \ +{ \ + .type = COUNTER_COMP_ARRAY, \ + .name = "polarity", \ + .signal_array_u32_read = (_read), \ + .signal_array_u32_write = (_write), \ + .priv = &(_array), \ +} + #endif /* _COUNTER_H_ */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bd047864c7ac..c2aa0aa26b45 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -35,19 +35,23 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; */ #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) -#if NR_CPUS == 1 -#define nr_cpu_ids 1U +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) +#define nr_cpu_ids ((unsigned int)NR_CPUS) #else extern unsigned int nr_cpu_ids; #endif -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ -#define nr_cpumask_bits nr_cpu_ids +static inline void set_nr_cpu_ids(unsigned int nr) +{ +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) + WARN_ON(nr != nr_cpu_ids); #else -#define nr_cpumask_bits ((unsigned int)NR_CPUS) + nr_cpu_ids = nr; #endif +} + +/* Deprecated. Always use nr_cpu_ids. */ +#define nr_cpumask_bits nr_cpu_ids /* * The following particular system cpumasks and operations manage @@ -67,10 +71,6 @@ extern unsigned int nr_cpu_ids; * cpu_online_mask is the dynamic subset of cpu_present_mask, * indicating those CPUs available for scheduling. * - * If HOTPLUG is enabled, then cpu_possible_mask is forced to have - * all NR_CPUS bits set, otherwise it is just the set of CPUs that - * ACPI reports present at boot. - * * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise * cpu_present_mask is just a copy of cpu_possible_mask. @@ -246,9 +246,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = cpumask_next((cpu), (mask)), \ - (cpu) < nr_cpu_ids;) + for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) /** * for_each_cpu_not - iterate over every cpu in a complemented mask @@ -258,9 +256,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_not(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = cpumask_next_zero((cpu), (mask)), \ - (cpu) < nr_cpu_ids;) + for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) #if NR_CPUS == 1 static inline @@ -293,10 +289,8 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * * After the loop, cpu is >= nr_cpu_ids. */ -#define for_each_cpu_wrap(cpu, mask, start) \ - for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ - (cpu) < nr_cpumask_bits; \ - (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) +#define for_each_cpu_wrap(cpu, mask, start) \ + for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start) /** * for_each_cpu_and - iterate over every cpu in both masks @@ -313,9 +307,25 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ - for ((cpu) = -1; \ - (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \ - (cpu) < nr_cpu_ids;) + for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + +/** + * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding + * those present in another. + * @cpu: the (optionally unsigned) integer iterator + * @mask1: the first cpumask pointer + * @mask2: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_andnot(&tmp, &mask1, &mask2); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_andnot(cpu, mask1, mask2) \ + for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) /** * cpumask_any_but - return a "random" in a cpumask, but not this one. @@ -337,6 +347,50 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) return i; } +/** + * cpumask_nth - get the first cpu in a cpumask + * @srcp: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) +{ + return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu)); +} + +/** + * cpumask_nth_and - get the first cpu in 2 cpumasks + * @srcp1: the cpumask pointer + * @srcp2: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static inline +unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), + nr_cpumask_bits, cpumask_check(cpu)); +} + +/** + * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * @srcp1: the cpumask pointer + * @srcp2: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static inline +unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), + nr_cpumask_bits, cpumask_check(cpu)); +} + #define CPU_BITS_NONE \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ @@ -587,6 +641,17 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) } /** + * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) + * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. + * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + */ +static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); +} + +/** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift @@ -1127,9 +1192,10 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, * cover a worst-case of every other cpu being on one of two nodes for a * very large NR_CPUS. * - * Use PAGE_SIZE as a minimum for smaller configurations. + * Use PAGE_SIZE as a minimum for smaller configurations while avoiding + * unsigned comparison to -1. */ -#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \ +#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) #define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) diff --git a/include/linux/damon.h b/include/linux/damon.h index 7b1f4a488230..620ada094c3b 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -216,13 +216,26 @@ struct damos_stat { }; /** - * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * struct damos_access_pattern - Target access pattern of the given scheme. * @min_sz_region: Minimum size of target regions. * @max_sz_region: Maximum size of target regions. * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. * @min_age_region: Minimum age of target regions. * @max_age_region: Maximum age of target regions. + */ +struct damos_access_pattern { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. @@ -230,10 +243,8 @@ struct damos_stat { * @list: List head for siblings. * * For each aggregation interval, DAMON finds regions which fit in the - * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, - * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to - * those. To avoid consuming too much CPU time or IO resources for the - * &action, "a is used. + * &pattern and applies &action to those. To avoid consuming too much + * CPU time or IO resources for the &action, "a is used. * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks. If all schemes that registered to the @@ -248,12 +259,7 @@ struct damos_stat { * &action is applied. */ struct damos { - unsigned long min_sz_region; - unsigned long max_sz_region; - unsigned int min_nr_accesses; - unsigned int max_nr_accesses; - unsigned int min_age_region; - unsigned int max_age_region; + struct damos_access_pattern pattern; enum damos_action action; struct damos_quota quota; struct damos_watermarks wmarks; @@ -340,7 +346,7 @@ struct damon_operations { unsigned long (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); - bool (*target_valid)(void *target); + bool (*target_valid)(struct damon_target *t); void (*cleanup)(struct damon_ctx *context); }; @@ -383,13 +389,15 @@ struct damon_callback { }; /** - * struct damon_ctx - Represents a context for each monitoring. This is the - * main interface that allows users to set the attributes and get the results - * of the monitoring. + * struct damon_attrs - Monitoring attributes for accuracy/overhead control. * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. * @ops_update_interval: The time between monitoring operations updates. + * @min_nr_regions: The minimum number of adaptive monitoring + * regions. + * @max_nr_regions: The maximum number of adaptive monitoring + * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or * not. It aggregates and keeps the access information (number of accesses to @@ -399,7 +407,21 @@ struct damon_callback { * @ops_update_interval. All time intervals are in micro-seconds. * Please refer to &struct damon_operations and &struct damon_callback for more * detail. + */ +struct damon_attrs { + unsigned long sample_interval; + unsigned long aggr_interval; + unsigned long ops_update_interval; + unsigned long min_nr_regions; + unsigned long max_nr_regions; +}; + +/** + * struct damon_ctx - Represents a context for each monitoring. This is the + * main interface that allows users to set the attributes and get the results + * of the monitoring. * + * @attrs: Monitoring attributes for accuracy/overhead control. * @kdamond: Kernel thread who does the monitoring. * @kdamond_lock: Mutex for the synchronizations with @kdamond. * @@ -421,15 +443,11 @@ struct damon_callback { * @ops: Set of monitoring operations for given use cases. * @callback: Set of callbacks for monitoring events notifications. * - * @min_nr_regions: The minimum number of adaptive monitoring regions. - * @max_nr_regions: The maximum number of adaptive monitoring regions. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ struct damon_ctx { - unsigned long sample_interval; - unsigned long aggr_interval; - unsigned long ops_update_interval; + struct damon_attrs attrs; /* private: internal use only */ struct timespec64 last_aggregation; @@ -442,8 +460,6 @@ struct damon_ctx { struct damon_operations ops; struct damon_callback callback; - unsigned long min_nr_regions; - unsigned long max_nr_regions; struct list_head adaptive_targets; struct list_head schemes; }; @@ -463,9 +479,23 @@ static inline struct damon_region *damon_last_region(struct damon_target *t) return list_last_entry(&t->regions_list, struct damon_region, list); } +static inline struct damon_region *damon_first_region(struct damon_target *t) +{ + return list_first_entry(&t->regions_list, struct damon_region, list); +} + +static inline unsigned long damon_sz_region(struct damon_region *r) +{ + return r->ar.end - r->ar.start; +} + + #define damon_for_each_region(r, t) \ list_for_each_entry(r, &t->regions_list, list) +#define damon_for_each_region_from(r, t) \ + list_for_each_entry_from(r, &t->regions_list, list) + #define damon_for_each_region_safe(r, next, t) \ list_for_each_entry_safe(r, next, &t->regions_list, list) @@ -501,12 +531,9 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); -struct damos *damon_new_scheme( - unsigned long min_sz_region, unsigned long max_sz_region, - unsigned int min_nr_accesses, unsigned int max_nr_accesses, - unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action, struct damos_quota *quota, - struct damos_watermarks *wmarks); +struct damos *damon_new_scheme(struct damos_access_pattern *pattern, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); @@ -519,10 +546,8 @@ unsigned int damon_nr_regions(struct damon_target *t); struct damon_ctx *damon_new_ctx(void); void damon_destroy_ctx(struct damon_ctx *ctx); -int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, - unsigned long aggr_int, unsigned long ops_upd_int, - unsigned long min_nr_reg, unsigned long max_nr_reg); -int damon_set_schemes(struct damon_ctx *ctx, +int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs); +void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, ssize_t nr_schemes); int damon_nr_running_ctxs(void); bool damon_is_registered_ops(enum damon_ops_id id); @@ -538,6 +563,9 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); +int damon_set_region_biggest_system_ram_default(struct damon_target *t, + unsigned long *start, unsigned long *end); + #endif /* CONFIG_DAMON */ #endif /* _DAMON_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 92c78ed02b54..6b351e009f59 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -16,6 +16,7 @@ #include <linux/wait.h> struct path; +struct file; struct vfsmount; /* @@ -250,7 +251,7 @@ extern struct dentry * d_make_root(struct inode *); /* <clickety>-<click> the ramfs-type tree */ extern void d_genocide(struct dentry *); -extern void d_tmpfile(struct dentry *, struct inode *); +extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); @@ -287,8 +288,8 @@ static inline unsigned d_count(const struct dentry *dentry) /* * helper function for dentry_operations.d_dname() members */ -extern __printf(4, 5) -char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +extern __printf(3, 4) +char *dynamic_dname(char *, int, const char *, ...); extern char *__d_path(const struct path *, const struct path *, char *, int); extern char *d_absolute_path(const struct path *, char *, int); diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 58aea2d7385c..0da97dba9ef8 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -73,8 +73,8 @@ extern int delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); -extern void __delayacct_thrashing_start(void); -extern void __delayacct_thrashing_end(void); +extern void __delayacct_thrashing_start(bool *in_thrashing); +extern void __delayacct_thrashing_end(bool *in_thrashing); extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); extern void __delayacct_compact_start(void); @@ -143,22 +143,22 @@ static inline void delayacct_freepages_end(void) __delayacct_freepages_end(); } -static inline void delayacct_thrashing_start(void) +static inline void delayacct_thrashing_start(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) - __delayacct_thrashing_start(); + __delayacct_thrashing_start(in_thrashing); } -static inline void delayacct_thrashing_end(void) +static inline void delayacct_thrashing_end(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) - __delayacct_thrashing_end(); + __delayacct_thrashing_end(in_thrashing); } static inline void delayacct_swapin_start(void) @@ -237,9 +237,9 @@ static inline void delayacct_freepages_start(void) {} static inline void delayacct_freepages_end(void) {} -static inline void delayacct_thrashing_start(void) +static inline void delayacct_thrashing_start(bool *in_thrashing) {} -static inline void delayacct_thrashing_end(void) +static inline void delayacct_thrashing_end(bool *in_thrashing) {} static inline void delayacct_swapin_start(void) {} diff --git a/include/linux/dlm.h b/include/linux/dlm.h index ff951e9f6f20..c6bc2b5ee7e6 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -56,9 +56,6 @@ struct dlm_lockspace_ops { * DLM_LSFL_TIMEWARN * The dlm should emit netlink messages if locks have been waiting * for a configurable amount of time. (Unused.) - * DLM_LSFL_FS - * The lockspace user is in the kernel (i.e. filesystem). Enables - * direct bast/cast callbacks. * DLM_LSFL_NEWEXCL * dlm_new_lockspace() should return -EEXIST if the lockspace exists. * @@ -134,7 +131,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, int mode, struct dlm_lksb *lksb, uint32_t flags, - void *name, + const void *name, unsigned int namelen, uint32_t parent_lkid, void (*lockast) (void *astarg), diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h deleted file mode 100644 index 24607dc3c2ac..000000000000 --- a/include/linux/dma-iommu.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-2015 ARM Ltd. - */ -#ifndef __DMA_IOMMU_H -#define __DMA_IOMMU_H - -#include <linux/errno.h> -#include <linux/types.h> - -#ifdef CONFIG_IOMMU_DMA -#include <linux/dma-mapping.h> -#include <linux/iommu.h> -#include <linux/msi.h> - -/* Domain management interface for IOMMU drivers */ -int iommu_get_dma_cookie(struct iommu_domain *domain); -int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); -void iommu_put_dma_cookie(struct iommu_domain *domain); - -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); -int iommu_dma_init_fq(struct iommu_domain *domain); - -/* The DMA API isn't _quite_ the whole story, though... */ -/* - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU device - * - * The MSI page will be stored in @desc. - * - * Return: 0 on success otherwise an error describing the failure. - */ -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); - -/* Update the MSI message if required. */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg); - -void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); - -void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain); - -extern bool iommu_dma_forcedac; - -#else /* CONFIG_IOMMU_DMA */ - -struct iommu_domain; -struct msi_desc; -struct msi_msg; -struct device; - -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 dma_limit) -{ -} - -static inline int iommu_dma_init_fq(struct iommu_domain *domain) -{ - return -EINVAL; -} - -static inline int iommu_get_dma_cookie(struct iommu_domain *domain) -{ - return -ENODEV; -} - -static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) -{ - return -ENODEV; -} - -static inline void iommu_put_dma_cookie(struct iommu_domain *domain) -{ -} - -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, - phys_addr_t msi_addr) -{ - return 0; -} - -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg) -{ -} - -static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) -{ -} - -#endif /* CONFIG_IOMMU_DMA */ -#endif /* __DMA_IOMMU_H */ diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index c8ccbc94d5d2..0637659a702c 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -62,6 +62,11 @@ struct dma_resv_list; * For example when asking for WRITE fences then the KERNEL fences are returned * as well. Similar when asked for READ fences then both WRITE and KERNEL * fences are returned as well. + * + * Already used fences can be promoted in the sense that a fence with + * DMA_RESV_USAGE_BOOKKEEP could become DMA_RESV_USAGE_READ by adding it again + * with this usage. But fences can never be degraded in the sense that a fence + * with DMA_RESV_USAGE_WRITE could become DMA_RESV_USAGE_READ. */ enum dma_resv_usage { /** @@ -98,10 +103,15 @@ enum dma_resv_usage { * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync. * * This should be used by submissions which don't want to participate in - * implicit synchronization. + * any implicit synchronization. + * + * The most common case are preemption fences, page table updates, TLB + * flushes as well as explicit synced user submissions. * - * The most common case are preemption fences as well as page table - * updates and their TLB flushes. + * Explicit synced user user submissions can be promoted to + * DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE as needed using + * dma_buf_import_sync_file() when implicit synchronization should + * become necessary after initial adding of the fence. */ DMA_RESV_USAGE_BOOKKEEP }; diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index a6b7bc707356..77ea602c287c 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -8,11 +8,13 @@ #ifndef _DMA_HSU_H #define _DMA_HSU_H -#include <linux/device.h> -#include <linux/interrupt.h> +#include <linux/errno.h> +#include <linux/kconfig.h> +#include <linux/types.h> #include <linux/platform_data/dma-hsu.h> +struct device; struct hsu_dma; /** diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8917a32173c4..d81a51978d01 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -65,7 +65,6 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; -extern int intel_iommu_enabled; #define for_each_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ @@ -89,8 +88,7 @@ extern int intel_iommu_enabled; static inline bool dmar_rcu_check(void) { return rwsem_is_locked(&dmar_global_lock) || - system_state == SYSTEM_BOOTING || - (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled); + system_state == SYSTEM_BOOTING; } #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index dce631e678dd..41682278d2e8 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -6,6 +6,8 @@ #include <linux/jump_label.h> #endif +#include <linux/build_bug.h> + /* * An instance of this structure is created in a special * ELF section at every dynamic debug callsite. At runtime, @@ -21,6 +23,9 @@ struct _ddebug { const char *filename; const char *format; unsigned int lineno:18; +#define CLS_BITS 6 + unsigned int class_id:CLS_BITS; +#define _DPRINTK_CLASS_DFLT ((1 << CLS_BITS) - 1) /* * The flags field controls the behaviour at the callsite. * The bits here are changed dynamically when the user @@ -51,15 +56,82 @@ struct _ddebug { #endif } __attribute__((aligned(8))); - +enum class_map_type { + DD_CLASS_TYPE_DISJOINT_BITS, + /** + * DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, one per bit. + * expecting hex input. Built for drm.debug, basis for other types. + */ + DD_CLASS_TYPE_LEVEL_NUM, + /** + * DD_CLASS_TYPE_LEVEL_NUM: input is numeric level, 0-N. + * N turns on just bits N-1 .. 0, so N=0 turns all bits off. + */ + DD_CLASS_TYPE_DISJOINT_NAMES, + /** + * DD_CLASS_TYPE_DISJOINT_NAMES: input is a CSV of [+-]CLASS_NAMES, + * classes are independent, like _DISJOINT_BITS. + */ + DD_CLASS_TYPE_LEVEL_NAMES, + /** + * DD_CLASS_TYPE_LEVEL_NAMES: input is a CSV of [+-]CLASS_NAMES, + * intended for names like: INFO,DEBUG,TRACE, with a module prefix + * avoid EMERG,ALERT,CRIT,ERR,WARNING: they're not debug + */ +}; + +struct ddebug_class_map { + struct list_head link; + struct module *mod; + const char *mod_name; /* needed for builtins */ + const char **class_names; + const int length; + const int base; /* index of 1st .class_id, allows split/shared space */ + enum class_map_type map_type; +}; + +/** + * DECLARE_DYNDBG_CLASSMAP - declare classnames known by a module + * @_var: a struct ddebug_class_map, passed to module_param_cb + * @_type: enum class_map_type, chooses bits/verbose, numeric/symbolic + * @_base: offset of 1st class-name. splits .class_id space + * @classes: class-names used to control class'd prdbgs + */ +#define DECLARE_DYNDBG_CLASSMAP(_var, _maptype, _base, ...) \ + static const char *_var##_classnames[] = { __VA_ARGS__ }; \ + static struct ddebug_class_map __aligned(8) __used \ + __section("__dyndbg_classes") _var = { \ + .mod = THIS_MODULE, \ + .mod_name = KBUILD_MODNAME, \ + .base = _base, \ + .map_type = _maptype, \ + .length = NUM_TYPE_ARGS(char*, __VA_ARGS__), \ + .class_names = _var##_classnames, \ + } +#define NUM_TYPE_ARGS(eltype, ...) \ + (sizeof((eltype[]){__VA_ARGS__}) / sizeof(eltype)) + +/* encapsulate linker provided built-in (or module) dyndbg data */ +struct _ddebug_info { + struct _ddebug *descs; + struct ddebug_class_map *classes; + unsigned int num_descs; + unsigned int num_classes; +}; + +struct ddebug_class_param { + union { + unsigned long *bits; + unsigned int *lvl; + }; + char flags[8]; + const struct ddebug_class_map *map; +}; #if defined(CONFIG_DYNAMIC_DEBUG_CORE) -/* exported for module authors to exercise >control */ -int dynamic_debug_exec_queries(const char *query, const char *modname); +int ddebug_add_module(struct _ddebug_info *dyndbg, const char *modname); -int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname); extern int ddebug_remove_module(const char *mod_name); extern __printf(2, 3) void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...); @@ -87,7 +159,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, const struct ib_device *ibdev, const char *fmt, ...); -#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ +#define DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, cls, fmt) \ static struct _ddebug __aligned(8) \ __section("__dyndbg") name = { \ .modname = KBUILD_MODNAME, \ @@ -96,8 +168,14 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, .format = (fmt), \ .lineno = __LINE__, \ .flags = _DPRINTK_FLAGS_DEFAULT, \ + .class_id = cls, \ _DPRINTK_KEY_INIT \ - } + }; \ + BUILD_BUG_ON_MSG(cls > _DPRINTK_CLASS_DFLT, \ + "classid value overflow") + +#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ + DEFINE_DYNAMIC_DEBUG_METADATA_CLS(name, _DPRINTK_CLASS_DFLT, fmt) #ifdef CONFIG_JUMP_LABEL @@ -128,17 +206,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, #endif /* CONFIG_JUMP_LABEL */ -#define __dynamic_func_call(id, fmt, func, ...) do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ - if (DYNAMIC_DEBUG_BRANCH(id)) \ - func(&id, ##__VA_ARGS__); \ -} while (0) - -#define __dynamic_func_call_no_desc(id, fmt, func, ...) do { \ - DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ +/* + * Factory macros: ($prefix)dynamic_func_call($suffix) + * + * Lower layer (with __ prefix) gets the callsite metadata, and wraps + * the func inside a debug-branch/static-key construct. Upper layer + * (with _ prefix) does the UNIQUE_ID once, so that lower can ref the + * name/label multiple times, and tie the elements together. + * Multiple flavors: + * (|_cls): adds in _DPRINT_CLASS_DFLT as needed + * (|_no_desc): former gets callsite descriptor as 1st arg (for prdbgs) + */ +#define __dynamic_func_call_cls(id, cls, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \ if (DYNAMIC_DEBUG_BRANCH(id)) \ - func(__VA_ARGS__); \ + func(&id, ##__VA_ARGS__); \ +} while (0) +#define __dynamic_func_call(id, fmt, func, ...) \ + __dynamic_func_call_cls(id, _DPRINTK_CLASS_DFLT, fmt, \ + func, ##__VA_ARGS__) + +#define __dynamic_func_call_cls_no_desc(id, cls, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA_CLS(id, cls, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(id)) \ + func(__VA_ARGS__); \ } while (0) +#define __dynamic_func_call_no_desc(id, fmt, func, ...) \ + __dynamic_func_call_cls_no_desc(id, _DPRINTK_CLASS_DFLT, \ + fmt, func, ##__VA_ARGS__) /* * "Factory macro" for generating a call to func, guarded by a @@ -148,22 +243,33 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, * the varargs. Note that fmt is repeated in invocations of this * macro. */ +#define _dynamic_func_call_cls(cls, fmt, func, ...) \ + __dynamic_func_call_cls(__UNIQUE_ID(ddebug), cls, fmt, func, ##__VA_ARGS__) #define _dynamic_func_call(fmt, func, ...) \ - __dynamic_func_call(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) + _dynamic_func_call_cls(_DPRINTK_CLASS_DFLT, fmt, func, ##__VA_ARGS__) + /* * A variant that does the same, except that the descriptor is not * passed as the first argument to the function; it is only called * with precisely the macro's varargs. */ -#define _dynamic_func_call_no_desc(fmt, func, ...) \ - __dynamic_func_call_no_desc(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) +#define _dynamic_func_call_cls_no_desc(cls, fmt, func, ...) \ + __dynamic_func_call_cls_no_desc(__UNIQUE_ID(ddebug), cls, fmt, \ + func, ##__VA_ARGS__) +#define _dynamic_func_call_no_desc(fmt, func, ...) \ + _dynamic_func_call_cls_no_desc(_DPRINTK_CLASS_DFLT, fmt, \ + func, ##__VA_ARGS__) + +#define dynamic_pr_debug_cls(cls, fmt, ...) \ + _dynamic_func_call_cls(cls, fmt, __dynamic_pr_debug, \ + pr_fmt(fmt), ##__VA_ARGS__) #define dynamic_pr_debug(fmt, ...) \ - _dynamic_func_call(fmt, __dynamic_pr_debug, \ + _dynamic_func_call(fmt, __dynamic_pr_debug, \ pr_fmt(fmt), ##__VA_ARGS__) #define dynamic_dev_dbg(dev, fmt, ...) \ - _dynamic_func_call(fmt,__dynamic_dev_dbg, \ + _dynamic_func_call(fmt, __dynamic_dev_dbg, \ dev, fmt, ##__VA_ARGS__) #define dynamic_netdev_dbg(dev, fmt, ...) \ @@ -181,14 +287,24 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, KERN_DEBUG, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii) +struct kernel_param; +int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp); +int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp); + +/* for test only, generally expect drm.debug style macro wrappers */ +#define __pr_debug_cls(cls, fmt, ...) do { \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(cls), \ + "expecting constant class int/enum"); \ + dynamic_pr_debug_cls(cls, fmt, ##__VA_ARGS__); \ + } while (0) + #else /* !CONFIG_DYNAMIC_DEBUG_CORE */ #include <linux/string.h> #include <linux/errno.h> #include <linux/printk.h> -static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname) +static inline int ddebug_add_module(struct _ddebug_info *dinfo, const char *modname) { return 0; } @@ -201,7 +317,7 @@ static inline int ddebug_remove_module(const char *mod) static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *modname) { - if (strstr(param, "dyndbg")) { + if (!strcmp(param, "dyndbg")) { /* avoid pr_warn(), which wants pr_fmt() fully defined */ printk(KERN_WARNING "dyndbg param is supported only in " "CONFIG_DYNAMIC_DEBUG builds\n"); @@ -221,12 +337,14 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, rowsize, groupsize, buf, len, ascii); \ } while (0) -static inline int dynamic_debug_exec_queries(const char *query, const char *modname) -{ - pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); - return 0; -} +struct kernel_param; +static inline int param_set_dyndbg_classes(const char *instr, const struct kernel_param *kp) +{ return 0; } +static inline int param_get_dyndbg_classes(char *buffer, const struct kernel_param *kp) +{ return 0; } #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ +extern const struct kernel_param_ops param_ops_dyndbg_classes; + #endif diff --git a/include/linux/edac.h b/include/linux/edac.h index e730b3468719..fa4bda2a70f6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -231,21 +231,21 @@ enum mem_type { #define MEM_FLAG_DDR BIT(MEM_DDR) #define MEM_FLAG_RDDR BIT(MEM_RDDR) #define MEM_FLAG_RMBS BIT(MEM_RMBS) -#define MEM_FLAG_DDR2 BIT(MEM_DDR2) -#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) -#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) -#define MEM_FLAG_XDR BIT(MEM_XDR) -#define MEM_FLAG_DDR3 BIT(MEM_DDR3) -#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) -#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) -#define MEM_FLAG_DDR4 BIT(MEM_DDR4) -#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) -#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) -#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) -#define MEM_FLAG_DDR5 BIT(MEM_DDR5) -#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) -#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) -#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) +#define MEM_FLAG_DDR2 BIT(MEM_DDR2) +#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2) +#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2) +#define MEM_FLAG_XDR BIT(MEM_XDR) +#define MEM_FLAG_DDR3 BIT(MEM_DDR3) +#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) +#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3) +#define MEM_FLAG_DDR4 BIT(MEM_DDR4) +#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) +#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) +#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) #define MEM_FLAG_WIO2 BIT(MEM_WIO2) #define MEM_FLAG_HBM2 BIT(MEM_HBM2) diff --git a/include/linux/efi.h b/include/linux/efi.h index d2b84c2fec39..da3974bf05d3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -368,6 +368,9 @@ void efi_native_runtime_setup(void); #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) #define LOADED_IMAGE_PROTOCOL_GUID EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) +#define LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0xbc62157e, 0x3e33, 0x4fec, 0x99, 0x20, 0x2d, 0x3b, 0x36, 0xd7, 0x50, 0xdf) +#define EFI_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0x09576e91, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) +#define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) #define EFI_UGA_PROTOCOL_GUID EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) @@ -408,8 +411,10 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) +#define LINUX_EFI_ZBOOT_MEDIA_GUID EFI_GUID(0xe565a30d, 0x47da, 0x4dbd, 0xb3, 0x54, 0x9b, 0xb5, 0xc8, 0x4f, 0x8b, 0xe2) #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) #define LINUX_EFI_COCO_SECRET_AREA_GUID EFI_GUID(0xadf956ad, 0xe98c, 0x484c, 0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47) +#define LINUX_EFI_BOOT_MEMMAP_GUID EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4) #define RISCV_EFI_BOOT_PROTOCOL_GUID EFI_GUID(0xccd15fec, 0x6f73, 0x4eec, 0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf) @@ -518,6 +523,15 @@ typedef union { efi_system_table_32_t mixed_mode; } efi_system_table_t; +struct efi_boot_memmap { + unsigned long map_size; + unsigned long desc_size; + u32 desc_ver; + unsigned long map_key; + unsigned long buff_size; + efi_memory_desc_t map[]; +}; + /* * Architecture independent structure for describing a memory map for the * benefit of efi_memmap_init_early(), and for passing context between @@ -952,6 +966,7 @@ extern int efi_status_to_err(efi_status_t status); #define EFI_DEV_MEDIA_VENDOR 3 #define EFI_DEV_MEDIA_FILE 4 #define EFI_DEV_MEDIA_PROTOCOL 5 +#define EFI_DEV_MEDIA_REL_OFFSET 8 #define EFI_DEV_BIOS_BOOT 0x05 #define EFI_DEV_END_PATH 0x7F #define EFI_DEV_END_PATH2 0xFF @@ -982,12 +997,27 @@ struct efi_vendor_dev_path { u8 vendordata[]; } __packed; +struct efi_rel_offset_dev_path { + struct efi_generic_dev_path header; + u32 reserved; + u64 starting_offset; + u64 ending_offset; +} __packed; + +struct efi_mem_mapped_dev_path { + struct efi_generic_dev_path header; + u32 memory_type; + u64 starting_addr; + u64 ending_addr; +} __packed; + struct efi_dev_path { union { struct efi_generic_dev_path header; struct efi_acpi_dev_path acpi; struct efi_pci_dev_path pci; struct efi_vendor_dev_path vendor; + struct efi_rel_offset_dev_path rel_offset; }; } __packed; @@ -1321,6 +1351,11 @@ struct linux_efi_coco_secret_area { u64 size; }; +struct linux_efi_initrd { + unsigned long base; + unsigned long size; +}; + /* Header of a populated EFI secret area */ #define EFI_SECRET_TABLE_HEADER_GUID EFI_GUID(0x1e74f542, 0x71dd, 0x4d66, 0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 84a466b176cf..d95ab85f96ba 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -253,7 +253,6 @@ static __always_inline void arch_exit_to_user_mode(void) { } /** * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs - * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 92b10e67d5f8..a541f0c4f146 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -428,6 +428,28 @@ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, return true; } +static inline bool ether_addr_is_ipv4_mcast(const u8 *addr) +{ + u8 base[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; + u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 }; + + return ether_addr_equal_masked(addr, base, mask); +} + +static inline bool ether_addr_is_ipv6_mcast(const u8 *addr) +{ + u8 base[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 }; + u8 mask[ETH_ALEN] = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }; + + return ether_addr_equal_masked(addr, base, mask); +} + +static inline bool ether_addr_is_ip_mcast(const u8 *addr) +{ + return ether_addr_is_ipv4_mcast(addr) || + ether_addr_is_ipv6_mcast(addr); +} + /** * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 305d5f19093b..30eb30d6909b 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -46,7 +46,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); static inline bool eventfd_signal_allowed(void) { - return !current->in_eventfd_signal; + return !current->in_eventfd; } #else /* CONFIG_EVENTFD */ diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index c2b1d4fd5987..fe7e6ba918f1 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -10,8 +10,10 @@ #include <linux/compiler.h> #include <linux/types.h> -/* __used is needed to keep __crc_* for LTO */ #define SYMBOL_CRC(sym, crc, sec) \ - u32 __section("___kcrctab" sec "+" #sym) __used __crc_##sym = crc + asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \ + "__crc_" #sym ":" "\n" \ + ".long " #crc "\n" \ + ".previous" "\n") #endif /* __LINUX_EXPORT_INTERNAL_H__ */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d445150c5350..ee0d75d9a302 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -73,6 +73,42 @@ struct f2fs_device { __le32 total_segments; } __packed; +/* reason of stop_checkpoint */ +enum stop_cp_reason { + STOP_CP_REASON_SHUTDOWN, + STOP_CP_REASON_FAULT_INJECT, + STOP_CP_REASON_META_PAGE, + STOP_CP_REASON_WRITE_FAIL, + STOP_CP_REASON_CORRUPTED_SUMMARY, + STOP_CP_REASON_UPDATE_INODE, + STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_MAX, +}; + +#define MAX_STOP_REASON 32 + +/* detail reason for EFSCORRUPTED */ +enum f2fs_error { + ERROR_CORRUPTED_CLUSTER, + ERROR_FAIL_DECOMPRESSION, + ERROR_INVALID_BLKADDR, + ERROR_CORRUPTED_DIRENT, + ERROR_CORRUPTED_INODE, + ERROR_INCONSISTENT_SUMMARY, + ERROR_INCONSISTENT_FOOTER, + ERROR_INCONSISTENT_SUM_TYPE, + ERROR_CORRUPTED_JOURNAL, + ERROR_INCONSISTENT_NODE_COUNT, + ERROR_INCONSISTENT_BLOCK_COUNT, + ERROR_INVALID_CURSEG, + ERROR_INCONSISTENT_SIT, + ERROR_CORRUPTED_VERITY_XATTR, + ERROR_CORRUPTED_XATTR, + ERROR_MAX, +}; + +#define MAX_F2FS_ERRORS 16 + struct f2fs_super_block { __le32 magic; /* Magic Number */ __le16 major_ver; /* Major Version */ @@ -116,7 +152,9 @@ struct f2fs_super_block { __u8 hot_ext_count; /* # of hot file extension */ __le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding_flags; /* Filename charset encoding flags */ - __u8 reserved[306]; /* valid reserved region */ + __u8 s_stop_reason[MAX_STOP_REASON]; /* stop checkpoint reason */ + __u8 s_errors[MAX_F2FS_ERRORS]; /* reason of image corrupts */ + __u8 reserved[258]; /* valid reserved region */ __le32 crc; /* checksum of superblock */ } __packed; diff --git a/include/linux/fb.h b/include/linux/fb.h index 07fcd0e56682..0aff76bcbb00 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -615,10 +615,6 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* drivers/video/fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); -extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, - const char *name); -extern int remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary); extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); @@ -631,16 +627,10 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var, extern int fb_get_options(const char *name, char **option); extern int fb_new_modelist(struct fb_info *info); -extern struct fb_info *registered_fb[FB_MAX]; -extern int num_registered_fb; extern bool fb_center_logo; extern int fb_logo_count; extern struct class *fb_class; -#define for_each_registered_fb(i) \ - for (i = 0; i < FB_MAX; i++) \ - if (!registered_fb[i]) {} else - static inline void lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); diff --git a/include/linux/filter.h b/include/linux/filter.h index a5f21dc3c432..efc42a6e3aed 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -567,6 +567,12 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +extern struct mutex nf_conn_btf_access_lock; +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, u32 *next_btf_id, + enum bpf_type_flag *flag); + typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, unsigned int (*bpf_func)(const void *, @@ -900,8 +906,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); void sk_reuseport_prog_free(struct bpf_prog *prog); int sk_detach_filter(struct sock *sk); -int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, - unsigned int len); +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len); bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); @@ -1018,6 +1023,8 @@ extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); +void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -1030,6 +1037,9 @@ void bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_prog_pack_free(struct bpf_binary_header *hdr); + static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { return list_empty(&fp->aux->ksym.lnode) || @@ -1100,7 +1110,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) + if (bpf_jit_harden == 1 && bpf_capable()) return false; return true; diff --git a/include/linux/find.h b/include/linux/find.h index 424ef67d4a42..ccaf61a0f5fd 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -8,15 +8,33 @@ #include <linux/bitops.h> -extern unsigned long _find_next_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert, unsigned long le); +unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits, + unsigned long start); +unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); +unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); +unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, + unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n); +unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n); +unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); +#ifdef __BIG_ENDIAN +unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size); +unsigned long _find_next_zero_bit_le(const unsigned long *addr, unsigned + long size, unsigned long offset); +unsigned long _find_next_bit_le(const unsigned long *addr, unsigned + long size, unsigned long offset); +#endif + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region @@ -41,7 +59,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, return val ? __ffs(val) : size; } - return _find_next_bit(addr, NULL, size, offset, 0UL, 0); + return _find_next_bit(addr, size, offset); } #endif @@ -71,7 +89,38 @@ unsigned long find_next_and_bit(const unsigned long *addr1, return val ? __ffs(val) : size; } - return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); + return _find_next_and_bit(addr1, addr2, size, offset); +} +#endif + +#ifndef find_next_andnot_bit +/** + * find_next_andnot_bit - find the next set bit in *addr1 excluding all the bits + * in *addr2 + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & ~*addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_andnot_bit(addr1, addr2, size, offset); } #endif @@ -99,7 +148,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, return val == ~0UL ? size : ffz(val); } - return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); + return _find_next_zero_bit(addr, size, offset); } #endif @@ -125,6 +174,87 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) } #endif +/** + * find_nth_bit - find N'th set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * The following is semantically equivalent: + * idx = find_nth_bit(addr, size, 0); + * idx = find_first_bit(addr, size); + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static inline +unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_bit(addr, size, n); +} + +/** + * find_nth_and_bit - find N'th set bit in 2 memory regions + * @addr1: The 1st address to start the search at + * @addr2: The 2nd address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static inline +unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_and_bit(addr1, addr2, size, n); +} + +/** + * find_nth_andnot_bit - find N'th set bit in 2 memory regions, + * flipping bits in 2nd region + * @addr1: The 1st address to start the search at + * @addr2: The 2nd address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static inline +unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_andnot_bit(addr1, addr2, size, n); +} + #ifndef find_first_and_bit /** * find_first_and_bit - find the first set bit in both memory regions @@ -194,6 +324,78 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) #endif /** + * find_next_and_bit_wrap - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit, or first set bit up to @offset + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_and_bit_wrap(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size, unsigned long offset) +{ + unsigned long bit = find_next_and_bit(addr1, addr2, size, offset); + + if (bit < size) + return bit; + + bit = find_first_and_bit(addr1, addr2, offset); + return bit < offset ? bit : size; +} + +/** + * find_next_bit_wrap - find the next set bit in both memory regions + * @addr: The first address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit, or first set bit up to @offset + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_bit_wrap(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + unsigned long bit = find_next_bit(addr, size, offset); + + if (bit < size) + return bit; + + bit = find_first_bit(addr, offset); + return bit < offset ? bit : size; +} + +/* + * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing + * before using it alone. + */ +static inline +unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, + unsigned long start, unsigned long n) +{ + unsigned long bit; + + /* If not wrapped around */ + if (n > start) { + /* and have a bit, just return it. */ + bit = find_next_bit(bitmap, size, n); + if (bit < size) + return bit; + + /* Otherwise, wrap around and ... */ + n = 0; + } + + /* Search the other part. */ + bit = find_next_bit(bitmap, start, n); + return bit < start ? bit : size; +} + +/** * find_next_clump8 - find next 8-bit clump with set bits in a memory region * @clump: location to store copy of found clump * @addr: address to base the search on @@ -247,7 +449,21 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned return val == ~0UL ? size : ffz(val); } - return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); + return _find_next_zero_bit_le(addr, size, offset); +} +#endif + +#ifndef find_first_zero_bit_le +static inline +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = swab(*(const unsigned long *)addr) | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit_le(addr, size); } #endif @@ -266,40 +482,39 @@ unsigned long find_next_bit_le(const void *addr, unsigned return val ? __ffs(val) : size; } - return _find_next_bit(addr, NULL, size, offset, 0UL, 1); + return _find_next_bit_le(addr, size, offset); } #endif -#ifndef find_first_zero_bit_le -#define find_first_zero_bit_le(addr, size) \ - find_next_zero_bit_le((addr), (size), 0) -#endif - #else #error "Please fix <asm/byteorder.h>" #endif #define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), 0); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) + for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) + +#define for_each_and_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_and_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) + +#define for_each_andnot_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) + for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) #define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), 0); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + for ((bit) = 0; \ + (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); \ + (bit)++) /* same as for_each_clear_bit() but use bit as value to start with */ #define for_each_clear_bit_from(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + for (; (bit) = find_next_zero_bit((addr), (size), (bit)), (bit) < (size); (bit)++) /** * for_each_set_bitrange - iterate over all set bit ranges [b; e) @@ -309,11 +524,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_set_bitrange(b, e, addr, size) \ - for ((b) = find_next_bit((addr), (size), 0), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + for ((b) = 0; \ + (b) = find_next_bit((addr), (size), b), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_bit((addr), (size), (e) + 1), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) /** * for_each_set_bitrange_from - iterate over all set bit ranges [b; e) @@ -323,11 +538,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_set_bitrange_from(b, e, addr, size) \ - for ((b) = find_next_bit((addr), (size), (b)), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + for (; \ + (b) = find_next_bit((addr), (size), (b)), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_bit((addr), (size), (e) + 1), \ - (e) = find_next_zero_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) /** * for_each_clear_bitrange - iterate over all unset bit ranges [b; e) @@ -337,11 +552,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_clear_bitrange(b, e, addr, size) \ - for ((b) = find_next_zero_bit((addr), (size), 0), \ - (e) = find_next_bit((addr), (size), (b) + 1); \ + for ((b) = 0; \ + (b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_zero_bit((addr), (size), (e) + 1), \ - (e) = find_next_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) /** * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e) @@ -351,11 +566,24 @@ unsigned long find_next_bit_le(const void *addr, unsigned * @size: bitmap size in number of bits */ #define for_each_clear_bitrange_from(b, e, addr, size) \ - for ((b) = find_next_zero_bit((addr), (size), (b)), \ - (e) = find_next_bit((addr), (size), (b) + 1); \ + for (; \ + (b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1), \ (b) < (size); \ - (b) = find_next_zero_bit((addr), (size), (e) + 1), \ - (e) = find_next_bit((addr), (size), (b) + 1)) + (b) = (e) + 1) + +/** + * for_each_set_bit_wrap - iterate over all set bits starting from @start, and + * wrapping around the end of bitmap. + * @bit: offset for current iteration + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + * @start: Starting bit for bitmap traversing, wrapping around the bitmap end + */ +#define for_each_set_bit_wrap(bit, addr, size, start) \ + for ((bit) = find_next_bit_wrap((addr), (size), (start)); \ + (bit) < (size); \ + (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1)) /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 9f50dacbf7d6..76d2b3ebad84 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -153,6 +153,9 @@ enum pm_ioctl_id { /* Runtime feature configuration */ IOCTL_SET_FEATURE_CONFIG = 26, IOCTL_GET_FEATURE_CONFIG = 27, + /* Dynamic SD/GEM configuration */ + IOCTL_SET_SD_CONFIG = 30, + IOCTL_SET_GEM_CONFIG = 31, }; enum pm_query_id { @@ -400,6 +403,30 @@ enum pm_feature_config_id { }; /** + * enum pm_sd_config_type - PM SD configuration. + * @SD_CONFIG_EMMC_SEL: To set SD_EMMC_SEL in CTRL_REG_SD and SD_SLOTTYPE + * @SD_CONFIG_BASECLK: To set SD_BASECLK in SD_CONFIG_REG1 + * @SD_CONFIG_8BIT: To set SD_8BIT in SD_CONFIG_REG2 + * @SD_CONFIG_FIXED: To set fixed config registers + */ +enum pm_sd_config_type { + SD_CONFIG_EMMC_SEL = 1, + SD_CONFIG_BASECLK = 2, + SD_CONFIG_8BIT = 3, + SD_CONFIG_FIXED = 4, +}; + +/** + * enum pm_gem_config_type - PM GEM configuration. + * @GEM_CONFIG_SGMII_MODE: To set GEM_SGMII_MODE in GEM_CLK_CTRL register + * @GEM_CONFIG_FIXED: To set fixed config registers + */ +enum pm_gem_config_type { + GEM_CONFIG_SGMII_MODE = 1, + GEM_CONFIG_FIXED = 2, +}; + +/** * struct zynqmp_pm_query_data - PM query data * @qid: query ID * @arg1: Argument 1 of query data @@ -475,6 +502,9 @@ int zynqmp_pm_is_function_supported(const u32 api_id, const u32 id); int zynqmp_pm_set_feature_config(enum pm_feature_config_id id, u32 value); int zynqmp_pm_get_feature_config(enum pm_feature_config_id id, u32 *payload); int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset); +int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value); +int zynqmp_pm_set_gem_config(u32 node, enum pm_gem_config_type config, + u32 value); #else static inline int zynqmp_pm_get_api_version(u32 *version) { @@ -745,6 +775,21 @@ static inline int zynqmp_pm_register_sgi(u32 sgi_num, u32 reset) { return -ENODEV; } + +static inline int zynqmp_pm_set_sd_config(u32 node, + enum pm_sd_config_type config, + u32 value) +{ + return -ENODEV; +} + +static inline int zynqmp_pm_set_gem_config(u32 node, + enum pm_gem_config_type config, + u32 value) +{ + return -ENODEV; +} + #endif #endif /* __FIRMWARE_ZYNQMP_H__ */ diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 3b401fa0f374..4029fe368a4f 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bug.h> #include <linux/const.h> +#include <linux/limits.h> #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) @@ -17,9 +19,10 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" #define __compiletime_strlen(p) \ ({ \ unsigned char *__p = (unsigned char *)(p); \ - size_t __ret = (size_t)-1; \ - size_t __p_size = __builtin_object_size(p, 1); \ - if (__p_size != (size_t)-1) { \ + size_t __ret = SIZE_MAX; \ + size_t __p_size = __member_size(p); \ + if (__p_size != SIZE_MAX && \ + __builtin_constant_p(*__p)) { \ size_t __p_len = __p_size - 1; \ if (__builtin_constant_p(__p[__p_len]) && \ __p[__p_len] == '\0') \ @@ -69,20 +72,59 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __underlying_memcpy(dst, src, bytes) /* - * Clang's use of __builtin_object_size() within inlines needs hinting via - * __pass_object_size(). The preference is to only ever use type 1 (member + * Clang's use of __builtin_*object_size() within inlines needs hinting via + * __pass_*object_size(). The preference is to only ever use type 1 (member * size, rather than struct size), but there remain some stragglers using * type 0 that will be converted in the future. */ -#define POS __pass_object_size(1) -#define POS0 __pass_object_size(0) +#define POS __pass_object_size(1) +#define POS0 __pass_object_size(0) +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#define __compiletime_lessthan(bounds, length) ( \ + __builtin_constant_p((bounds) < (length)) && \ + (bounds) < (length) \ +) + +/** + * strncpy - Copy a string to memory with non-guaranteed NUL padding + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * @size: bytes to write at @p + * + * If strlen(@q) >= @size, the copy of @q will stop after @size bytes, + * and @p will NOT be NUL-terminated + * + * If strlen(@q) < @size, following the copy of @q, trailing NUL bytes + * will be written to @p until @size total bytes have been written. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * over-reads of @q, it cannot defend against writing unterminated + * results to @p. Using strncpy() remains ambiguous and fragile. + * Instead, please choose an alternative, so that the expectation + * of @p's contents is unambiguous: + * + * +--------------------+-----------------+------------+ + * | @p needs to be: | padded to @size | not padded | + * +====================+=================+============+ + * | NUL-terminated | strscpy_pad() | strscpy() | + * +--------------------+-----------------+------------+ + * | not NUL-terminated | strtomem_pad() | strtomem() | + * +--------------------+-----------------+------------+ + * + * Note strscpy*()'s differing return values for detecting truncation, + * and strtomem*()'s expectation that the destination is marked with + * __nonstring when it is a character array. + * + */ __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) fortify_panic(__func__); @@ -92,9 +134,9 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); - if (p_size == (size_t)-1) + if (p_size == SIZE_MAX) return __underlying_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) fortify_panic(__func__); @@ -104,12 +146,12 @@ char *strcat(char * const POS p, const char *q) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); size_t p_len = __compiletime_strlen(p); size_t ret; /* We can take compile-time actions when maxlen is const. */ - if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) { + if (__builtin_constant_p(maxlen) && p_len != SIZE_MAX) { /* If p is const, we can use its compile-time-known len. */ if (maxlen >= p_size) return p_len; @@ -134,10 +176,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) __kernel_size_t __fortify_strlen(const char * const POS p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 1); + size_t p_size = __member_size(p); /* Give up if we don't know how large p is. */ - if (p_size == (size_t)-1) + if (p_size == SIZE_MAX) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) @@ -149,12 +191,12 @@ __kernel_size_t __fortify_strlen(const char * const POS p) extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) { - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); size_t q_len; /* Full count of source string length. */ size_t len; /* Count of characters going into destination. */ - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strlcpy(p, q, size); q_len = strlen(q); len = (q_len >= size) ? size - 1 : q_len; @@ -178,18 +220,18 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s { size_t len; /* Use string size rather than possible enclosing struct size. */ - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); /* If we cannot get size of p and q default to call strscpy. */ - if (p_size == (size_t) -1 && q_size == (size_t) -1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __real_strscpy(p, q, size); /* * If size can be known at compile time and is greater than * p_size, generate a compile time write overflow error. */ - if (__builtin_constant_p(size) && size > p_size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* @@ -224,10 +266,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); @@ -246,15 +288,16 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where - * buffer sizes are known. + * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ - if (p_size > p_size_field && p_size < size) + if (__compiletime_lessthan(p_size_field, p_size) && + __compiletime_lessthan(p_size, size)) __write_overflow(); /* Warn when write size is larger than dest field. */ - if (p_size_field < size) + if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); } /* @@ -268,10 +311,10 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. - * (The -1 test is to optimize away checks where the buffer + * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if (p_size != (size_t)(-1) && p_size < size) + if (p_size != SIZE_MAX && p_size < size) fortify_panic("memset"); } @@ -282,11 +325,13 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, }) /* - * __builtin_object_size() must be captured here to avoid evaluating argument - * side-effects further into the macro layers. + * __struct_size() vs __member_size() must be captured here to avoid + * evaluating argument side-effects further into the macro layers. */ +#ifndef CONFIG_KMSAN #define memset(p, c, s) __fortify_memset_chk(p, c, s, \ - __builtin_object_size(p, 0), __builtin_object_size(p, 1)) + __struct_size(p), __member_size(p)) +#endif /* * To make sure the compiler can enforce protection against buffer overflows, @@ -319,7 +364,7 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * V = vulnerable to run-time overflow (will need refactoring to solve) * */ -__FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t p_size, const size_t q_size, const size_t p_size_field, @@ -330,25 +375,28 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, /* * Length argument is a constant expression, so we * can perform compile-time bounds checking where - * buffer sizes are known. + * buffer sizes are also known at compile time. */ /* Error when size is larger than enclosing struct. */ - if (p_size > p_size_field && p_size < size) + if (__compiletime_lessthan(p_size_field, p_size) && + __compiletime_lessthan(p_size, size)) __write_overflow(); - if (q_size > q_size_field && q_size < size) + if (__compiletime_lessthan(q_size_field, q_size) && + __compiletime_lessthan(q_size, size)) __read_overflow2(); /* Warn when write size argument larger than dest field. */ - if (p_size_field < size) + if (__compiletime_lessthan(p_size_field, size)) __write_overflow_field(p_size_field, size); /* * Warn for source field over-read when building with W=1 * or when an over-write happened, so both can be fixed at * the same time. */ - if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || p_size_field < size) && - q_size_field < size) + if ((IS_ENABLED(KBUILD_EXTRA_WARN1) || + __compiletime_lessthan(p_size_field, size)) && + __compiletime_lessthan(q_size_field, size)) __read_overflow2_field(q_size_field, size); } /* @@ -362,41 +410,104 @@ __FORTIFY_INLINE void fortify_memcpy_chk(__kernel_size_t size, /* * Always stop accesses beyond the struct that contains the * field, when the buffer's remaining size is known. - * (The -1 test is to optimize away checks where the buffer + * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != (size_t)(-1) && p_size < size) || - (q_size != (size_t)(-1) && q_size < size)) + if ((p_size != SIZE_MAX && p_size < size) || + (q_size != SIZE_MAX && q_size < size)) fortify_panic(func); + + /* + * Warn when writing beyond destination field size. + * + * We must ignore p_size_field == 0 for existing 0-element + * fake flexible arrays, until they are all converted to + * proper flexible arrays. + * + * The implementation of __builtin_*object_size() behaves + * like sizeof() when not directly referencing a flexible + * array member, which means there will be many bounds checks + * that will appear at run-time, without a way for them to be + * detected at compile-time (as can be done when the destination + * is specifically the flexible array member). + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 + */ + if (p_size_field != 0 && p_size_field != SIZE_MAX && + p_size != p_size_field && p_size_field < size) + return true; + + return false; } #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ size_t __fortify_size = (size_t)(size); \ - fortify_memcpy_chk(__fortify_size, p_size, q_size, \ - p_size_field, q_size_field, #op); \ + WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size, \ + p_size_field, q_size_field, #op), \ + #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ + __fortify_size, \ + "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ + p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) /* - * __builtin_object_size() must be captured here to avoid evaluating argument - * side-effects further into the macro layers. + * Notes about compile-time buffer size detection: + * + * With these types... + * + * struct middle { + * u16 a; + * u8 middle_buf[16]; + * int b; + * }; + * struct end { + * u16 a; + * u8 end_buf[16]; + * }; + * struct flex { + * int a; + * u8 flex_buf[]; + * }; + * + * void func(TYPE *ptr) { ... } + * + * Cases where destination size cannot be currently detected: + * - the size of ptr's object (seemingly by design, gcc & clang fail): + * __builtin_object_size(ptr, 1) == SIZE_MAX + * - the size of flexible arrays in ptr's obj (by design, dynamic size): + * __builtin_object_size(ptr->flex_buf, 1) == SIZE_MAX + * - the size of ANY array at the end of ptr's obj (gcc and clang bug): + * __builtin_object_size(ptr->end_buf, 1) == SIZE_MAX + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836 + * + * Cases where destination size is currently detected: + * - the size of non-array members within ptr's object: + * __builtin_object_size(ptr->a, 1) == 2 + * - the size of non-flexible-array in the middle of ptr's obj: + * __builtin_object_size(ptr->middle_buf, 1) == 16 + * + */ + +/* + * __struct_size() vs __member_size() must be captured here to avoid + * evaluating argument side-effects further into the macro layers. */ #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ - __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ - __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + __struct_size(p), __struct_size(q), \ + __member_size(p), __member_size(q), \ memcpy) #define memmove(p, q, s) __fortify_memcpy_chk(p, q, s, \ - __builtin_object_size(p, 0), __builtin_object_size(q, 0), \ - __builtin_object_size(p, 1), __builtin_object_size(q, 1), \ + __struct_size(p), __struct_size(q), \ + __member_size(p), __member_size(q), \ memmove) extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -406,13 +517,13 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __struct_size(p); + size_t q_size = __struct_size(q); if (__builtin_constant_p(size)) { - if (p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); - if (q_size < size) + if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } if (p_size < size || q_size < size) @@ -423,9 +534,9 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -435,9 +546,9 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -447,9 +558,9 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __struct_size(p); - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) fortify_panic(__func__); @@ -460,16 +571,18 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) { - size_t p_size = __builtin_object_size(p, 1); - size_t q_size = __builtin_object_size(q, 1); + size_t p_size = __member_size(p); + size_t q_size = __member_size(q); size_t size; /* If neither buffer size is known, immediately give up. */ - if (p_size == (size_t)-1 && q_size == (size_t)-1) + if (__builtin_constant_p(p_size) && + __builtin_constant_p(q_size) && + p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strcpy(p, q); size = strlen(q) + 1; /* Compile-time check for const size overflow. */ - if (__builtin_constant_p(size) && p_size < size) + if (__compiletime_lessthan(p_size, size)) __write_overflow(); /* Run-time check for dynamic size overflow. */ if (p_size < size) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 0621c5f86c39..b303472255be 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -8,9 +8,11 @@ #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> +#include <linux/jump_label.h> #ifdef CONFIG_FREEZER -extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ +DECLARE_STATIC_KEY_FALSE(freezer_active); + extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ @@ -22,10 +24,7 @@ extern unsigned int freeze_timeout_msecs; /* * Check if a process has been frozen */ -static inline bool frozen(struct task_struct *p) -{ - return p->flags & PF_FROZEN; -} +extern bool frozen(struct task_struct *p); extern bool freezing_slow_path(struct task_struct *p); @@ -34,9 +33,10 @@ extern bool freezing_slow_path(struct task_struct *p); */ static inline bool freezing(struct task_struct *p) { - if (likely(!atomic_read(&system_freezing_cnt))) - return false; - return freezing_slow_path(p); + if (static_branch_unlikely(&freezer_active)) + return freezing_slow_path(p); + + return false; } /* Takes and releases task alloc lock using task_lock() */ @@ -48,23 +48,14 @@ extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); -/* - * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION - * If try_to_freeze causes a lockdep warning it means the caller may deadlock - */ -static inline bool try_to_freeze_unsafe(void) +static inline bool try_to_freeze(void) { might_sleep(); if (likely(!freezing(current))) return false; - return __refrigerator(false); -} - -static inline bool try_to_freeze(void) -{ if (!(current->flags & PF_NOFREEZE)) debug_check_no_locks_held(); - return try_to_freeze_unsafe(); + return __refrigerator(false); } extern bool freeze_task(struct task_struct *p); @@ -79,195 +70,6 @@ static inline bool cgroup_freezing(struct task_struct *task) } #endif /* !CONFIG_CGROUP_FREEZER */ -/* - * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it - * calls wait_for_completion(&vfork) and reset right after it returns from this - * function. Next, the parent should call try_to_freeze() to freeze itself - * appropriately in case the child has exited before the freezing of tasks is - * complete. However, we don't want kernel threads to be frozen in unexpected - * places, so we allow them to block freeze_processes() instead or to set - * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the - * parent won't really block freeze_processes(), since ____call_usermodehelper() - * (the child) does a little before exec/exit and it can't be frozen before - * waking up the parent. - */ - - -/** - * freezer_do_not_count - tell freezer to ignore %current - * - * Tell freezers to ignore the current task when determining whether the - * target frozen state is reached. IOW, the current task will be - * considered frozen enough by freezers. - * - * The caller shouldn't do anything which isn't allowed for a frozen task - * until freezer_cont() is called. Usually, freezer[_do_not]_count() pair - * wrap a scheduling operation and nothing much else. - */ -static inline void freezer_do_not_count(void) -{ - current->flags |= PF_FREEZER_SKIP; -} - -/** - * freezer_count - tell freezer to stop ignoring %current - * - * Undo freezer_do_not_count(). It tells freezers that %current should be - * considered again and tries to freeze if freezing condition is already in - * effect. - */ -static inline void freezer_count(void) -{ - current->flags &= ~PF_FREEZER_SKIP; - /* - * If freezing is in progress, the following paired with smp_mb() - * in freezer_should_skip() ensures that either we see %true - * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP. - */ - smp_mb(); - try_to_freeze(); -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline void freezer_count_unsafe(void) -{ - current->flags &= ~PF_FREEZER_SKIP; - smp_mb(); - try_to_freeze_unsafe(); -} - -/** - * freezer_should_skip - whether to skip a task when determining frozen - * state is reached - * @p: task in quesion - * - * This function is used by freezers after establishing %true freezing() to - * test whether a task should be skipped when determining the target frozen - * state is reached. IOW, if this function returns %true, @p is considered - * frozen enough. - */ -static inline bool freezer_should_skip(struct task_struct *p) -{ - /* - * The following smp_mb() paired with the one in freezer_count() - * ensures that either freezer_count() sees %true freezing() or we - * see cleared %PF_FREEZER_SKIP and return %false. This makes it - * impossible for a task to slip frozen state testing after - * clearing %PF_FREEZER_SKIP. - */ - smp_mb(); - return p->flags & PF_FREEZER_SKIP; -} - -/* - * These functions are intended to be used whenever you want allow a sleeping - * task to be frozen. Note that neither return any clear indication of - * whether a freeze event happened while in this function. - */ - -/* Like schedule(), but should not block the freezer. */ -static inline void freezable_schedule(void) -{ - freezer_do_not_count(); - schedule(); - freezer_count(); -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline void freezable_schedule_unsafe(void) -{ - freezer_do_not_count(); - schedule(); - freezer_count_unsafe(); -} - -/* - * Like schedule_timeout(), but should not block the freezer. Do not - * call this with locks held. - */ -static inline long freezable_schedule_timeout(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout(timeout); - freezer_count(); - return __retval; -} - -/* - * Like schedule_timeout_interruptible(), but should not block the freezer. Do not - * call this with locks held. - */ -static inline long freezable_schedule_timeout_interruptible(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout_interruptible(timeout); - freezer_count(); - return __retval; -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline long freezable_schedule_timeout_interruptible_unsafe(long timeout) -{ - long __retval; - - freezer_do_not_count(); - __retval = schedule_timeout_interruptible(timeout); - freezer_count_unsafe(); - return __retval; -} - -/* Like schedule_timeout_killable(), but should not block the freezer. */ -static inline long freezable_schedule_timeout_killable(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout_killable(timeout); - freezer_count(); - return __retval; -} - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -static inline long freezable_schedule_timeout_killable_unsafe(long timeout) -{ - long __retval; - freezer_do_not_count(); - __retval = schedule_timeout_killable(timeout); - freezer_count_unsafe(); - return __retval; -} - -/* - * Like schedule_hrtimeout_range(), but should not block the freezer. Do not - * call this with locks held. - */ -static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, - u64 delta, const enum hrtimer_mode mode) -{ - int __retval; - freezer_do_not_count(); - __retval = schedule_hrtimeout_range(expires, delta, mode); - freezer_count(); - return __retval; -} - -/* - * Freezer-friendly wrappers around wait_event_interruptible(), - * wait_event_killable() and wait_event_interruptible_timeout(), originally - * defined in <linux/wait.h> - */ - -/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -#define wait_event_freezekillable_unsafe(wq, condition) \ -({ \ - int __retval; \ - freezer_do_not_count(); \ - __retval = wait_event_killable(wq, (condition)); \ - freezer_count_unsafe(); \ - __retval; \ -}) - #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } @@ -281,35 +83,8 @@ static inline void thaw_kernel_threads(void) {} static inline bool try_to_freeze(void) { return false; } -static inline void freezer_do_not_count(void) {} -static inline void freezer_count(void) {} -static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} -#define freezable_schedule() schedule() - -#define freezable_schedule_unsafe() schedule() - -#define freezable_schedule_timeout(timeout) schedule_timeout(timeout) - -#define freezable_schedule_timeout_interruptible(timeout) \ - schedule_timeout_interruptible(timeout) - -#define freezable_schedule_timeout_interruptible_unsafe(timeout) \ - schedule_timeout_interruptible(timeout) - -#define freezable_schedule_timeout_killable(timeout) \ - schedule_timeout_killable(timeout) - -#define freezable_schedule_timeout_killable_unsafe(timeout) \ - schedule_timeout_killable(timeout) - -#define freezable_schedule_hrtimeout_range(expires, delta, mode) \ - schedule_hrtimeout_range(expires, delta, mode) - -#define wait_event_freezekillable_unsafe(wq, condition) \ - wait_event_killable(wq, condition) - #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 9eced4cc286e..e654435f1651 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1472,7 +1472,7 @@ struct super_block { const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; - struct key *s_master_keys; /* master crypto keys in use */ + struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; @@ -2004,8 +2004,9 @@ static inline int vfs_whiteout(struct user_namespace *mnt_userns, WHITEOUT_DEV); } -struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, - struct dentry *dentry, umode_t mode, int open_flag); +struct file *vfs_tmpfile_open(struct user_namespace *mnt_userns, + const struct path *parentpath, + umode_t mode, int open_flag, const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), @@ -2038,9 +2039,10 @@ umode_t mode_strip_sgid(struct user_namespace *mnt_userns, * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. + * Return 'true' to keep going and 'false' if there are no more entries. */ struct dir_context; -typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, +typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { @@ -2132,6 +2134,8 @@ struct file_operations { loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); + int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, + unsigned int poll_flags); } __randomize_layout; struct inode_operations { @@ -2167,7 +2171,7 @@ struct inode_operations { struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct user_namespace *, struct inode *, - struct dentry *, umode_t); + struct file *, umode_t); int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int); int (*fileattr_set)(struct user_namespace *mnt_userns, @@ -2371,13 +2375,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * don't have to write inode on fdatasync() when only * e.g. the timestamps have changed. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. - * I_DIRTY_TIME The inode itself only has dirty timestamps, and the + * I_DIRTY_TIME The inode itself has dirty timestamps, and the * lazytime mount option is enabled. We keep track of this * separately from I_DIRTY_SYNC in order to implement * lazytime. This gets cleared if I_DIRTY_INODE - * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. - * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in - * i_state, but not both. I_DIRTY_PAGES may still be set. + * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But + * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already + * in place because writeback might already be in progress + * and we don't want to lose the time update * I_NEW Serves as both a mutex and completion notification. * New inodes set I_NEW. If two processes both create * the same inode, one of them will release its inode and @@ -2779,6 +2784,15 @@ extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); +/* Helper for the simple case when original dentry is used */ +static inline int finish_open_simple(struct file *file, int error) +{ + if (error) + return error; + + return finish_open(file, file->f_path.dentry, NULL); +} + /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(void); @@ -3540,17 +3554,17 @@ static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { - return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); } static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, ".", 1, ctx->pos, - file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0; + file->f_path.dentry->d_inode->i_ino, DT_DIR); } static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, - parent_ino(file->f_path.dentry), DT_DIR) == 0; + parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 7d2f1e0f23b1..cad78b569c7e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -161,24 +161,21 @@ struct fscrypt_operations { int *ino_bits_ret, int *lblk_bits_ret); /* - * Return the number of block devices to which the filesystem may write - * encrypted file contents. + * Return an array of pointers to the block devices to which the + * filesystem may write encrypted file contents, NULL if the filesystem + * only has a single such block device, or an ERR_PTR() on error. + * + * On successful non-NULL return, *num_devs is set to the number of + * devices in the returned array. The caller must free the returned + * array using kfree(). * * If the filesystem can use multiple block devices (other than block * devices that aren't used for encrypted file contents, such as * external journal devices), and wants to support inline encryption, * then it must implement this function. Otherwise it's not needed. */ - int (*get_num_devices)(struct super_block *sb); - - /* - * If ->get_num_devices() returns a value greater than 1, then this - * function is called to get the array of request_queues that the - * filesystem is using -- one per block device. (There may be duplicate - * entries in this array, as block devices can share a request_queue.) - */ - void (*get_devices)(struct super_block *sb, - struct request_queue **devs); + struct block_device **(*get_devices)(struct super_block *sb, + unsigned int *num_devs); }; static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) @@ -295,8 +292,6 @@ int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy); bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2); -int fscrypt_set_test_dummy_encryption(struct super_block *sb, const char *arg, - struct fscrypt_dummy_policy *dummy_policy); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline bool @@ -312,7 +307,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -void fscrypt_sb_free(struct super_block *sb); +void fscrypt_sb_delete(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_add_test_dummy_key(struct super_block *sb, const struct fscrypt_dummy_policy *dummy_policy); @@ -353,7 +348,7 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ -void fscrypt_decrypt_bio(struct bio *bio); +bool fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); @@ -526,7 +521,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_free(struct super_block *sb) +static inline void fscrypt_sb_delete(struct super_block *sb) { } @@ -646,8 +641,9 @@ static inline int fscrypt_d_revalidate(struct dentry *dentry, } /* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) +static inline bool fscrypt_decrypt_bio(struct bio *bio) { + return true; } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, @@ -768,7 +764,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); -bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); +bool fscrypt_dio_supported(struct inode *inode); u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); @@ -801,11 +797,8 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, return true; } -static inline bool fscrypt_dio_supported(struct kiocb *iocb, - struct iov_iter *iter) +static inline bool fscrypt_dio_supported(struct inode *inode) { - const struct inode *inode = file_inode(iocb->ki_filp); - return !fscrypt_needs_contents_encryption(inode); } diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 7af030fa3c36..40f14e5fed9d 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -22,6 +22,9 @@ */ #define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE +/* Arbitrary limit to bound the kmalloc() size. Can be changed. */ +#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 + /* Verity operations for filesystems */ struct fsverity_operations { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b61371e287b..62557d4bffc2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1122,47 +1122,6 @@ static inline void unpause_graph_tracing(void) { } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef CONFIG_TRACING - -/* flags for current->trace */ -enum { - TSK_TRACE_FL_TRACE_BIT = 0, - TSK_TRACE_FL_GRAPH_BIT = 1, -}; -enum { - TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT, - TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT, -}; - -static inline void set_tsk_trace_trace(struct task_struct *tsk) -{ - set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); -} - -static inline void clear_tsk_trace_trace(struct task_struct *tsk) -{ - clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); -} - -static inline int test_tsk_trace_trace(struct task_struct *tsk) -{ - return tsk->trace & TSK_TRACE_FL_TRACE; -} - -static inline void set_tsk_trace_graph(struct task_struct *tsk) -{ - set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); -} - -static inline void clear_tsk_trace_graph(struct task_struct *tsk) -{ - clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); -} - -static inline int test_tsk_trace_graph(struct task_struct *tsk) -{ - return tsk->trace & TSK_TRACE_FL_GRAPH; -} - enum ftrace_dump_mode; extern enum ftrace_dump_mode ftrace_dump_on_oops; diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 69081d899492..8c2f00018e89 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -110,7 +110,7 @@ static inline void gameport_free_port(struct gameport *gameport) static inline void gameport_set_name(struct gameport *gameport, const char *name) { - strlcpy(gameport->name, name, sizeof(gameport->name)); + strscpy(gameport->name, name, sizeof(gameport->name)); } /* diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 939b1a8f571b..4a4b387181ad 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -294,6 +294,7 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, + .resv_start_op = 42, /* drbd is currently the only user */ .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), .module = THIS_MODULE, }; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f314be58fa77..ef4aea3b356e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -18,6 +18,9 @@ static inline int gfp_migratetype(const gfp_t gfp_flags) VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); + BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE); + BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >> + GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; @@ -33,29 +36,6 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } -/** - * gfpflags_normal_context - is gfp_flags a normal sleepable context? - * @gfp_flags: gfp_flags to test - * - * Test whether @gfp_flags indicates that the allocation is from the - * %current context and allowed to sleep. - * - * An allocation being allowed to block doesn't mean it owns the %current - * context. When direct reclaim path tries to allocate memory, the - * allocation context is nested inside whatever %current was doing at the - * time of the original allocation. The nested allocation may be allowed - * to block but modifying anything %current owns can corrupt the outer - * context's expectations. - * - * %true result from this function indicates that the allocation context - * can sleep and use anything that's associated with %current. - */ -static inline bool gfpflags_normal_context(const gfp_t gfp_flags) -{ - return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == - __GFP_DIRECT_RECLAIM; -} - #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index fe0f460d9a3b..36460ced060b 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -174,10 +174,6 @@ int desc_to_gpio(const struct gpio_desc *desc); /* Child properties interface */ struct fwnode_handle; -struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label); struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, const char *con_id, int index, enum gpiod_flags flags, @@ -554,15 +550,6 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) struct fwnode_handle; static inline -struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label) -{ - return ERR_PTR(-ENOSYS); -} - -static inline struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, const char *con_id, int index, enum gpiod_flags flags, diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index c8ec982ff498..2f4dcc8d060e 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -336,7 +336,12 @@ ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame, void *buffer, size_t size); ssize_t hdmi_audio_infoframe_pack_only(const struct hdmi_audio_infoframe *frame, void *buffer, size_t size); -int hdmi_audio_infoframe_check(struct hdmi_audio_infoframe *frame); +int hdmi_audio_infoframe_check(const struct hdmi_audio_infoframe *frame); + +struct dp_sdp; +ssize_t +hdmi_audio_infoframe_pack_for_dp(const struct hdmi_audio_infoframe *frame, + struct dp_sdp *sdp, u8 dp_version); enum hdmi_3d_structure { HDMI_3D_STRUCTURE_INVALID = -1, diff --git a/include/linux/hid.h b/include/linux/hid.h index 4363a63b9775..8677ae38599e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -314,15 +314,6 @@ struct hid_item { #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076 -/* - * HID report types --- Ouch! HID spec says 1 2 3! - */ - -#define HID_INPUT_REPORT 0 -#define HID_OUTPUT_REPORT 1 -#define HID_FEATURE_REPORT 2 - -#define HID_REPORT_TYPES 3 /* * HID connect requests @@ -509,7 +500,7 @@ struct hid_report { struct list_head hidinput_list; struct list_head field_entry_list; /* ordered list of input fields */ unsigned int id; /* id of this report */ - unsigned int type; /* report type */ + enum hid_report_type type; /* report type */ unsigned int application; /* application usage for this report */ struct hid_field *field[HID_MAX_FIELDS]; /* fields of the report */ struct hid_field_entry *field_entries; /* allocated memory of input field_entry */ @@ -658,6 +649,8 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + + unsigned int id; /* system unique id */ }; #define to_hid_device(pdev) \ @@ -924,20 +917,21 @@ extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); -int hid_input_report(struct hid_device *, int type, u8 *, u32, int); +int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, + int interrupt); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); void hid_output_report(struct hid_report *report, __u8 *data); -int __hid_request(struct hid_device *hid, struct hid_report *rep, int reqtype); +int __hid_request(struct hid_device *hid, struct hid_report *rep, enum hid_class_request reqtype); u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags); struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, - unsigned int type, unsigned int id, + enum hid_report_type type, unsigned int id, unsigned int application); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); struct hid_report *hid_validate_values(struct hid_device *hid, - unsigned int type, unsigned int id, + enum hid_report_type type, unsigned int id, unsigned int field_index, unsigned int report_counts); @@ -1106,10 +1100,11 @@ void hid_hw_stop(struct hid_device *hdev); int __must_check hid_hw_open(struct hid_device *hdev); void hid_hw_close(struct hid_device *hdev); void hid_hw_request(struct hid_device *hdev, - struct hid_report *report, int reqtype); + struct hid_report *report, enum hid_class_request reqtype); int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, - size_t len, unsigned char rtype, int reqtype); + size_t len, enum hid_report_type rtype, + enum hid_class_request reqtype); int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len); /** @@ -1137,7 +1132,7 @@ static inline int hid_hw_power(struct hid_device *hdev, int level) * @reqtype: hid request type */ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle, - int reqtype) + enum hid_class_request reqtype) { if (hdev->ll_driver->idle) return hdev->ll_driver->idle(hdev, report, idle, reqtype); @@ -1182,8 +1177,8 @@ static inline u32 hid_report_len(struct hid_report *report) return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, - int interrupt); +int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, + int interrupt); /* HID quirks API */ unsigned long hid_lookup_quirk(const struct hid_device *hdev); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 25679035ca28..e9912da5441b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/bug.h> #include <linux/cacheflush.h> +#include <linux/kmsan.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> @@ -311,6 +312,7 @@ static inline void copy_user_highpage(struct page *to, struct page *from, vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); + kmsan_unpoison_memory(page_address(to), PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); } @@ -326,6 +328,7 @@ static inline void copy_highpage(struct page *to, struct page *from) vfrom = kmap_local_page(from); vto = kmap_local_page(to); copy_page(vto, vfrom); + kmsan_copy_page_meta(to, from); kunmap_local(vto); kunmap_local(vfrom); } diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 116e8bd68c99..e230c7c46110 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -87,29 +87,6 @@ #define PEH_AXUSER_CFG 0x401001 #define PEH_AXUSER_CFG_ENABLE 0xffffffff -#define QM_AXI_RRESP BIT(0) -#define QM_AXI_BRESP BIT(1) -#define QM_ECC_MBIT BIT(2) -#define QM_ECC_1BIT BIT(3) -#define QM_ACC_GET_TASK_TIMEOUT BIT(4) -#define QM_ACC_DO_TASK_TIMEOUT BIT(5) -#define QM_ACC_WB_NOT_READY_TIMEOUT BIT(6) -#define QM_SQ_CQ_VF_INVALID BIT(7) -#define QM_CQ_VF_INVALID BIT(8) -#define QM_SQ_VF_INVALID BIT(9) -#define QM_DB_TIMEOUT BIT(10) -#define QM_OF_FIFO_OF BIT(11) -#define QM_DB_RANDOM_INVALID BIT(12) -#define QM_MAILBOX_TIMEOUT BIT(13) -#define QM_FLR_TIMEOUT BIT(14) - -#define QM_BASE_NFE (QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \ - QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \ - QM_OF_FIFO_OF | QM_DB_RANDOM_INVALID | \ - QM_MAILBOX_TIMEOUT | QM_FLR_TIMEOUT) -#define QM_BASE_CE QM_ECC_1BIT - -#define QM_Q_DEPTH 1024 #define QM_MIN_QNUM 2 #define HISI_ACC_SGL_SGE_NR_MAX 255 #define QM_SHAPER_CFG 0x100164 @@ -168,6 +145,15 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_cap_bits { + QM_SUPPORT_DB_ISOLATION = 0x0, + QM_SUPPORT_FUNC_QOS, + QM_SUPPORT_STOP_QP, + QM_SUPPORT_MB_COMMAND, + QM_SUPPORT_SVA_PREFETCH, + QM_SUPPORT_RPM, +}; + struct dfx_diff_registers { u32 *regs; u32 reg_offset; @@ -232,7 +218,10 @@ struct hisi_qm_err_info { char *acpi_rst; u32 msi_wr_port; u32 ecc_2bits_mask; - u32 dev_ce_mask; + u32 qm_shutdown_mask; + u32 dev_shutdown_mask; + u32 qm_reset_mask; + u32 dev_reset_mask; u32 ce; u32 nfe; u32 fe; @@ -258,6 +247,18 @@ struct hisi_qm_err_ini { void (*err_info_init)(struct hisi_qm *qm); }; +struct hisi_qm_cap_info { + u32 type; + /* Register offset */ + u32 offset; + /* Bit offset in register */ + u32 shift; + u32 mask; + u32 v1_val; + u32 v2_val; + u32 v3_val; +}; + struct hisi_qm_list { struct mutex lock; struct list_head list; @@ -278,6 +279,9 @@ struct hisi_qm { struct pci_dev *pdev; void __iomem *io_base; void __iomem *db_io_base; + + /* Capbility version, 0: not supports */ + u32 cap_ver; u32 sqe_size; u32 qp_base; u32 qp_num; @@ -286,6 +290,8 @@ struct hisi_qm { u32 max_qp_num; u32 vfs_num; u32 db_interval; + u16 eq_depth; + u16 aeq_depth; struct list_head list; struct hisi_qm_list *qm_list; @@ -304,6 +310,8 @@ struct hisi_qm { struct hisi_qm_err_info err_info; struct hisi_qm_err_status err_status; unsigned long misc_ctl; /* driver removing and reset sched */ + /* Device capability bit */ + unsigned long caps; struct rw_semaphore qps_lock; struct idr qp_idr; @@ -326,8 +334,6 @@ struct hisi_qm { bool use_sva; bool is_frozen; - /* doorbell isolation enable */ - bool use_db_isolation; resource_size_t phys_base; resource_size_t db_phys_base; struct uacce_device *uacce; @@ -351,6 +357,8 @@ struct hisi_qp_ops { struct hisi_qp { u32 qp_id; + u16 sq_depth; + u16 cq_depth; u8 alg_type; u8 req_type; @@ -501,6 +509,9 @@ void hisi_qm_pm_init(struct hisi_qm *qm); int hisi_qm_get_dfx_access(struct hisi_qm *qm); void hisi_qm_put_dfx_access(struct hisi_qm *qm); void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); +u32 hisi_qm_get_hw_info(struct hisi_qm *qm, + const struct hisi_qm_cap_info *info_table, + u32 index, bool is_read); /* Used by VFIO ACC live migration driver */ struct pci_driver *hisi_sec_get_pf_driver(void); diff --git a/include/linux/hp_sdc.h b/include/linux/hp_sdc.h index 6f1dee7e67e0..9be8704e2d38 100644 --- a/include/linux/hp_sdc.h +++ b/include/linux/hp_sdc.h @@ -180,7 +180,7 @@ switch (val) { \ #define HP_SDC_CMD_SET_IM 0x40 /* 010xxxxx == set irq mask */ -/* The documents provided do not explicitly state that all registers betweem +/* The documents provided do not explicitly state that all registers between * 0x01 and 0x1f inclusive can be read by sending their register index as a * command, but this is implied and appears to be the case. */ diff --git a/include/linux/htcpld.h b/include/linux/htcpld.h index 842fce69ac06..5f8ac9b1d724 100644 --- a/include/linux/htcpld.h +++ b/include/linux/htcpld.h @@ -13,8 +13,6 @@ struct htcpld_chip_platform_data { }; struct htcpld_core_platform_data { - unsigned int int_reset_gpio_hi; - unsigned int int_reset_gpio_lo; unsigned int i2c_adapter_id; struct htcpld_chip_platform_data *chip; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 768e5261fdae..a1341fdcf666 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -168,9 +168,8 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } -bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags, - bool smaps, bool in_pf); +bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, + bool smaps, bool in_pf, bool enforce_sysfs); #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ @@ -219,6 +218,9 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); +int madvise_collapse(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); @@ -321,8 +323,8 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, } static inline bool hugepage_vma_check(struct vm_area_struct *vma, - unsigned long vm_flags, - bool smaps, bool in_pf) + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs) { return false; } @@ -362,9 +364,16 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma, static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { - BUG(); - return 0; + return -EINVAL; +} + +static inline int madvise_collapse(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end) +{ + return -EINVAL; } + static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -435,6 +444,11 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +static inline int split_folio(struct folio *folio) +{ + return split_folio_to_list(folio, NULL); +} + /* * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to * limitations in the implementation like arm64 MTE can override this to diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3ec981a0d8b3..8b4f93e84868 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -16,8 +16,9 @@ struct ctl_table; struct user_struct; struct mmu_gather; +struct node; -#ifndef is_hugepd +#ifndef CONFIG_ARCH_HAS_HUGEPD typedef struct { unsigned long pd; } hugepd_t; #define is_hugepd(hugepd) (0) #define __hugepd(x) ((hugepd_t) { (x) }) @@ -114,6 +115,12 @@ struct file_region { #endif }; +struct hugetlb_vma_lock { + struct kref refs; + struct rw_semaphore rw_sema; + struct vm_area_struct *vma; +}; + extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); @@ -126,7 +133,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, long min_hpages); void hugepage_put_subpool(struct hugepage_subpool *spool); -void reset_vma_resv_huge_pages(struct vm_area_struct *vma); +void hugetlb_dup_vma_private(struct vm_area_struct *vma); void clear_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, @@ -207,13 +214,21 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags); +struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, + int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags); +void hugetlb_vma_lock_read(struct vm_area_struct *vma); +void hugetlb_vma_unlock_read(struct vm_area_struct *vma); +void hugetlb_vma_lock_write(struct vm_area_struct *vma); +void hugetlb_vma_unlock_write(struct vm_area_struct *vma); +int hugetlb_vma_trylock_write(struct vm_area_struct *vma); +void hugetlb_vma_assert_locked(struct vm_area_struct *vma); +void hugetlb_vma_lock_release(struct kref *kref); + int pmd_huge(pmd_t pmd); int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -225,7 +240,7 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ -static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) +static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) { } @@ -312,8 +327,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_huge_pmd(struct mm_struct *mm, - unsigned long address, pmd_t *pmd, int flags) +static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, + unsigned long address, int flags) { return NULL; } @@ -336,6 +351,31 @@ static inline int prepare_hugepage_range(struct file *file, return -EINVAL; } +static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) +{ +} + +static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) +{ + return 1; +} + +static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) +{ +} + static inline int pmd_huge(pmd_t pmd) { return 0; @@ -665,7 +705,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -int huge_add_to_page_cache(struct page *page, struct address_space *mapping, +int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct page *page); @@ -935,6 +975,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, } #endif +#ifdef CONFIG_NUMA +void hugetlb_register_node(struct node *node); +void hugetlb_unregister_node(struct node *node); +#endif + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; @@ -1109,6 +1154,14 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { } + +static inline void hugetlb_register_node(struct node *node) +{ +} + +static inline void hugetlb_unregister_node(struct node *node) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, @@ -1123,14 +1176,10 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h, #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) extern void __init hugetlb_cma_reserve(int order); -extern void __init hugetlb_cma_check(void); #else static inline __init void hugetlb_cma_reserve(int order) { } -static inline __init void hugetlb_cma_check(void) -{ -} #endif bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 379344828e78..630cd255d0cf 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -90,32 +90,31 @@ hugetlb_cgroup_from_page_rsvd(struct page *page) return __hugetlb_cgroup_from_page(page, true); } -static inline int __set_hugetlb_cgroup(struct page *page, +static inline void __set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) - return -1; + return; if (rsvd) set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD, (unsigned long)h_cg); else set_page_private(page + SUBPAGE_INDEX_CGROUP, (unsigned long)h_cg); - return 0; } -static inline int set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - return __set_hugetlb_cgroup(page, h_cg, false); + __set_hugetlb_cgroup(page, h_cg, false); } -static inline int set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct page *page, struct hugetlb_cgroup *h_cg) { - return __set_hugetlb_cgroup(page, h_cg, true); + __set_hugetlb_cgroup(page, h_cg, true); } static inline bool hugetlb_cgroup_disabled(void) @@ -199,16 +198,14 @@ hugetlb_cgroup_from_page_rsvd(struct page *page) return NULL; } -static inline int set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - return 0; } -static inline int set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct page *page, struct hugetlb_cgroup *h_cg) { - return 0; } static inline bool hugetlb_cgroup_disabled(void) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 78dd7035d1e5..f319bd26b030 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, extern int register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); +extern bool hw_breakpoint_is_used(void); extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); -int hw_breakpoint_weight(struct perf_event *bp); int arch_reserve_bp_slot(struct perf_event *bp); void arch_release_bp_slot(struct perf_event *bp); void arch_unregister_hw_breakpoint(struct perf_event *bp); @@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } +static inline bool hw_breakpoint_is_used(void) { return false; } + static inline int reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } static inline void release_bp_slot(struct perf_event *bp) { } diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index aa1d4da03538..77c2885c4c13 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -50,6 +50,7 @@ struct hwrng { struct list_head list; struct kref ref; struct completion cleanup_done; + struct completion dying; }; struct device; @@ -61,4 +62,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); +extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); + #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8eab5017bff3..f7c49bbdb8a1 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -273,7 +273,7 @@ struct i2c_driver { /* Standard driver model interfaces */ int (*probe)(struct i2c_client *client, const struct i2c_device_id *id); - int (*remove)(struct i2c_client *client); + void (*remove)(struct i2c_client *client); /* New driver model interface to aid the seamless removal of the * current probe()'s, more commonly unused than used second parameter. diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b6e6d5b40774..79690938d9a2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2888,7 +2888,8 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) /* Calculate 802.11be EHT capabilities IE Tx/Rx EHT MCS NSS Support Field size */ static inline u8 ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap, - const struct ieee80211_eht_cap_elem_fixed *eht_cap) + const struct ieee80211_eht_cap_elem_fixed *eht_cap, + bool from_ap) { u8 count = 0; @@ -2909,7 +2910,10 @@ ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap, if (eht_cap->phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) count += 3; - return count ? count : 4; + if (count) + return count; + + return from_ap ? 3 : 4; } /* 802.11be EHT PPE Thresholds */ @@ -2945,7 +2949,8 @@ ieee80211_eht_ppe_size(u16 ppe_thres_hdr, const u8 *phy_cap_info) } static inline bool -ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len) +ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len, + bool from_ap) { const struct ieee80211_eht_cap_elem_fixed *elem = (const void *)data; u8 needed = sizeof(struct ieee80211_eht_cap_elem_fixed); @@ -2954,7 +2959,8 @@ ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len) return false; needed += ieee80211_eht_mcs_nss_size((const void *)he_capa, - (const void *)data); + (const void *)data, + from_ap); if (len < needed) return false; diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h index 96d40942e5a3..c87efd333faa 100644 --- a/include/linux/if_pppol2tp.h +++ b/include/linux/if_pppol2tp.h @@ -4,8 +4,6 @@ * * This file supplies definitions required by the PPP over L2TP driver * (l2tp_ppp.c). All version information wrt this file is located in l2tp_ppp.c - * - * License: */ #ifndef __LINUX_IF_PPPOL2TP_H #define __LINUX_IF_PPPOL2TP_H diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 69e813bcb947..ff3beda1312c 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -5,8 +5,6 @@ * * This file supplies definitions required by the PPP over Ethernet driver * (pppox.c). All version information wrt this file is located in pppox.c - * - * License: */ #ifndef __LINUX_IF_PPPOX_H #define __LINUX_IF_PPPOX_H diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 93c262ecbdc9..78890143f079 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -118,9 +118,9 @@ extern int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mreq_source *mreqs, int ifindex); extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex); extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, - struct ip_msfilter __user *optval, int __user *optlen); + sockptr_t optval, sockptr_t optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, - struct sockaddr_storage __user *p); + sockptr_t optval, size_t offset); extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 5fa5957586cf..6802596b017c 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -13,7 +13,7 @@ struct iio_dev; struct iio_chan_spec; struct device; -struct device_node; +struct fwnode_handle; /** * struct iio_channel - everything needed for a consumer to use a channel @@ -99,26 +99,20 @@ void iio_channel_release_all(struct iio_channel *chan); struct iio_channel *devm_iio_channel_get_all(struct device *dev); /** - * of_iio_channel_get_by_name() - get description of all that is needed to access channel. - * @np: Pointer to consumer device tree node + * fwnode_iio_channel_get_by_name() - get description of all that is needed to access channel. + * @fwnode: Pointer to consumer Firmware node * @consumer_channel: Unique name to identify the channel on the consumer * side. This typically describes the channels use within * the consumer. E.g. 'battery_voltage' */ -#ifdef CONFIG_OF -struct iio_channel *of_iio_channel_get_by_name(struct device_node *np, const char *name); -#else -static inline struct iio_channel * -of_iio_channel_get_by_name(struct device_node *np, const char *name) -{ - return NULL; -} -#endif +struct iio_channel *fwnode_iio_channel_get_by_name(struct fwnode_handle *fwnode, + const char *name); /** - * devm_of_iio_channel_get_by_name() - Resource managed version of of_iio_channel_get_by_name(). + * devm_fwnode_iio_channel_get_by_name() - Resource managed version of + * fwnode_iio_channel_get_by_name(). * @dev: Pointer to consumer device. - * @np: Pointer to consumer device tree node + * @fwnode: Pointer to consumer Firmware node * @consumer_channel: Unique name to identify the channel on the consumer * side. This typically describes the channels use within * the consumer. E.g. 'battery_voltage' @@ -129,9 +123,9 @@ of_iio_channel_get_by_name(struct device_node *np, const char *name) * The allocated iio channel is automatically released when the device is * unbound. */ -struct iio_channel *devm_of_iio_channel_get_by_name(struct device *dev, - struct device_node *np, - const char *consumer_channel); +struct iio_channel *devm_fwnode_iio_channel_get_by_name(struct device *dev, + struct fwnode_handle *fwnode, + const char *consumer_channel); struct iio_cb_buffer; /** diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 6b3586b3f952..d1f8b30a7c8b 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -11,6 +11,7 @@ * checked by device drivers but should be considered * read-only as this is a core internal bit * @driver_module: used to make it harder to undercut users + * @mlock_key: lockdep class for iio_dev lock * @info_exist_lock: lock to prevent use during removal * @trig_readonly: mark the current trigger immutable * @event_interface: event chrdevs associated with interrupt lines @@ -42,6 +43,7 @@ struct iio_dev_opaque { int currentmode; int id; struct module *driver_module; + struct lock_class_key mlock_key; struct mutex info_exist_lock; bool trig_readonly; struct iio_event_interface *event_interface; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 5dfbfc991c69..f0ec8a5e5a7a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -17,7 +17,7 @@ * Currently assumes nano seconds. */ -struct of_phandle_args; +struct fwnode_reference_args; enum iio_shared_by { IIO_SEPARATE, @@ -429,6 +429,8 @@ struct iio_trigger; /* forward declaration */ * provide a custom of_xlate function that reads the * *args* and returns the appropriate index in registered * IIO channels array. + * @fwnode_xlate: fwnode based function pointer to obtain channel specifier index. + * Functionally the same as @of_xlate. * @hwfifo_set_watermark: function pointer to set the current hardware * fifo watermark level; see hwfifo_* entries in * Documentation/ABI/testing/sysfs-bus-iio for details on @@ -508,8 +510,8 @@ struct iio_info { int (*debugfs_reg_access)(struct iio_dev *indio_dev, unsigned reg, unsigned writeval, unsigned *readval); - int (*of_xlate)(struct iio_dev *indio_dev, - const struct of_phandle_args *iiospec); + int (*fwnode_xlate)(struct iio_dev *indio_dev, + const struct fwnode_reference_args *iiospec); int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, unsigned count); diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index a7aa91f3a8dc..82faa98c719a 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -17,6 +17,8 @@ enum iio_event_info { IIO_EV_INFO_HIGH_PASS_FILTER_3DB, IIO_EV_INFO_LOW_PASS_FILTER_3DB, IIO_EV_INFO_TIMEOUT, + IIO_EV_INFO_RESET_TIMEOUT, + IIO_EV_INFO_TAP2_MIN_DELAY, }; #define IIO_VAL_INT 1 @@ -63,6 +65,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_OVERSAMPLING_RATIO, IIO_CHAN_INFO_THERMOCOUPLE_TYPE, IIO_CHAN_INFO_CALIBAMBIENT, + IIO_CHAN_INFO_ZEROPOINT, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/linux/init.h b/include/linux/init.h index baf0b29a7010..077d7f93b402 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline __nocfi +#define __init __section(".init.text") __cold __latent_entropy __noinitretpoline #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") #define __exitdata __section(".exit.data") @@ -134,7 +134,7 @@ static inline initcall_t initcall_from_entry(initcall_entry_t *entry) extern initcall_entry_t __con_initcall_start[], __con_initcall_end[]; -/* Used for contructor calls. */ +/* Used for constructor calls. */ typedef void (*ctor_fn_t)(void); struct file_system_type; @@ -220,8 +220,8 @@ extern bool initcall_debug; __initcall_name(initstub, __iid, id) #define __define_initcall_stub(__stub, fn) \ - int __init __cficanonical __stub(void); \ - int __init __cficanonical __stub(void) \ + int __init __stub(void); \ + int __init __stub(void) \ { \ return fn(); \ } \ diff --git a/include/linux/input/auo-pixcir-ts.h b/include/linux/input/auo-pixcir-ts.h deleted file mode 100644 index ed0776997a7a..000000000000 --- a/include/linux/input/auo-pixcir-ts.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Driver for AUO in-cell touchscreens - * - * Copyright (c) 2011 Heiko Stuebner <heiko@sntech.de> - * - * based on auo_touch.h from Dell Streak kernel - * - * Copyright (c) 2008 QUALCOMM Incorporated. - * Copyright (c) 2008 QUALCOMM USA, INC. - */ - -#ifndef __AUO_PIXCIR_TS_H__ -#define __AUO_PIXCIR_TS_H__ - -/* - * Interrupt modes: - * periodical: interrupt is asserted periodicaly - * compare coordinates: interrupt is asserted when coordinates change - * indicate touch: interrupt is asserted during touch - */ -#define AUO_PIXCIR_INT_PERIODICAL 0x00 -#define AUO_PIXCIR_INT_COMP_COORD 0x01 -#define AUO_PIXCIR_INT_TOUCH_IND 0x02 - -/* - * @gpio_int interrupt gpio - * @int_setting one of AUO_PIXCIR_INT_* - * @init_hw hardwarespecific init - * @exit_hw hardwarespecific shutdown - * @x_max x-resolution - * @y_max y-resolution - */ -struct auo_pixcir_ts_platdata { - int gpio_int; - int gpio_rst; - - int int_setting; - - unsigned int x_max; - unsigned int y_max; -}; - -#endif diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index 42faebbaa202..501fa8486749 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -2,7 +2,7 @@ /* * This header provides generic wrappers for memory access instrumentation that - * the compiler cannot emit for: KASAN, KCSAN. + * the compiler cannot emit for: KASAN, KCSAN, KMSAN. */ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H @@ -10,6 +10,7 @@ #include <linux/compiler.h> #include <linux/kasan-checks.h> #include <linux/kcsan-checks.h> +#include <linux/kmsan-checks.h> #include <linux/types.h> /** @@ -117,10 +118,11 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); kcsan_check_read(from, n); + kmsan_copy_to_user(to, from, n, 0); } /** - * instrument_copy_from_user - instrument writes of copy_from_user + * instrument_copy_from_user_before - add instrumentation before copy_from_user * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted before the accesses. @@ -130,10 +132,61 @@ instrument_copy_to_user(void __user *to, const void *from, unsigned long n) * @n number of bytes to copy */ static __always_inline void -instrument_copy_from_user(const void *to, const void __user *from, unsigned long n) +instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); kcsan_check_write(to, n); } +/** + * instrument_copy_from_user_after - add instrumentation after copy_from_user + * + * Instrument writes to kernel memory, that are due to copy_from_user (and + * variants). The instrumentation should be inserted after the accesses. + * + * @to destination address + * @from source address + * @n number of bytes to copy + * @left number of bytes not copied (as returned by copy_from_user) + */ +static __always_inline void +instrument_copy_from_user_after(const void *to, const void __user *from, + unsigned long n, unsigned long left) +{ + kmsan_unpoison_memory(to, n - left); +} + +/** + * instrument_get_user() - add instrumentation to get_user()-like macros + * + * get_user() and friends are fragile, so it may depend on the implementation + * whether the instrumentation happens before or after the data is copied from + * the userspace. + * + * @to destination variable, may not be address-taken + */ +#define instrument_get_user(to) \ +({ \ + u64 __tmp = (u64)(to); \ + kmsan_unpoison_memory(&__tmp, sizeof(__tmp)); \ + to = __tmp; \ +}) + + +/** + * instrument_put_user() - add instrumentation to put_user()-like macros + * + * put_user() and friends are fragile, so it may depend on the implementation + * whether the instrumentation happens before or after the data is copied from + * the userspace. + * + * @from source address + * @ptr userspace pointer to copy to + * @size number of bytes to copy + */ +#define instrument_put_user(from, ptr, size) \ +({ \ + kmsan_copy_to_user(ptr, &from, sizeof(from), 0); \ +}) + #endif /* _LINUX_INSTRUMENTED_H */ diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index 6bd01f7159c6..cd5c5a27557f 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -123,7 +123,7 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider); void icc_node_del(struct icc_node *node); int icc_nodes_remove(struct icc_provider *provider); int icc_provider_add(struct icc_provider *provider); -int icc_provider_del(struct icc_provider *provider); +void icc_provider_del(struct icc_provider *provider); struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); void icc_sync_state(struct device *dev); @@ -172,9 +172,8 @@ static inline int icc_provider_add(struct icc_provider *provider) return -ENOTSUPP; } -static inline int icc_provider_del(struct icc_provider *provider) +static inline void icc_provider_del(struct icc_provider *provider) { - return -ENOTSUPP; } static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ca98aeadcc80..1f068dfdb140 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,7 +16,9 @@ enum io_pgtable_fmt { ARM_V7S, ARM_MALI_LPAE, AMD_IOMMU_V1, + AMD_IOMMU_V2, APPLE_DART, + APPLE_DART2, IO_PGTABLE_NUM_FMTS, }; @@ -260,6 +262,7 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns; extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; #endif /* __IO_PGTABLE_H */ diff --git a/include/linux/io.h b/include/linux/io.h index 5fc800390fe4..308f4f0cfb93 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -59,8 +59,6 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, resource_size_t size); -void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset, - resource_size_t size); void devm_iounmap(struct device *dev, void __iomem *addr); int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 4a2f6cc5a492..43bc8a2edccf 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/xarray.h> +#include <uapi/linux/io_uring.h> enum io_uring_cmd_flags { IO_URING_F_COMPLETE_DEFER = 1, @@ -20,14 +21,20 @@ enum io_uring_cmd_flags { struct io_uring_cmd { struct file *file; const void *cmd; - /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd); + union { + /* callback to defer completions to task context */ + void (*task_work_cb)(struct io_uring_cmd *cmd); + /* used for polled completion */ + void *cookie; + }; u32 cmd_op; - u32 pad; + u32 flags; u8 pdu[32]; /* available inline for free use */ }; #if defined(CONFIG_IO_URING) +int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, void *ioucmd); void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *)); @@ -55,6 +62,11 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } #else +static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, void *ioucmd) +{ + return -EOPNOTSUPP; +} static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t ret2) { diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 677a25d44d7f..f5b687a787a3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -34,9 +34,6 @@ struct io_file_table { unsigned int alloc_hint; }; -struct io_notif; -struct io_notif_slot; - struct io_hash_bucket { spinlock_t lock; struct hlist_head list; @@ -184,6 +181,8 @@ struct io_ev_fd { struct eventfd_ctx *cq_ev_fd; unsigned int eventfd_async: 1; struct rcu_head rcu; + atomic_t refs; + atomic_t ops; }; struct io_alloc_cache { @@ -240,8 +239,6 @@ struct io_ring_ctx { unsigned nr_user_files; unsigned nr_user_bufs; struct io_mapped_ubuf **user_bufs; - struct io_notif_slot *notif_slots; - unsigned nr_notif_slots; struct io_submit_state submit_state; @@ -301,6 +298,8 @@ struct io_ring_ctx { struct io_hash_table cancel_table; bool poll_multi_queue; + struct llist_head work_llist; + struct list_head io_buffers_comp; } ____cacheline_aligned_in_smp; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ea30f00dc145..a325532aeab5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -212,7 +212,7 @@ struct iommu_iotlb_gather { * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) - * @dev_has/enable/disable_feat: per device entries to check/enable/disable + * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device @@ -227,7 +227,7 @@ struct iommu_iotlb_gather { * @owner: Driver module providing these ops */ struct iommu_ops { - bool (*capable)(enum iommu_cap); + bool (*capable)(struct device *dev, enum iommu_cap); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); @@ -416,11 +416,9 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } -extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); -extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); @@ -697,11 +695,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) -{ - return false; -} - static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { return NULL; @@ -1070,4 +1063,40 @@ void iommu_debugfs_setup(void); static inline void iommu_debugfs_setup(void) {} #endif +#ifdef CONFIG_IOMMU_DMA +#include <linux/msi.h> + +/* Setup call for arch DMA mapping code */ +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); + +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); + +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); +void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); + +#else /* CONFIG_IOMMU_DMA */ + +struct msi_desc; +struct msi_msg; + +static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) +{ +} + +static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) +{ + return -ENODEV; +} + +static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +{ + return 0; +} + +static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ + #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 616b683563a9..27642ca15d93 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -79,7 +79,8 @@ struct resource { #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) -#define IORESOURCE_IRQ_OPTIONAL (1<<5) +#define IORESOURCE_IRQ_OPTIONAL (1<<5) +#define IORESOURCE_IRQ_WAKECAPABLE (1<<6) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) @@ -172,6 +173,11 @@ enum { #define DEFINE_RES_MEM(_start, _size) \ DEFINE_RES_MEM_NAMED((_start), (_size), NULL) +#define DEFINE_RES_REG_NAMED(_start, _size, _name) \ + DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_REG) +#define DEFINE_RES_REG(_start, _size) \ + DEFINE_RES_REG_NAMED((_start), (_size), NULL) + #define DEFINE_RES_IRQ_NAMED(_irq, _name) \ DEFINE_RES_NAMED((_irq), 1, (_name), IORESOURCE_IRQ) #define DEFINE_RES_IRQ(_irq) \ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index a533cae189d7..cb71aa616bd3 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -46,10 +46,13 @@ * * iosys_map_set_vaddr(&map, 0xdeadbeaf); * - * To set an address in I/O memory, use iosys_map_set_vaddr_iomem(). + * To set an address in I/O memory, use IOSYS_MAP_INIT_VADDR_IOMEM() or + * iosys_map_set_vaddr_iomem(). * * .. code-block:: c * + * struct iosys_map map = IOSYS_MAP_INIT_VADDR_IOMEM(0xdeadbeaf); + * * iosys_map_set_vaddr_iomem(&map, 0xdeadbeaf); * * Instances of struct iosys_map do not have to be cleaned up, but @@ -122,6 +125,16 @@ struct iosys_map { } /** + * IOSYS_MAP_INIT_VADDR_IOMEM - Initializes struct iosys_map to an address in I/O memory + * @vaddr_iomem_: An I/O-memory address + */ +#define IOSYS_MAP_INIT_VADDR_IOMEM(vaddr_iomem_) \ + { \ + .vaddr_iomem = (vaddr_iomem_), \ + .is_iomem = true, \ + } + +/** * IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another iosys_map * @map_: The dma-buf mapping structure to copy from * @offset_: Offset to add to the other mapping diff --git a/include/linux/iova.h b/include/linux/iova.h index c6ba6d95d79c..83c00fac2acb 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,7 +75,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -#if IS_ENABLED(CONFIG_IOMMU_IOVA) +#if IS_REACHABLE(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h new file mode 100644 index 000000000000..c006cf0a25f3 --- /dev/null +++ b/include/linux/iova_bitmap.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ +#ifndef _IOVA_BITMAP_H_ +#define _IOVA_BITMAP_H_ + +#include <linux/types.h> + +struct iova_bitmap; + +typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, + unsigned long iova, size_t length, + void *opaque); + +struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, + unsigned long page_size, + u64 __user *data); +void iova_bitmap_free(struct iova_bitmap *bitmap); +int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn); +void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length); + +#endif diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e3e8c8662b49..e8240cf2611a 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -11,6 +11,7 @@ #include <linux/refcount.h> #include <linux/rhashtable-types.h> #include <linux/sysctl.h> +#include <linux/percpu_counter.h> struct user_namespace; @@ -36,8 +37,8 @@ struct ipc_namespace { unsigned int msg_ctlmax; unsigned int msg_ctlmnb; unsigned int msg_ctlmni; - atomic_t msg_bytes; - atomic_t msg_hdrs; + struct percpu_counter percpu_msg_bytes; + struct percpu_counter percpu_msg_hdrs; size_t shm_ctlmax; size_t shm_ctlall; diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 3a091d0710ae..d5e6024cb2a8 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -44,7 +44,8 @@ static const struct of_device_id drv_name##_irqchip_match_table[] = { #define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \ .data = typecheck_irq_init_cb(fn), }, -#define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \ + +#define IRQCHIP_PLATFORM_DRIVER_END(drv_name, ...) \ {}, \ }; \ MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \ @@ -56,6 +57,7 @@ static struct platform_driver drv_name##_driver = { \ .owner = THIS_MODULE, \ .of_match_table = drv_name##_irqchip_match_table, \ .suppress_bind_attrs = true, \ + __VA_ARGS__ \ }, \ }; \ builtin_platform_driver(drv_name##_driver) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 1cd4e36890fb..844a8e30e6de 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -169,6 +169,7 @@ int generic_handle_irq_safe(unsigned int irq); * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif diff --git a/include/linux/isa.h b/include/linux/isa.h index e30963190968..4fbbf5e36e08 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -38,6 +38,32 @@ static inline void isa_unregister_driver(struct isa_driver *d) } #endif +#define module_isa_driver_init(__isa_driver, __num_isa_dev) \ +static int __init __isa_driver##_init(void) \ +{ \ + return isa_register_driver(&(__isa_driver), __num_isa_dev); \ +} \ +module_init(__isa_driver##_init) + +#define module_isa_driver_with_irq_init(__isa_driver, __num_isa_dev, __num_irq) \ +static int __init __isa_driver##_init(void) \ +{ \ + if (__num_irq != __num_isa_dev) { \ + pr_err("%s: Number of irq (%u) does not match number of base (%u)\n", \ + __isa_driver.driver.name, __num_irq, __num_isa_dev); \ + return -EINVAL; \ + } \ + return isa_register_driver(&(__isa_driver), __num_isa_dev); \ +} \ +module_init(__isa_driver##_init) + +#define module_isa_driver_exit(__isa_driver) \ +static void __exit __isa_driver##_exit(void) \ +{ \ + isa_unregister_driver(&(__isa_driver)); \ +} \ +module_exit(__isa_driver##_exit) + /** * module_isa_driver() - Helper macro for registering a ISA driver * @__isa_driver: isa_driver struct @@ -48,16 +74,22 @@ static inline void isa_unregister_driver(struct isa_driver *d) * use this macro once, and calling it replaces module_init and module_exit. */ #define module_isa_driver(__isa_driver, __num_isa_dev) \ -static int __init __isa_driver##_init(void) \ -{ \ - return isa_register_driver(&(__isa_driver), __num_isa_dev); \ -} \ -module_init(__isa_driver##_init); \ -static void __exit __isa_driver##_exit(void) \ -{ \ - isa_unregister_driver(&(__isa_driver)); \ -} \ -module_exit(__isa_driver##_exit); +module_isa_driver_init(__isa_driver, __num_isa_dev); \ +module_isa_driver_exit(__isa_driver) + +/** + * module_isa_driver_with_irq() - Helper macro for registering an ISA driver with irq + * @__isa_driver: isa_driver struct + * @__num_isa_dev: number of devices to register + * @__num_irq: number of IRQ to register + * + * Helper macro for ISA drivers with irq that do not do anything special in + * module init/exit. Each module may only use this macro once, and calling it + * replaces module_init and module_exit. + */ +#define module_isa_driver_with_irq(__isa_driver, __num_isa_dev, __num_irq) \ +module_isa_driver_with_irq_init(__isa_driver, __num_isa_dev, __num_irq); \ +module_isa_driver_exit(__isa_driver) /** * max_num_isa_dev() - Maximum possible number registered of an ISA device diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 3bfebde5a1a6..e27bd4f55d84 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -123,17 +123,12 @@ inode_peek_iversion_raw(const struct inode *inode) static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { - u64 cur, old; + u64 cur = inode_peek_iversion_raw(inode); - cur = inode_peek_iversion_raw(inode); - for (;;) { + do { if (cur > val) break; - old = atomic64_cmpxchg(&inode->i_version, cur, val); - if (likely(old == cur)) - break; - cur = old; - } + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val)); } /** @@ -177,56 +172,7 @@ inode_set_iversion_queried(struct inode *inode, u64 val) I_VERSION_QUERIED); } -/** - * inode_maybe_inc_iversion - increments i_version - * @inode: inode with the i_version that should be updated - * @force: increment the counter even if it's not necessary? - * - * Every time the inode is modified, the i_version field must be seen to have - * changed by any observer. - * - * If "force" is set or the QUERIED flag is set, then ensure that we increment - * the value, and clear the queried flag. - * - * In the common case where neither is set, then we can return "false" without - * updating i_version. - * - * If this function returns false, and no other metadata has changed, then we - * can avoid logging the metadata. - */ -static inline bool -inode_maybe_inc_iversion(struct inode *inode, bool force) -{ - u64 cur, old, new; - - /* - * The i_version field is not strictly ordered with any other inode - * information, but the legacy inode_inc_iversion code used a spinlock - * to serialize increments. - * - * Here, we add full memory barriers to ensure that any de-facto - * ordering with other info is preserved. - * - * This barrier pairs with the barrier in inode_query_iversion() - */ - smp_mb(); - cur = inode_peek_iversion_raw(inode); - for (;;) { - /* If flag is clear then we needn't do anything */ - if (!force && !(cur & I_VERSION_QUERIED)) - return false; - - /* Since lowest bit is flag, add 2 to avoid it */ - new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; - - old = atomic64_cmpxchg(&inode->i_version, cur, new); - if (likely(old == cur)) - break; - cur = old; - } - return true; -} - +bool inode_maybe_inc_iversion(struct inode *inode, bool force); /** * inode_inc_iversion - forcibly increment i_version @@ -304,10 +250,10 @@ inode_peek_iversion(const struct inode *inode) static inline u64 inode_query_iversion(struct inode *inode) { - u64 cur, old, new; + u64 cur, new; cur = inode_peek_iversion_raw(inode); - for (;;) { + do { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* @@ -320,11 +266,7 @@ inode_query_iversion(struct inode *inode) } new = cur | I_VERSION_QUERIED; - old = atomic64_cmpxchg(&inode->i_version, cur, new); - if (likely(old == cur)) - break; - cur = old; - } + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return cur >> I_VERSION_QUERIED_SHIFT; } diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ad39636e0c3f..649faac31ddb 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -15,7 +15,7 @@ #include <asm/sections.h> -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b092277bf48d..d811b3d7d2a1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -98,19 +98,13 @@ static inline bool kasan_has_integrated_init(void) #ifdef CONFIG_KASAN struct kasan_cache { +#ifdef CONFIG_KASAN_GENERIC int alloc_meta_offset; int free_meta_offset; +#endif bool is_kmalloc; }; -slab_flags_t __kasan_never_merge(void); -static __always_inline slab_flags_t kasan_never_merge(void) -{ - if (kasan_enabled()) - return __kasan_never_merge(); - return 0; -} - void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) { @@ -134,15 +128,6 @@ static __always_inline void kasan_unpoison_pages(struct page *page, __kasan_unpoison_pages(page, order, init); } -void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, - slab_flags_t *flags); -static __always_inline void kasan_cache_create(struct kmem_cache *cache, - unsigned int *size, slab_flags_t *flags) -{ - if (kasan_enabled()) - __kasan_cache_create(cache, size, flags); -} - void __kasan_cache_create_kmalloc(struct kmem_cache *cache); static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) { @@ -150,14 +135,6 @@ static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) __kasan_cache_create_kmalloc(cache); } -size_t __kasan_metadata_size(struct kmem_cache *cache); -static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache) -{ - if (kasan_enabled()) - return __kasan_metadata_size(cache); - return 0; -} - void __kasan_poison_slab(struct slab *slab); static __always_inline void kasan_poison_slab(struct slab *slab) { @@ -269,20 +246,12 @@ static __always_inline bool kasan_check_byte(const void *addr) #else /* CONFIG_KASAN */ -static inline slab_flags_t kasan_never_merge(void) -{ - return 0; -} static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_poison_pages(struct page *page, unsigned int order, bool init) {} static inline void kasan_unpoison_pages(struct page *page, unsigned int order, bool init) {} -static inline void kasan_cache_create(struct kmem_cache *cache, - unsigned int *size, - slab_flags_t *flags) {} static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} -static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -333,6 +302,11 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} #ifdef CONFIG_KASAN_GENERIC +size_t kasan_metadata_size(struct kmem_cache *cache); +slab_flags_t kasan_never_merge(void); +void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, + slab_flags_t *flags); + void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); @@ -340,6 +314,21 @@ void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ +/* Tag-based KASAN modes do not use per-object metadata. */ +static inline size_t kasan_metadata_size(struct kmem_cache *cache) +{ + return 0; +} +/* And thus nothing prevents cache merging. */ +static inline slab_flags_t kasan_never_merge(void) +{ + return 0; +} +/* And no cache-related metadata initialization is required. */ +static inline void kasan_cache_create(struct kmem_cache *cache, + unsigned int *size, + slab_flags_t *flags) {} + static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 367044d7708c..73f5c120def8 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -108,10 +108,12 @@ enum kernfs_node_flag { KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, + KERNFS_HIDDEN = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, + KERNFS_REMOVING = 0x4000, }; /* @flags for kernfs_create_root() */ @@ -429,6 +431,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_activate(struct kernfs_node *kn); +void kernfs_show(struct kernfs_node *kn, bool show); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 13e6c4b58f07..41a686996aaa 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -427,7 +427,7 @@ extern int kexec_load_disabled; extern bool kexec_in_progress; int crash_shrink_memory(unsigned long new_size); -size_t crash_get_memory_size(void); +ssize_t crash_get_memory_size(void); #ifndef arch_kexec_protect_crashkres /* diff --git a/include/linux/key.h b/include/linux/key.h index 7febc4881363..d27477faf00d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -88,6 +88,12 @@ enum key_need_perm { KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ }; +enum key_lookup_flag { + KEY_LOOKUP_CREATE = 0x01, + KEY_LOOKUP_PARTIAL = 0x02, + KEY_LOOKUP_ALL = (KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL), +}; + struct seq_file; struct user_struct; struct signal_struct; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 384f034ae947..70162d707caf 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -16,11 +16,13 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); #ifdef CONFIG_SHMEM -extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr); +extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, + bool install_pmd); #else -static inline void collapse_pte_mapped_thp(struct mm_struct *mm, - unsigned long addr) +static inline int collapse_pte_mapped_thp(struct mm_struct *mm, + unsigned long addr, bool install_pmd) { + return 0; } #endif @@ -46,9 +48,10 @@ static inline void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags) { } -static inline void collapse_pte_mapped_thp(struct mm_struct *mm, - unsigned long addr) +static inline int collapse_pte_mapped_thp(struct mm_struct *mm, + unsigned long addr, bool install_pmd) { + return 0; } static inline void khugepaged_min_free_kbytes_update(void) diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h new file mode 100644 index 000000000000..c4cae333deec --- /dev/null +++ b/include/linux/kmsan-checks.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KMSAN checks to be used for one-off annotations in subsystems. + * + * Copyright (C) 2017-2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ + +#ifndef _LINUX_KMSAN_CHECKS_H +#define _LINUX_KMSAN_CHECKS_H + +#include <linux/types.h> + +#ifdef CONFIG_KMSAN + +/** + * kmsan_poison_memory() - Mark the memory range as uninitialized. + * @address: address to start with. + * @size: size of buffer to poison. + * @flags: GFP flags for allocations done by this function. + * + * Until other data is written to this range, KMSAN will treat it as + * uninitialized. Error reports for this memory will reference the call site of + * kmsan_poison_memory() as origin. + */ +void kmsan_poison_memory(const void *address, size_t size, gfp_t flags); + +/** + * kmsan_unpoison_memory() - Mark the memory range as initialized. + * @address: address to start with. + * @size: size of buffer to unpoison. + * + * Until other data is written to this range, KMSAN will treat it as + * initialized. + */ +void kmsan_unpoison_memory(const void *address, size_t size); + +/** + * kmsan_check_memory() - Check the memory range for being initialized. + * @address: address to start with. + * @size: size of buffer to check. + * + * If any piece of the given range is marked as uninitialized, KMSAN will report + * an error. + */ +void kmsan_check_memory(const void *address, size_t size); + +/** + * kmsan_copy_to_user() - Notify KMSAN about a data transfer to userspace. + * @to: destination address in the userspace. + * @from: source address in the kernel. + * @to_copy: number of bytes to copy. + * @left: number of bytes not copied. + * + * If this is a real userspace data transfer, KMSAN checks the bytes that were + * actually copied to ensure there was no information leak. If @to belongs to + * the kernel space (which is possible for compat syscalls), KMSAN just copies + * the metadata. + */ +void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy, + size_t left); + +#else + +static inline void kmsan_poison_memory(const void *address, size_t size, + gfp_t flags) +{ +} +static inline void kmsan_unpoison_memory(const void *address, size_t size) +{ +} +static inline void kmsan_check_memory(const void *address, size_t size) +{ +} +static inline void kmsan_copy_to_user(void __user *to, const void *from, + size_t to_copy, size_t left) +{ +} + +#endif + +#endif /* _LINUX_KMSAN_CHECKS_H */ diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h new file mode 100644 index 000000000000..e38ae3c34618 --- /dev/null +++ b/include/linux/kmsan.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KMSAN API for subsystems. + * + * Copyright (C) 2017-2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ +#ifndef _LINUX_KMSAN_H +#define _LINUX_KMSAN_H + +#include <linux/dma-direction.h> +#include <linux/gfp.h> +#include <linux/kmsan-checks.h> +#include <linux/types.h> + +struct page; +struct kmem_cache; +struct task_struct; +struct scatterlist; +struct urb; + +#ifdef CONFIG_KMSAN + +/** + * kmsan_task_create() - Initialize KMSAN state for the task. + * @task: task to initialize. + */ +void kmsan_task_create(struct task_struct *task); + +/** + * kmsan_task_exit() - Notify KMSAN that a task has exited. + * @task: task about to finish. + */ +void kmsan_task_exit(struct task_struct *task); + +/** + * kmsan_init_shadow() - Initialize KMSAN shadow at boot time. + * + * Allocate and initialize KMSAN metadata for early allocations. + */ +void __init kmsan_init_shadow(void); + +/** + * kmsan_init_runtime() - Initialize KMSAN state and enable KMSAN. + */ +void __init kmsan_init_runtime(void); + +/** + * kmsan_memblock_free_pages() - handle freeing of memblock pages. + * @page: struct page to free. + * @order: order of @page. + * + * Freed pages are either returned to buddy allocator or held back to be used + * as metadata pages. + */ +bool __init kmsan_memblock_free_pages(struct page *page, unsigned int order); + +/** + * kmsan_alloc_page() - Notify KMSAN about an alloc_pages() call. + * @page: struct page pointer returned by alloc_pages(). + * @order: order of allocated struct page. + * @flags: GFP flags used by alloc_pages() + * + * KMSAN marks 1<<@order pages starting at @page as uninitialized, unless + * @flags contain __GFP_ZERO. + */ +void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags); + +/** + * kmsan_free_page() - Notify KMSAN about a free_pages() call. + * @page: struct page pointer passed to free_pages(). + * @order: order of deallocated struct page. + * + * KMSAN marks freed memory as uninitialized. + */ +void kmsan_free_page(struct page *page, unsigned int order); + +/** + * kmsan_copy_page_meta() - Copy KMSAN metadata between two pages. + * @dst: destination page. + * @src: source page. + * + * KMSAN copies the contents of metadata pages for @src into the metadata pages + * for @dst. If @dst has no associated metadata pages, nothing happens. + * If @src has no associated metadata pages, @dst metadata pages are unpoisoned. + */ +void kmsan_copy_page_meta(struct page *dst, struct page *src); + +/** + * kmsan_slab_alloc() - Notify KMSAN about a slab allocation. + * @s: slab cache the object belongs to. + * @object: object pointer. + * @flags: GFP flags passed to the allocator. + * + * Depending on cache flags and GFP flags, KMSAN sets up the metadata of the + * newly created object, marking it as initialized or uninitialized. + */ +void kmsan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); + +/** + * kmsan_slab_free() - Notify KMSAN about a slab deallocation. + * @s: slab cache the object belongs to. + * @object: object pointer. + * + * KMSAN marks the freed object as uninitialized. + */ +void kmsan_slab_free(struct kmem_cache *s, void *object); + +/** + * kmsan_kmalloc_large() - Notify KMSAN about a large slab allocation. + * @ptr: object pointer. + * @size: object size. + * @flags: GFP flags passed to the allocator. + * + * Similar to kmsan_slab_alloc(), but for large allocations. + */ +void kmsan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); + +/** + * kmsan_kfree_large() - Notify KMSAN about a large slab deallocation. + * @ptr: object pointer. + * + * Similar to kmsan_slab_free(), but for large allocations. + */ +void kmsan_kfree_large(const void *ptr); + +/** + * kmsan_map_kernel_range_noflush() - Notify KMSAN about a vmap. + * @start: start of vmapped range. + * @end: end of vmapped range. + * @prot: page protection flags used for vmap. + * @pages: array of pages. + * @page_shift: page_shift passed to vmap_range_noflush(). + * + * KMSAN maps shadow and origin pages of @pages into contiguous ranges in + * vmalloc metadata address range. + */ +void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages, + unsigned int page_shift); + +/** + * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap. + * @start: start of vunmapped range. + * @end: end of vunmapped range. + * + * KMSAN unmaps the contiguous metadata ranges created by + * kmsan_map_kernel_range_noflush(). + */ +void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end); + +/** + * kmsan_ioremap_page_range() - Notify KMSAN about a ioremap_page_range() call. + * @addr: range start. + * @end: range end. + * @phys_addr: physical range start. + * @prot: page protection flags used for ioremap_page_range(). + * @page_shift: page_shift argument passed to vmap_range_noflush(). + * + * KMSAN creates new metadata pages for the physical pages mapped into the + * virtual memory. + */ +void kmsan_ioremap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int page_shift); + +/** + * kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call. + * @start: range start. + * @end: range end. + * + * KMSAN unmaps the metadata pages for the given range and, unlike for + * vunmap_page_range(), also deallocates them. + */ +void kmsan_iounmap_page_range(unsigned long start, unsigned long end); + +/** + * kmsan_handle_dma() - Handle a DMA data transfer. + * @page: first page of the buffer. + * @offset: offset of the buffer within the first page. + * @size: buffer size. + * @dir: one of possible dma_data_direction values. + * + * Depending on @direction, KMSAN: + * * checks the buffer, if it is copied to device; + * * initializes the buffer, if it is copied from device; + * * does both, if this is a DMA_BIDIRECTIONAL transfer. + */ +void kmsan_handle_dma(struct page *page, size_t offset, size_t size, + enum dma_data_direction dir); + +/** + * kmsan_handle_dma_sg() - Handle a DMA transfer using scatterlist. + * @sg: scatterlist holding DMA buffers. + * @nents: number of scatterlist entries. + * @dir: one of possible dma_data_direction values. + * + * Depending on @direction, KMSAN: + * * checks the buffers in the scatterlist, if they are copied to device; + * * initializes the buffers, if they are copied from device; + * * does both, if this is a DMA_BIDIRECTIONAL transfer. + */ +void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, + enum dma_data_direction dir); + +/** + * kmsan_handle_urb() - Handle a USB data transfer. + * @urb: struct urb pointer. + * @is_out: data transfer direction (true means output to hardware). + * + * If @is_out is true, KMSAN checks the transfer buffer of @urb. Otherwise, + * KMSAN initializes the transfer buffer. + */ +void kmsan_handle_urb(const struct urb *urb, bool is_out); + +/** + * kmsan_unpoison_entry_regs() - Handle pt_regs in low-level entry code. + * @regs: struct pt_regs pointer received from assembly code. + * + * KMSAN unpoisons the contents of the passed pt_regs, preventing potential + * false positive reports. Unlike kmsan_unpoison_memory(), + * kmsan_unpoison_entry_regs() can be called from the regions where + * kmsan_in_runtime() returns true, which is the case in early entry code. + */ +void kmsan_unpoison_entry_regs(const struct pt_regs *regs); + +#else + +static inline void kmsan_init_shadow(void) +{ +} + +static inline void kmsan_init_runtime(void) +{ +} + +static inline bool kmsan_memblock_free_pages(struct page *page, + unsigned int order) +{ + return true; +} + +static inline void kmsan_task_create(struct task_struct *task) +{ +} + +static inline void kmsan_task_exit(struct task_struct *task) +{ +} + +static inline int kmsan_alloc_page(struct page *page, unsigned int order, + gfp_t flags) +{ + return 0; +} + +static inline void kmsan_free_page(struct page *page, unsigned int order) +{ +} + +static inline void kmsan_copy_page_meta(struct page *dst, struct page *src) +{ +} + +static inline void kmsan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) +{ +} + +static inline void kmsan_slab_free(struct kmem_cache *s, void *object) +{ +} + +static inline void kmsan_kmalloc_large(const void *ptr, size_t size, + gfp_t flags) +{ +} + +static inline void kmsan_kfree_large(const void *ptr) +{ +} + +static inline void kmsan_vmap_pages_range_noflush(unsigned long start, + unsigned long end, + pgprot_t prot, + struct page **pages, + unsigned int page_shift) +{ +} + +static inline void kmsan_vunmap_range_noflush(unsigned long start, + unsigned long end) +{ +} + +static inline void kmsan_ioremap_page_range(unsigned long start, + unsigned long end, + phys_addr_t phys_addr, + pgprot_t prot, + unsigned int page_shift) +{ +} + +static inline void kmsan_iounmap_page_range(unsigned long start, + unsigned long end) +{ +} + +static inline void kmsan_handle_dma(struct page *page, size_t offset, + size_t size, enum dma_data_direction dir) +{ +} + +static inline void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ +} + +static inline void kmsan_handle_urb(const struct urb *urb, bool is_out) +{ +} + +static inline void kmsan_unpoison_entry_regs(const struct pt_regs *regs) +{ +} + +#endif + +#endif /* _LINUX_KMSAN_H */ diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h new file mode 100644 index 000000000000..8bfa6c98176d --- /dev/null +++ b/include/linux/kmsan_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A minimal header declaring types added by KMSAN to existing kernel structs. + * + * Copyright (C) 2017-2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ +#ifndef _LINUX_KMSAN_TYPES_H +#define _LINUX_KMSAN_TYPES_H + +/* These constants are defined in the MSan LLVM instrumentation pass. */ +#define KMSAN_RETVAL_SIZE 800 +#define KMSAN_PARAM_SIZE 800 + +struct kmsan_context_state { + char param_tls[KMSAN_PARAM_SIZE]; + char retval_tls[KMSAN_RETVAL_SIZE]; + char va_arg_tls[KMSAN_PARAM_SIZE]; + char va_arg_origin_tls[KMSAN_PARAM_SIZE]; + u64 va_arg_overflow_size_tls; + char param_origin_tls[KMSAN_PARAM_SIZE]; + u32 retval_origin_tls; +}; + +#undef KMSAN_PARAM_SIZE +#undef KMSAN_RETVAL_SIZE + +struct kmsan_ctx { + struct kmsan_context_state cstate; + int kmsan_in_runtime; + bool allow_reporting; +}; + +#endif /* _LINUX_KMSAN_TYPES_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 55041d2f884d..a0b92be98984 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -103,6 +103,7 @@ struct kprobe { * this flag is only for optimized_kprobe. */ #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ +#define KPROBE_FLAG_ON_FUNC_ENTRY 16 /* probe is on the function entry */ /* Has this kprobe gone ? */ static inline bool kprobe_gone(struct kprobe *p) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 0b4f17418f64..7e232ba59b86 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -15,9 +15,6 @@ #include <linux/sched.h> #include <linux/sched/coredump.h> -struct stable_node; -struct mem_cgroup; - #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f4519d3689e1..32f259fa5801 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -151,12 +151,11 @@ static inline bool is_error_page(struct page *page) #define KVM_REQUEST_NO_ACTION BIT(10) /* * Architecture-independent vcpu->requests bit members - * Bits 4-7 are reserved for more arch-independent bits. + * Bits 3-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 -#define KVM_REQ_UNHALT 3 #define KVM_REQUEST_ARCH_BASE 8 /* @@ -2248,6 +2247,19 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* + * If more than one page is being (un)accounted, @virt must be the address of + * the first page of a block of pages what were allocated together (i.e + * accounted together). + * + * kvm_account_pgtable_pages() is thread-safe because mod_lruvec_page_state() + * is thread-safe. + */ +static inline void kvm_account_pgtable_pages(void *virt, int nr) +{ + mod_lruvec_page_state(virt_to_page(virt), NR_SECONDARY_PAGETABLE, nr); +} + +/* * This defines how many reserved entries we want to keep before we * kick the vcpu to the userspace to avoid dirty ring full. This * value can be tuned to higher if e.g. PML is enabled on the host. diff --git a/include/linux/libata.h b/include/linux/libata.h index 698032e5ef2d..fe990176e6ee 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -101,7 +101,7 @@ enum { ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ - ATA_DFLAG_NCQ_PRIO_ENABLE = (1 << 21), /* Priority cmds sent to dev */ + ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), @@ -1136,8 +1136,8 @@ extern int ata_scsi_slave_config(struct scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); -extern int __ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev, - int queue_depth); +extern int ata_change_queue_depth(struct ata_port *ap, struct ata_device *dev, + struct scsi_device *sdev, int queue_depth); extern struct ata_device *ata_dev_pair(struct ata_device *adev); extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); diff --git a/include/linux/linear_range.h b/include/linux/linear_range.h index fd3d0b358f22..2e4f4c3539c0 100644 --- a/include/linux/linear_range.h +++ b/include/linux/linear_range.h @@ -26,6 +26,17 @@ struct linear_range { unsigned int step; }; +#define LINEAR_RANGE(_min, _min_sel, _max_sel, _step) \ + { \ + .min = _min, \ + .min_sel = _min_sel, \ + .max_sel = _max_sel, \ + .step = _step, \ + } + +#define LINEAR_RANGE_IDX(_idx, _min, _min_sel, _max_sel, _step) \ + [_idx] = LINEAR_RANGE(_min, _min_sel, _max_sel, _step) + unsigned int linear_range_values_in_range(const struct linear_range *r); unsigned int linear_range_values_in_range_array(const struct linear_range *r, int ranges); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 60fff133c0b1..ec119da1d89b 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) +LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, u32 *secid) @@ -253,7 +254,7 @@ LSM_HOOK(int, 0, sem_semop, struct kern_ipc_perm *perm, struct sembuf *sops, LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, struct inode *inode) -LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name, +LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3aa6030302f5..4ec80b96c22e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -806,6 +806,10 @@ * security attributes, e.g. for /proc/pid inodes. * @p contains the task_struct for the task. * @inode contains the inode structure for the inode. + * @userns_create: + * Check permission prior to creating a new user namespace. + * @cred points to prepared creds. + * Return 0 if successful, otherwise < 0 error code. * * Security hooks for Netlink messaging. * diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h new file mode 100644 index 000000000000..2effab72add1 --- /dev/null +++ b/include/linux/maple_tree.h @@ -0,0 +1,685 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _LINUX_MAPLE_TREE_H +#define _LINUX_MAPLE_TREE_H +/* + * Maple Tree - An RCU-safe adaptive tree for storing ranges + * Copyright (c) 2018-2022 Oracle + * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com> + * Matthew Wilcox <willy@infradead.org> + */ + +#include <linux/kernel.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +/* #define CONFIG_MAPLE_RCU_DISABLED */ +/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */ + +/* + * Allocated nodes are mutable until they have been inserted into the tree, + * at which time they cannot change their type until they have been removed + * from the tree and an RCU grace period has passed. + * + * Removed nodes have their ->parent set to point to themselves. RCU readers + * check ->parent before relying on the value that they loaded from the + * slots array. This lets us reuse the slots array for the RCU head. + * + * Nodes in the tree point to their parent unless bit 0 is set. + */ +#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) +/* 64bit sizes */ +#define MAPLE_NODE_SLOTS 31 /* 256 bytes including ->parent */ +#define MAPLE_RANGE64_SLOTS 16 /* 256 bytes */ +#define MAPLE_ARANGE64_SLOTS 10 /* 240 bytes */ +#define MAPLE_ARANGE64_META_MAX 15 /* Out of range for metadata */ +#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 1) +#else +/* 32bit sizes */ +#define MAPLE_NODE_SLOTS 63 /* 256 bytes including ->parent */ +#define MAPLE_RANGE64_SLOTS 32 /* 256 bytes */ +#define MAPLE_ARANGE64_SLOTS 21 /* 240 bytes */ +#define MAPLE_ARANGE64_META_MAX 31 /* Out of range for metadata */ +#define MAPLE_ALLOC_SLOTS (MAPLE_NODE_SLOTS - 2) +#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */ + +#define MAPLE_NODE_MASK 255UL + +/* + * The node->parent of the root node has bit 0 set and the rest of the pointer + * is a pointer to the tree itself. No more bits are available in this pointer + * (on m68k, the data structure may only be 2-byte aligned). + * + * Internal non-root nodes can only have maple_range_* nodes as parents. The + * parent pointer is 256B aligned like all other tree nodes. When storing a 32 + * or 64 bit values, the offset can fit into 4 bits. The 16 bit values need an + * extra bit to store the offset. This extra bit comes from a reuse of the last + * bit in the node type. This is possible by using bit 1 to indicate if bit 2 + * is part of the type or the slot. + * + * Once the type is decided, the decision of an allocation range type or a range + * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE + * flag. + * + * Node types: + * 0x??1 = Root + * 0x?00 = 16 bit nodes + * 0x010 = 32 bit nodes + * 0x110 = 64 bit nodes + * + * Slot size and location in the parent pointer: + * type : slot location + * 0x??1 : Root + * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 + * 0x010 : 32 bit values, type in 0-2, slot in 3-6 + * 0x110 : 64 bit values, type in 0-2, slot in 3-6 + */ + +/* + * This metadata is used to optimize the gap updating code and in reverse + * searching for gaps or any other code that needs to find the end of the data. + */ +struct maple_metadata { + unsigned char end; + unsigned char gap; +}; + +/* + * Leaf nodes do not store pointers to nodes, they store user data. Users may + * store almost any bit pattern. As noted above, the optimisation of storing an + * entry at 0 in the root pointer cannot be done for data which have the bottom + * two bits set to '10'. We also reserve values with the bottom two bits set to + * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use. Some APIs + * return errnos as a negative errno shifted right by two bits and the bottom + * two bits set to '10', and while choosing to store these values in the array + * is not an error, it may lead to confusion if you're testing for an error with + * mas_is_err(). + * + * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits + * 3-6), bit 2 is reserved. That leaves bits 0-1 unused for now. + * + * In regular B-Tree terms, pivots are called keys. The term pivot is used to + * indicate that the tree is specifying ranges, Pivots may appear in the + * subtree with an entry attached to the value whereas keys are unique to a + * specific position of a B-tree. Pivot values are inclusive of the slot with + * the same index. + */ + +struct maple_range_64 { + struct maple_pnode *parent; + unsigned long pivot[MAPLE_RANGE64_SLOTS - 1]; + union { + void __rcu *slot[MAPLE_RANGE64_SLOTS]; + struct { + void __rcu *pad[MAPLE_RANGE64_SLOTS - 1]; + struct maple_metadata meta; + }; + }; +}; + +/* + * At tree creation time, the user can specify that they're willing to trade off + * storing fewer entries in a tree in return for storing more information in + * each node. + * + * The maple tree supports recording the largest range of NULL entries available + * in this node, also called gaps. This optimises the tree for allocating a + * range. + */ +struct maple_arange_64 { + struct maple_pnode *parent; + unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1]; + void __rcu *slot[MAPLE_ARANGE64_SLOTS]; + unsigned long gap[MAPLE_ARANGE64_SLOTS]; + struct maple_metadata meta; +}; + +struct maple_alloc { + unsigned long total; + unsigned char node_count; + unsigned int request_count; + struct maple_alloc *slot[MAPLE_ALLOC_SLOTS]; +}; + +struct maple_topiary { + struct maple_pnode *parent; + struct maple_enode *next; /* Overlaps the pivot */ +}; + +enum maple_type { + maple_dense, + maple_leaf_64, + maple_range_64, + maple_arange_64, +}; + + +/** + * DOC: Maple tree flags + * + * * MT_FLAGS_ALLOC_RANGE - Track gaps in this tree + * * MT_FLAGS_USE_RCU - Operate in RCU mode + * * MT_FLAGS_HEIGHT_OFFSET - The position of the tree height in the flags + * * MT_FLAGS_HEIGHT_MASK - The mask for the maple tree height value + * * MT_FLAGS_LOCK_MASK - How the mt_lock is used + * * MT_FLAGS_LOCK_IRQ - Acquired irq-safe + * * MT_FLAGS_LOCK_BH - Acquired bh-safe + * * MT_FLAGS_LOCK_EXTERN - mt_lock is not used + * + * MAPLE_HEIGHT_MAX The largest height that can be stored + */ +#define MT_FLAGS_ALLOC_RANGE 0x01 +#define MT_FLAGS_USE_RCU 0x02 +#define MT_FLAGS_HEIGHT_OFFSET 0x02 +#define MT_FLAGS_HEIGHT_MASK 0x7C +#define MT_FLAGS_LOCK_MASK 0x300 +#define MT_FLAGS_LOCK_IRQ 0x100 +#define MT_FLAGS_LOCK_BH 0x200 +#define MT_FLAGS_LOCK_EXTERN 0x300 + +#define MAPLE_HEIGHT_MAX 31 + + +#define MAPLE_NODE_TYPE_MASK 0x0F +#define MAPLE_NODE_TYPE_SHIFT 0x03 + +#define MAPLE_RESERVED_RANGE 4096 + +#ifdef CONFIG_LOCKDEP +typedef struct lockdep_map *lockdep_map_p; +#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock) +#define mt_set_external_lock(mt, lock) \ + (mt)->ma_external_lock = &(lock)->dep_map +#else +typedef struct { /* nothing */ } lockdep_map_p; +#define mt_lock_is_held(mt) 1 +#define mt_set_external_lock(mt, lock) do { } while (0) +#endif + +/* + * If the tree contains a single entry at index 0, it is usually stored in + * tree->ma_root. To optimise for the page cache, an entry which ends in '00', + * '01' or '11' is stored in the root, but an entry which ends in '10' will be + * stored in a node. Bits 3-6 are used to store enum maple_type. + * + * The flags are used both to store some immutable information about this tree + * (set at tree creation time) and dynamic information set under the spinlock. + * + * Another use of flags are to indicate global states of the tree. This is the + * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in + * RCU mode. This mode was added to allow the tree to reuse nodes instead of + * re-allocating and RCU freeing nodes when there is a single user. + */ +struct maple_tree { + union { + spinlock_t ma_lock; + lockdep_map_p ma_external_lock; + }; + void __rcu *ma_root; + unsigned int ma_flags; +}; + +/** + * MTREE_INIT() - Initialize a maple tree + * @name: The maple tree name + * @__flags: The maple tree flags + * + */ +#define MTREE_INIT(name, __flags) { \ + .ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock), \ + .ma_flags = __flags, \ + .ma_root = NULL, \ +} + +/** + * MTREE_INIT_EXT() - Initialize a maple tree with an external lock. + * @name: The tree name + * @__flags: The maple tree flags + * @__lock: The external lock + */ +#ifdef CONFIG_LOCKDEP +#define MTREE_INIT_EXT(name, __flags, __lock) { \ + .ma_external_lock = &(__lock).dep_map, \ + .ma_flags = (__flags), \ + .ma_root = NULL, \ +} +#else +#define MTREE_INIT_EXT(name, __flags, __lock) MTREE_INIT(name, __flags) +#endif + +#define DEFINE_MTREE(name) \ + struct maple_tree name = MTREE_INIT(name, 0) + +#define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) +#define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) + +/* + * The Maple Tree squeezes various bits in at various points which aren't + * necessarily obvious. Usually, this is done by observing that pointers are + * N-byte aligned and thus the bottom log_2(N) bits are available for use. We + * don't use the high bits of pointers to store additional information because + * we don't know what bits are unused on any given architecture. + * + * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8 + * low bits for our own purposes. Nodes are currently of 4 types: + * 1. Single pointer (Range is 0-0) + * 2. Non-leaf Allocation Range nodes + * 3. Non-leaf Range nodes + * 4. Leaf Range nodes All nodes consist of a number of node slots, + * pivots, and a parent pointer. + */ + +struct maple_node { + union { + struct { + struct maple_pnode *parent; + void __rcu *slot[MAPLE_NODE_SLOTS]; + }; + struct { + void *pad; + struct rcu_head rcu; + struct maple_enode *piv_parent; + unsigned char parent_slot; + enum maple_type type; + unsigned char slot_len; + unsigned int ma_flags; + }; + struct maple_range_64 mr64; + struct maple_arange_64 ma64; + struct maple_alloc alloc; + }; +}; + +/* + * More complicated stores can cause two nodes to become one or three and + * potentially alter the height of the tree. Either half of the tree may need + * to be rebalanced against the other. The ma_topiary struct is used to track + * which nodes have been 'cut' from the tree so that the change can be done + * safely at a later date. This is done to support RCU. + */ +struct ma_topiary { + struct maple_enode *head; + struct maple_enode *tail; + struct maple_tree *mtree; +}; + +void *mtree_load(struct maple_tree *mt, unsigned long index); + +int mtree_insert(struct maple_tree *mt, unsigned long index, + void *entry, gfp_t gfp); +int mtree_insert_range(struct maple_tree *mt, unsigned long first, + unsigned long last, void *entry, gfp_t gfp); +int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long size, unsigned long min, + unsigned long max, gfp_t gfp); +int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long size, unsigned long min, + unsigned long max, gfp_t gfp); + +int mtree_store_range(struct maple_tree *mt, unsigned long first, + unsigned long last, void *entry, gfp_t gfp); +int mtree_store(struct maple_tree *mt, unsigned long index, + void *entry, gfp_t gfp); +void *mtree_erase(struct maple_tree *mt, unsigned long index); + +void mtree_destroy(struct maple_tree *mt); +void __mt_destroy(struct maple_tree *mt); + +/** + * mtree_empty() - Determine if a tree has any present entries. + * @mt: Maple Tree. + * + * Context: Any context. + * Return: %true if the tree contains only NULL pointers. + */ +static inline bool mtree_empty(const struct maple_tree *mt) +{ + return mt->ma_root == NULL; +} + +/* Advanced API */ + +/* + * The maple state is defined in the struct ma_state and is used to keep track + * of information during operations, and even between operations when using the + * advanced API. + * + * If state->node has bit 0 set then it references a tree location which is not + * a node (eg the root). If bit 1 is set, the rest of the bits are a negative + * errno. Bit 2 (the 'unallocated slots' bit) is clear. Bits 3-6 indicate the + * node type. + * + * state->alloc either has a request number of nodes or an allocated node. If + * stat->alloc has a requested number of nodes, the first bit will be set (0x1) + * and the remaining bits are the value. If state->alloc is a node, then the + * node will be of type maple_alloc. maple_alloc has MAPLE_NODE_SLOTS - 1 for + * storing more allocated nodes, a total number of nodes allocated, and the + * node_count in this node. node_count is the number of allocated nodes in this + * node. The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further + * nodes into state->alloc->slot[0]'s node. Nodes are taken from state->alloc + * by removing a node from the state->alloc node until state->alloc->node_count + * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted + * to state->alloc. Nodes are pushed onto state->alloc by putting the current + * state->alloc into the pushed node's slot[0]. + * + * The state also contains the implied min/max of the state->node, the depth of + * this search, and the offset. The implied min/max are either from the parent + * node or are 0-oo for the root node. The depth is incremented or decremented + * every time a node is walked down or up. The offset is the slot/pivot of + * interest in the node - either for reading or writing. + * + * When returning a value the maple state index and last respectively contain + * the start and end of the range for the entry. Ranges are inclusive in the + * Maple Tree. + */ +struct ma_state { + struct maple_tree *tree; /* The tree we're operating in */ + unsigned long index; /* The index we're operating on - range start */ + unsigned long last; /* The last index we're operating on - range end */ + struct maple_enode *node; /* The node containing this entry */ + unsigned long min; /* The minimum index of this node - implied pivot min */ + unsigned long max; /* The maximum index of this node - implied pivot max */ + struct maple_alloc *alloc; /* Allocated nodes for this operation */ + unsigned char depth; /* depth of tree descent during write */ + unsigned char offset; + unsigned char mas_flags; +}; + +struct ma_wr_state { + struct ma_state *mas; + struct maple_node *node; /* Decoded mas->node */ + unsigned long r_min; /* range min */ + unsigned long r_max; /* range max */ + enum maple_type type; /* mas->node type */ + unsigned char offset_end; /* The offset where the write ends */ + unsigned char node_end; /* mas->node end */ + unsigned long *pivots; /* mas->node->pivots pointer */ + unsigned long end_piv; /* The pivot at the offset end */ + void __rcu **slots; /* mas->node->slots pointer */ + void *entry; /* The entry to write */ + void *content; /* The existing entry that is being overwritten */ +}; + +#define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) +#define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) + + +/* + * Special values for ma_state.node. + * MAS_START means we have not searched the tree. + * MAS_ROOT means we have searched the tree and the entry we found lives in + * the root of the tree (ie it has index 0, length 1 and is the only entry in + * the tree). + * MAS_NONE means we have searched the tree and there is no node in the + * tree for this entry. For example, we searched for index 1 in an empty + * tree. Or we have a tree which points to a full leaf node and we + * searched for an entry which is larger than can be contained in that + * leaf node. + * MA_ERROR represents an errno. After dropping the lock and attempting + * to resolve the error, the walk would have to be restarted from the + * top of the tree as the tree may have been modified. + */ +#define MAS_START ((struct maple_enode *)1UL) +#define MAS_ROOT ((struct maple_enode *)5UL) +#define MAS_NONE ((struct maple_enode *)9UL) +#define MAS_PAUSE ((struct maple_enode *)17UL) +#define MA_ERROR(err) \ + ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) + +#define MA_STATE(name, mt, first, end) \ + struct ma_state name = { \ + .tree = mt, \ + .index = first, \ + .last = end, \ + .node = MAS_START, \ + .min = 0, \ + .max = ULONG_MAX, \ + .alloc = NULL, \ + } + +#define MA_WR_STATE(name, ma_state, wr_entry) \ + struct ma_wr_state name = { \ + .mas = ma_state, \ + .content = NULL, \ + .entry = wr_entry, \ + } + +#define MA_TOPIARY(name, tree) \ + struct ma_topiary name = { \ + .head = NULL, \ + .tail = NULL, \ + .mtree = tree, \ + } + +void *mas_walk(struct ma_state *mas); +void *mas_store(struct ma_state *mas, void *entry); +void *mas_erase(struct ma_state *mas); +int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); +void mas_store_prealloc(struct ma_state *mas, void *entry); +void *mas_find(struct ma_state *mas, unsigned long max); +void *mas_find_rev(struct ma_state *mas, unsigned long min); +int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +bool mas_is_err(struct ma_state *mas); + +bool mas_nomem(struct ma_state *mas, gfp_t gfp); +void mas_pause(struct ma_state *mas); +void maple_tree_init(void); +void mas_destroy(struct ma_state *mas); +int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries); + +void *mas_prev(struct ma_state *mas, unsigned long min); +void *mas_next(struct ma_state *mas, unsigned long max); + +int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, + unsigned long size); + +/* Checks if a mas has not found anything */ +static inline bool mas_is_none(struct ma_state *mas) +{ + return mas->node == MAS_NONE; +} + +/* Checks if a mas has been paused */ +static inline bool mas_is_paused(struct ma_state *mas) +{ + return mas->node == MAS_PAUSE; +} + +void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas); +void mas_dup_store(struct ma_state *mas, void *entry); + +/* + * This finds an empty area from the highest address to the lowest. + * AKA "Topdown" version, + */ +int mas_empty_area_rev(struct ma_state *mas, unsigned long min, + unsigned long max, unsigned long size); +/** + * mas_reset() - Reset a Maple Tree operation state. + * @mas: Maple Tree operation state. + * + * Resets the error or walk state of the @mas so future walks of the + * array will start from the root. Use this if you have dropped the + * lock and want to reuse the ma_state. + * + * Context: Any context. + */ +static inline void mas_reset(struct ma_state *mas) +{ + mas->node = MAS_START; +} + +/** + * mas_for_each() - Iterate over a range of the maple tree. + * @__mas: Maple Tree operation state (maple_state) + * @__entry: Entry retrieved from the tree + * @__max: maximum index to retrieve from the tree + * + * When returned, mas->index and mas->last will hold the entire range for the + * entry. + * + * Note: may return the zero entry. + * + */ +#define mas_for_each(__mas, __entry, __max) \ + while (((__entry) = mas_find((__mas), (__max))) != NULL) + + +/** + * mas_set_range() - Set up Maple Tree operation state for a different index. + * @mas: Maple Tree operation state. + * @start: New start of range in the Maple Tree. + * @last: New end of range in the Maple Tree. + * + * Move the operation state to refer to a different range. This will + * have the effect of starting a walk from the top; see mas_next() + * to move to an adjacent index. + */ +static inline +void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) +{ + mas->index = start; + mas->last = last; + mas->node = MAS_START; +} + +/** + * mas_set() - Set up Maple Tree operation state for a different index. + * @mas: Maple Tree operation state. + * @index: New index into the Maple Tree. + * + * Move the operation state to refer to a different index. This will + * have the effect of starting a walk from the top; see mas_next() + * to move to an adjacent index. + */ +static inline void mas_set(struct ma_state *mas, unsigned long index) +{ + + mas_set_range(mas, index, index); +} + +static inline bool mt_external_lock(const struct maple_tree *mt) +{ + return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN; +} + +/** + * mt_init_flags() - Initialise an empty maple tree with flags. + * @mt: Maple Tree + * @flags: maple tree flags. + * + * If you need to initialise a Maple Tree with special flags (eg, an + * allocation tree), use this function. + * + * Context: Any context. + */ +static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags) +{ + mt->ma_flags = flags; + if (!mt_external_lock(mt)) + spin_lock_init(&mt->ma_lock); + rcu_assign_pointer(mt->ma_root, NULL); +} + +/** + * mt_init() - Initialise an empty maple tree. + * @mt: Maple Tree + * + * An empty Maple Tree. + * + * Context: Any context. + */ +static inline void mt_init(struct maple_tree *mt) +{ + mt_init_flags(mt, 0); +} + +static inline bool mt_in_rcu(struct maple_tree *mt) +{ +#ifdef CONFIG_MAPLE_RCU_DISABLED + return false; +#endif + return mt->ma_flags & MT_FLAGS_USE_RCU; +} + +/** + * mt_clear_in_rcu() - Switch the tree to non-RCU mode. + * @mt: The Maple Tree + */ +static inline void mt_clear_in_rcu(struct maple_tree *mt) +{ + if (!mt_in_rcu(mt)) + return; + + if (mt_external_lock(mt)) { + BUG_ON(!mt_lock_is_held(mt)); + mt->ma_flags &= ~MT_FLAGS_USE_RCU; + } else { + mtree_lock(mt); + mt->ma_flags &= ~MT_FLAGS_USE_RCU; + mtree_unlock(mt); + } +} + +/** + * mt_set_in_rcu() - Switch the tree to RCU safe mode. + * @mt: The Maple Tree + */ +static inline void mt_set_in_rcu(struct maple_tree *mt) +{ + if (mt_in_rcu(mt)) + return; + + if (mt_external_lock(mt)) { + BUG_ON(!mt_lock_is_held(mt)); + mt->ma_flags |= MT_FLAGS_USE_RCU; + } else { + mtree_lock(mt); + mt->ma_flags |= MT_FLAGS_USE_RCU; + mtree_unlock(mt); + } +} + +void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); +void *mt_find_after(struct maple_tree *mt, unsigned long *index, + unsigned long max); +void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min); +void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); + +/** + * mt_for_each - Iterate over each entry starting at index until max. + * @__tree: The Maple Tree + * @__entry: The current entry + * @__index: The index to update to track the location in the tree + * @__max: The maximum limit for @index + * + * Note: Will not return the zero entry. + */ +#define mt_for_each(__tree, __entry, __index, __max) \ + for (__entry = mt_find(__tree, &(__index), __max); \ + __entry; __entry = mt_find_after(__tree, &(__index), __max)) + + +#ifdef CONFIG_DEBUG_MAPLE_TREE +extern atomic_t maple_tree_tests_run; +extern atomic_t maple_tree_tests_passed; + +void mt_dump(const struct maple_tree *mt); +void mt_validate(struct maple_tree *mt); +#define MT_BUG_ON(__tree, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) +#else +#define MT_BUG_ON(__tree, __x) BUG_ON(__x) +#endif /* CONFIG_DEBUG_MAPLE_TREE */ + +#endif /*_LINUX_MAPLE_TREE_H */ diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 47ad3b104d9e..139d05b26f82 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -10,6 +10,9 @@ #ifndef MDEV_H #define MDEV_H +#include <linux/device.h> +#include <linux/uuid.h> + struct mdev_type; struct mdev_device { @@ -20,67 +23,67 @@ struct mdev_device { bool active; }; -static inline struct mdev_device *to_mdev_device(struct device *dev) -{ - return container_of(dev, struct mdev_device, dev); -} +struct mdev_type { + /* set by the driver before calling mdev_register parent: */ + const char *sysfs_name; + const char *pretty_name; -unsigned int mdev_get_type_group_id(struct mdev_device *mdev); -unsigned int mtype_get_type_group_id(struct mdev_type *mtype); -struct device *mtype_get_parent_dev(struct mdev_type *mtype); + /* set by the core, can be used drivers */ + struct mdev_parent *parent; -/* interface for exporting mdev supported type attributes */ -struct mdev_type_attribute { - struct attribute attr; - ssize_t (*show)(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf); - ssize_t (*store)(struct mdev_type *mtype, - struct mdev_type_attribute *attr, const char *buf, - size_t count); + /* internal only */ + struct kobject kobj; + struct kobject *devices_kobj; }; -#define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \ -struct mdev_type_attribute mdev_type_attr_##_name = \ - __ATTR(_name, _mode, _show, _store) -#define MDEV_TYPE_ATTR_RW(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name) -#define MDEV_TYPE_ATTR_RO(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name) -#define MDEV_TYPE_ATTR_WO(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name) +/* embedded into the struct device that the mdev devices hang off */ +struct mdev_parent { + struct device *dev; + struct mdev_driver *mdev_driver; + struct kset *mdev_types_kset; + /* Synchronize device creation/removal with parent unregistration */ + struct rw_semaphore unreg_sem; + struct mdev_type **types; + unsigned int nr_types; + atomic_t available_instances; +}; + +static inline struct mdev_device *to_mdev_device(struct device *dev) +{ + return container_of(dev, struct mdev_device, dev); +} /** * struct mdev_driver - Mediated device driver + * @device_api: string to return for the device_api sysfs + * @max_instances: maximum number of instances supported (optional) * @probe: called when new device created * @remove: called when device removed - * @supported_type_groups: Attributes to define supported types. It is mandatory - * to provide supported types. + * @get_available: Return the max number of instances that can be created + * @show_description: Print a description of the mtype * @driver: device driver structure - * **/ struct mdev_driver { + const char *device_api; + unsigned int max_instances; int (*probe)(struct mdev_device *dev); void (*remove)(struct mdev_device *dev); - struct attribute_group **supported_type_groups; + unsigned int (*get_available)(struct mdev_type *mtype); + ssize_t (*show_description)(struct mdev_type *mtype, char *buf); struct device_driver driver; }; -extern struct bus_type mdev_bus_type; - -int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver); -void mdev_unregister_device(struct device *dev); +int mdev_register_parent(struct mdev_parent *parent, struct device *dev, + struct mdev_driver *mdev_driver, struct mdev_type **types, + unsigned int nr_types); +void mdev_unregister_parent(struct mdev_parent *parent); int mdev_register_driver(struct mdev_driver *drv); void mdev_unregister_driver(struct mdev_driver *drv); -struct device *mdev_parent_dev(struct mdev_device *mdev); static inline struct device *mdev_dev(struct mdev_device *mdev) { return &mdev->dev; } -static inline struct mdev_device *mdev_from_dev(struct device *dev) -{ - return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL; -} #endif /* MDEV_H */ diff --git a/include/linux/mdio/mdio-i2c.h b/include/linux/mdio/mdio-i2c.h index b1d27f7cd23f..65b550a6fc32 100644 --- a/include/linux/mdio/mdio-i2c.h +++ b/include/linux/mdio/mdio-i2c.h @@ -11,6 +11,14 @@ struct device; struct i2c_adapter; struct mii_bus; -struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c); +enum mdio_i2c_proto { + MDIO_I2C_NONE, + MDIO_I2C_MARVELL_C22, + MDIO_I2C_C45, + MDIO_I2C_ROLLBALL, +}; + +struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c, + enum mdio_i2c_proto protocol); #endif diff --git a/include/linux/mei_aux.h b/include/linux/mei_aux.h index 587f25128848..506912ad363b 100644 --- a/include/linux/mei_aux.h +++ b/include/linux/mei_aux.h @@ -7,10 +7,22 @@ #include <linux/auxiliary_bus.h> +/** + * struct mei_aux_device - mei auxiliary device + * @aux_dev: - auxiliary device object + * @irq: interrupt driving the mei auxiliary device + * @bar: mmio resource bar reserved to mei auxiliary device + * @ext_op_mem: resource for extend operational memory + * used in graphics PXP mode. + * @slow_firmware: The device has slow underlying firmware. + * Such firmware will require to use larger operation timeouts. + */ struct mei_aux_device { struct auxiliary_device aux_dev; int irq; struct resource bar; + struct resource ext_op_mem; + bool slow_firmware; }; #define auxiliary_dev_to_mei_aux_dev(auxiliary_dev) \ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6257867fbf95..e1644a24009c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -80,29 +80,8 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -struct memcg_vmstats_percpu { - /* Local (CPU and cgroup) page state & events */ - long state[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - - /* Delta calculation for lockless upward propagation */ - long state_prev[MEMCG_NR_STAT]; - unsigned long events_prev[NR_VM_EVENT_ITEMS]; - - /* Cgroup1: threshold notifications & softlimit tree updates */ - unsigned long nr_page_events; - unsigned long targets[MEM_CGROUP_NTARGETS]; -}; - -struct memcg_vmstats { - /* Aggregated (CPU and subtree) page state & events */ - long state[MEMCG_NR_STAT]; - unsigned long events[NR_VM_EVENT_ITEMS]; - - /* Pending child counts during tree propagation */ - long state_pending[MEMCG_NR_STAT]; - unsigned long events_pending[NR_VM_EVENT_ITEMS]; -}; +struct memcg_vmstats_percpu; +struct memcg_vmstats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; @@ -185,15 +164,6 @@ struct mem_cgroup_thresholds { struct mem_cgroup_threshold_ary *spare; }; -#if defined(CONFIG_SMP) -struct memcg_padding { - char x[0]; -} ____cacheline_internodealigned_in_smp; -#define MEMCG_PADDING(name) struct memcg_padding name -#else -#define MEMCG_PADDING(name) -#endif - /* * Remember four most recent foreign writebacks with dirty pages in this * cgroup. Inode sharing is expected to be uncommon and, even if we miss @@ -304,10 +274,10 @@ struct mem_cgroup { spinlock_t move_lock; unsigned long move_lock_flags; - MEMCG_PADDING(_pad1_); + CACHELINE_PADDING(_pad1_); /* memory.stat */ - struct memcg_vmstats vmstats; + struct memcg_vmstats *vmstats; /* memory.events */ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; @@ -326,7 +296,7 @@ struct mem_cgroup { struct list_head objcg_list; #endif - MEMCG_PADDING(_pad2_); + CACHELINE_PADDING(_pad2_); /* * set > 0 if pages under this cgroup are moving to other cgroup. @@ -350,14 +320,20 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_LRU_GEN + /* per-memcg mm_struct list */ + struct lru_gen_mm_list mm_list; +#endif + struct mem_cgroup_per_node *nodeinfo[]; }; /* - * size of first charge trial. "32" comes from vmscan.c's magic value. - * TODO: maybe necessary to use big numbers in big irons. + * size of first charge trial. + * TODO: maybe necessary to use big numbers in big irons or dynamic based of the + * workload. */ -#define MEMCG_CHARGE_BATCH 32U +#define MEMCG_CHARGE_BATCH 64U extern struct mem_cgroup *root_mem_cgroup; @@ -444,6 +420,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -505,6 +482,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem page a caller should hold an rcu read lock to protect memcg * associated with a kmem page from being released. @@ -689,7 +667,7 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } -int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, +int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -959,6 +937,23 @@ void unlock_page_memcg(struct page *page); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +/* try to stablize folio_memcg() for all the pages in a memcg */ +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + rcu_read_lock(); + + if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) + return true; + + rcu_read_unlock(); + return false; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) @@ -985,15 +980,7 @@ static inline void mod_memcg_page_state(struct page *page, rcu_read_unlock(); } -static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) -{ - long x = READ_ONCE(memcg->vmstats.state[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} +unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx); static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) @@ -1238,7 +1225,7 @@ static inline int mem_cgroup_charge(struct folio *folio, return 0; } -static inline int mem_cgroup_swapin_charge_page(struct page *page, +static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { return 0; @@ -1433,6 +1420,18 @@ static inline void folio_memcg_unlock(struct folio *folio) { } +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + /* to match folio_memcg_rcu() */ + rcu_read_lock(); + return true; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + static inline void mem_cgroup_handle_over_high(void) { } @@ -1779,7 +1778,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { struct mem_cgroup *memcg; - if (mem_cgroup_kmem_disabled()) + if (!memcg_kmem_enabled()) return; rcu_read_lock(); @@ -1788,42 +1787,6 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, rcu_read_unlock(); } -/** - * get_mem_cgroup_from_obj - get a memcg associated with passed kernel object. - * @p: pointer to object from which memcg should be extracted. It can be NULL. - * - * Retrieves the memory group into which the memory of the pointed kernel - * object is accounted. If memcg is found, its reference is taken. - * If a passed kernel object is uncharged, or if proper memcg cannot be found, - * as well as if mem_cgroup is disabled, NULL is returned. - * - * Return: valid memcg pointer with taken reference or NULL. - */ -static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p) -{ - struct mem_cgroup *memcg; - - rcu_read_lock(); - do { - memcg = mem_cgroup_from_obj(p); - } while (memcg && !css_tryget(&memcg->css)); - rcu_read_unlock(); - return memcg; -} - -/** - * mem_cgroup_or_root - always returns a pointer to a valid memory cgroup. - * @memcg: pointer to a valid memory cgroup or NULL. - * - * If passed argument is not NULL, returns it without any additional checks - * and changes. Otherwise, root_mem_cgroup is returned. - * - * NOTE: root_mem_cgroup can be NULL during early boot. - */ -static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg) -{ - return memcg ? memcg : root_mem_cgroup; -} #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1880,15 +1843,6 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { } -static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p) -{ - return NULL; -} - -static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg) -{ - return NULL; -} #endif /* CONFIG_MEMCG_KMEM */ #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h new file mode 100644 index 000000000000..965009aa01d7 --- /dev/null +++ b/include/linux/memory-tiers.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MEMORY_TIERS_H +#define _LINUX_MEMORY_TIERS_H + +#include <linux/types.h> +#include <linux/nodemask.h> +#include <linux/kref.h> +#include <linux/mmzone.h> +/* + * Each tier cover a abstrace distance chunk size of 128 + */ +#define MEMTIER_CHUNK_BITS 7 +#define MEMTIER_CHUNK_SIZE (1 << MEMTIER_CHUNK_BITS) +/* + * Smaller abstract distance values imply faster (higher) memory tiers. Offset + * the DRAM adistance so that we can accommodate devices with a slightly lower + * adistance value (slightly faster) than default DRAM adistance to be part of + * the same memory tier. + */ +#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) +#define MEMTIER_HOTPLUG_PRIO 100 + +struct memory_tier; +struct memory_dev_type { + /* list of memory types that are part of same tier as this type */ + struct list_head tier_sibiling; + /* abstract distance for this specific memory type */ + int adistance; + /* Nodes of same abstract distance */ + nodemask_t nodes; + struct kref kref; +}; + +#ifdef CONFIG_NUMA +extern bool numa_demotion_enabled; +struct memory_dev_type *alloc_memory_type(int adistance); +void destroy_memory_type(struct memory_dev_type *memtype); +void init_node_memory_type(int node, struct memory_dev_type *default_type); +void clear_node_memory_type(int node, struct memory_dev_type *memtype); +#ifdef CONFIG_MIGRATION +int next_demotion_node(int node); +void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); +bool node_is_toptier(int node); +#else +static inline int next_demotion_node(int node) +{ + return NUMA_NO_NODE; +} + +static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) +{ + *targets = NODE_MASK_NONE; +} + +static inline bool node_is_toptier(int node) +{ + return true; +} +#endif + +#else + +#define numa_demotion_enabled false +/* + * CONFIG_NUMA implementation returns non NULL error. + */ +static inline struct memory_dev_type *alloc_memory_type(int adistance) +{ + return NULL; +} + +static inline void destroy_memory_type(struct memory_dev_type *memtype) +{ + +} + +static inline void init_node_memory_type(int node, struct memory_dev_type *default_type) +{ + +} + +static inline void clear_node_memory_type(int node, struct memory_dev_type *memtype) +{ + +} + +static inline int next_demotion_node(int node) +{ + return NUMA_NO_NODE; +} + +static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) +{ + *targets = NODE_MASK_NONE; +} + +static inline bool node_is_toptier(int node) +{ + return true; +} +#endif /* CONFIG_NUMA */ +#endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0b2209ab71c..9fcbf5706595 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -11,7 +11,6 @@ struct page; struct zone; struct pglist_data; struct mem_section; -struct memory_block; struct memory_group; struct resource; struct vmem_altmap; @@ -44,11 +43,6 @@ extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); ({ \ memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ }) -/* - * This definition is just for error path in node hotadd. - * For node hotremove, we have to replace this. - */ -#define generic_free_nodedata(pgdat) kfree(pgdat) extern pg_data_t *node_data[]; static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) @@ -64,9 +58,6 @@ static inline pg_data_t *generic_alloc_nodedata(int nid) BUG(); return NULL; } -static inline void generic_free_nodedata(pg_data_t *pgdat) -{ -} static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) { } @@ -216,6 +207,22 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +/* See kswapd_is_running() */ +static inline void pgdat_kswapd_lock(pg_data_t *pgdat) +{ + mutex_lock(&pgdat->kswapd_lock); +} + +static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) +{ + mutex_unlock(&pgdat->kswapd_lock); +} + +static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) +{ + mutex_init(&pgdat->kswapd_lock); +} + #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ @@ -252,6 +259,10 @@ static inline bool movable_node_is_enabled(void) { return false; } + +static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {} +static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {} +static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {} #endif /* ! CONFIG_MEMORY_HOTPLUG */ /* @@ -333,7 +344,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern void remove_pfn_range_from_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); -extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_section(int nid, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 668389b4b53d..d232de7cdc56 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -151,13 +151,6 @@ extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); -static inline nodemask_t *policy_nodemask_current(gfp_t gfp) -{ - struct mempolicy *mpol = get_task_policy(current); - - return policy_nodemask(gfp, mpol); -} - extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -189,6 +182,7 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol) return (pol->mode == MPOL_PREFERRED_MANY); } +extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); #else @@ -294,11 +288,6 @@ static inline void mpol_put_task_policy(struct task_struct *task) { } -static inline nodemask_t *policy_nodemask_current(gfp_t gfp) -{ - return NULL; -} - static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 19010491a603..7fcaf3180a5b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -139,6 +139,11 @@ struct dev_pagemap { }; }; +static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap) +{ + return pgmap->ops && pgmap->ops->memory_failure; +} + static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) { if (pgmap->flags & PGMAP_ALTMAP_VALID) @@ -182,6 +187,7 @@ static inline bool folio_is_device_coherent(const struct folio *folio) } #ifdef CONFIG_ZONE_DEVICE +void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); diff --git a/include/linux/mfd/ocelot.h b/include/linux/mfd/ocelot.h new file mode 100644 index 000000000000..dd72073d2d4f --- /dev/null +++ b/include/linux/mfd/ocelot.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* Copyright 2022 Innovative Advantage Inc. */ + +#ifndef _LINUX_MFD_OCELOT_H +#define _LINUX_MFD_OCELOT_H + +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/types.h> + +struct resource; + +static inline struct regmap * +ocelot_regmap_from_resource_optional(struct platform_device *pdev, + unsigned int index, + const struct regmap_config *config) +{ + struct device *dev = &pdev->dev; + struct resource *res; + void __iomem *regs; + + /* + * Don't use _get_and_ioremap_resource() here, since that will invoke + * prints of "invalid resource" which will simply add confusion. + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, index); + if (res) { + regs = devm_ioremap_resource(dev, res); + if (IS_ERR(regs)) + return ERR_CAST(regs); + return devm_regmap_init_mmio(dev, regs, config); + } + + /* + * Fall back to using REG and getting the resource from the parent + * device, which is possible in an MFD configuration + */ + if (dev->parent) { + res = platform_get_resource(pdev, IORESOURCE_REG, index); + if (!res) + return NULL; + + return dev_get_regmap(dev->parent, res->name); + } + + return NULL; +} + +static inline struct regmap * +ocelot_regmap_from_resource(struct platform_device *pdev, unsigned int index, + const struct regmap_config *config) +{ + struct regmap *map; + + map = ocelot_regmap_from_resource_optional(pdev, index, config); + return map ?: ERR_PTR(-ENOENT); +} + +#endif diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 58602032e642..9af1f3105f80 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -519,6 +519,77 @@ enum rk809_reg_id { #define MIC_DIFF_DIS (0x0 << 7) #define MIC_DIFF_EN (0x1 << 7) +/* RK817 Battery Registers */ +#define RK817_GAS_GAUGE_ADC_CONFIG0 0x50 +#define RK817_GG_EN (0x1 << 7) +#define RK817_SYS_VOL_ADC_EN (0x1 << 6) +#define RK817_TS_ADC_EN (0x1 << 5) +#define RK817_USB_VOL_ADC_EN (0x1 << 4) +#define RK817_BAT_VOL_ADC_EN (0x1 << 3) +#define RK817_BAT_CUR_ADC_EN (0x1 << 2) + +#define RK817_GAS_GAUGE_ADC_CONFIG1 0x55 + +#define RK817_VOL_CUR_CALIB_UPD BIT(7) + +#define RK817_GAS_GAUGE_GG_CON 0x56 +#define RK817_GAS_GAUGE_GG_STS 0x57 + +#define RK817_BAT_CON (0x1 << 4) +#define RK817_RELAX_VOL_UPD (0x3 << 2) +#define RK817_RELAX_STS (0x1 << 1) + +#define RK817_GAS_GAUGE_RELAX_THRE_H 0x58 +#define RK817_GAS_GAUGE_RELAX_THRE_L 0x59 +#define RK817_GAS_GAUGE_OCV_THRE_VOL 0x62 +#define RK817_GAS_GAUGE_OCV_VOL_H 0x63 +#define RK817_GAS_GAUGE_OCV_VOL_L 0x64 +#define RK817_GAS_GAUGE_PWRON_VOL_H 0x6b +#define RK817_GAS_GAUGE_PWRON_VOL_L 0x6c +#define RK817_GAS_GAUGE_PWRON_CUR_H 0x6d +#define RK817_GAS_GAUGE_PWRON_CUR_L 0x6e +#define RK817_GAS_GAUGE_OFF_CNT 0x6f +#define RK817_GAS_GAUGE_Q_INIT_H3 0x70 +#define RK817_GAS_GAUGE_Q_INIT_H2 0x71 +#define RK817_GAS_GAUGE_Q_INIT_L1 0x72 +#define RK817_GAS_GAUGE_Q_INIT_L0 0x73 +#define RK817_GAS_GAUGE_Q_PRES_H3 0x74 +#define RK817_GAS_GAUGE_Q_PRES_H2 0x75 +#define RK817_GAS_GAUGE_Q_PRES_L1 0x76 +#define RK817_GAS_GAUGE_Q_PRES_L0 0x77 +#define RK817_GAS_GAUGE_BAT_VOL_H 0x78 +#define RK817_GAS_GAUGE_BAT_VOL_L 0x79 +#define RK817_GAS_GAUGE_BAT_CUR_H 0x7a +#define RK817_GAS_GAUGE_BAT_CUR_L 0x7b +#define RK817_GAS_GAUGE_USB_VOL_H 0x7e +#define RK817_GAS_GAUGE_USB_VOL_L 0x7f +#define RK817_GAS_GAUGE_SYS_VOL_H 0x80 +#define RK817_GAS_GAUGE_SYS_VOL_L 0x81 +#define RK817_GAS_GAUGE_Q_MAX_H3 0x82 +#define RK817_GAS_GAUGE_Q_MAX_H2 0x83 +#define RK817_GAS_GAUGE_Q_MAX_L1 0x84 +#define RK817_GAS_GAUGE_Q_MAX_L0 0x85 +#define RK817_GAS_GAUGE_SLEEP_CON_SAMP_CUR_H 0x8f +#define RK817_GAS_GAUGE_SLEEP_CON_SAMP_CUR_L 0x90 +#define RK817_GAS_GAUGE_CAL_OFFSET_H 0x91 +#define RK817_GAS_GAUGE_CAL_OFFSET_L 0x92 +#define RK817_GAS_GAUGE_VCALIB0_H 0x93 +#define RK817_GAS_GAUGE_VCALIB0_L 0x94 +#define RK817_GAS_GAUGE_VCALIB1_H 0x95 +#define RK817_GAS_GAUGE_VCALIB1_L 0x96 +#define RK817_GAS_GAUGE_IOFFSET_H 0x97 +#define RK817_GAS_GAUGE_IOFFSET_L 0x98 +#define RK817_GAS_GAUGE_BAT_R1 0x9a +#define RK817_GAS_GAUGE_BAT_R2 0x9b +#define RK817_GAS_GAUGE_BAT_R3 0x9c +#define RK817_GAS_GAUGE_DATA0 0x9d +#define RK817_GAS_GAUGE_DATA1 0x9e +#define RK817_GAS_GAUGE_DATA2 0x9f +#define RK817_GAS_GAUGE_DATA3 0xa0 +#define RK817_GAS_GAUGE_DATA4 0xa1 +#define RK817_GAS_GAUGE_DATA5 0xa2 +#define RK817_GAS_GAUGE_CUR_ADC_K0 0xb0 + #define RK817_POWER_EN_REG(i) (0xb1 + (i)) #define RK817_POWER_SLP_EN_REG(i) (0xb5 + (i)) @@ -544,10 +615,30 @@ enum rk809_reg_id { #define RK817_LDO_ON_VSEL_REG(idx) (0xcc + (idx) * 2) #define RK817_BOOST_OTG_CFG (0xde) +#define RK817_PMIC_CHRG_OUT 0xe4 +#define RK817_CHRG_VOL_SEL (0x07 << 4) +#define RK817_CHRG_CUR_SEL (0x07 << 0) + +#define RK817_PMIC_CHRG_IN 0xe5 +#define RK817_USB_VLIM_EN (0x01 << 7) +#define RK817_USB_VLIM_SEL (0x07 << 4) +#define RK817_USB_ILIM_EN (0x01 << 3) +#define RK817_USB_ILIM_SEL (0x07 << 0) +#define RK817_PMIC_CHRG_TERM 0xe6 +#define RK817_CHRG_TERM_ANA_DIG (0x01 << 2) +#define RK817_CHRG_TERM_ANA_SEL (0x03 << 0) +#define RK817_CHRG_EN (0x01 << 6) + +#define RK817_PMIC_CHRG_STS 0xeb +#define RK817_BAT_EXS BIT(7) +#define RK817_CHG_STS (0x07 << 4) + #define RK817_ID_MSB 0xed #define RK817_ID_LSB 0xee #define RK817_SYS_STS 0xf0 +#define RK817_PLUG_IN_STS (0x1 << 6) + #define RK817_SYS_CFG(i) (0xf1 + (i)) #define RK817_ON_SOURCE_REG 0xf5 diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 22c0a0cf5e0c..3ef77f52a4f0 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -62,6 +62,8 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); +int migrate_folio_extra(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode, int extra_count); int migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, @@ -100,21 +102,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ -#if defined(CONFIG_MIGRATION) && defined(CONFIG_NUMA) -extern void set_migration_target_nodes(void); -extern void migrate_on_reclaim_init(void); -extern bool numa_demotion_enabled; -extern int next_demotion_node(int node); -#else -static inline void set_migration_target_nodes(void) {} -static inline void migrate_on_reclaim_init(void) {} -static inline int next_demotion_node(int node) -{ - return NUMA_NO_NODE; -} -#define numa_demotion_enabled false -#endif - #ifdef CONFIG_COMPACTION bool PageMovable(struct page *page); void __SetPageMovable(struct page *page, const struct movable_operations *ops); @@ -212,11 +199,24 @@ struct migrate_vma { */ void *pgmap_owner; unsigned long flags; + + /* + * Set to vmf->page if this is being called to migrate a page as part of + * a migrate_to_ram() callback. + */ + struct page *fault_page; }; int migrate_vma_setup(struct migrate_vma *args); void migrate_vma_pages(struct migrate_vma *migrate); void migrate_vma_finalize(struct migrate_vma *migrate); +int migrate_device_range(unsigned long *src_pfns, unsigned long start, + unsigned long npages); +void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns, + unsigned long npages); +void migrate_device_finalize(unsigned long *src_pfns, + unsigned long *dst_pfns, unsigned long npages); + #endif /* CONFIG_MIGRATION */ #endif /* _LINUX_MIGRATE_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b5f58fd37a0f..1ff91cb79ded 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -325,6 +325,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MLX5_EVENT_TYPE_OBJECT_CHANGE = 0x27, MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08, MLX5_EVENT_TYPE_PORT_CHANGE = 0x09, @@ -699,6 +700,12 @@ struct mlx5_eqe_temp_warning { __be64 sensor_warning_lsb; } __packed; +struct mlx5_eqe_obj_change { + u8 rsvd0[2]; + __be16 obj_type; + __be32 obj_id; +} __packed; + #define SYNC_RST_STATE_MASK 0xf enum sync_rst_state_type { @@ -737,6 +744,7 @@ union ev_data { struct mlx5_eqe_xrq_err xrq_err; struct mlx5_eqe_sync_fw_update sync_fw_update; struct mlx5_eqe_vhca_state vhca_state; + struct mlx5_eqe_obj_change obj_change; } __packed; struct mlx5_eqe { @@ -874,12 +882,6 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) return cqe->op_own >> 4; } -static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe) -{ - /* num_of_mini_cqes is zero based */ - return get_cqe_opcode(cqe) + 1; -} - static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; @@ -890,11 +892,6 @@ static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) return (cqe->l4_l3_hdr_type >> 4) & 0x7; } -static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe) -{ - return (cqe->l4_l3_hdr_type >> 2) & 0x3; -} - static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe) { return cqe->tls_outer_l3_tunneled & 0x1; @@ -1198,8 +1195,10 @@ enum mlx5_cap_type { MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, MLX5_CAP_DEV_SHAMPO = 0x1d, + MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, + MLX5_CAP_ADV_VIRTUALIZATION = 0x26, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1365,6 +1364,14 @@ enum mlx5_qcam_feature_groups { MLX5_GET(port_selection_cap, \ mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->max, cap) +#define MLX5_CAP_ADV_VIRTUALIZATION(mdev, cap) \ + MLX5_GET(adv_virtualization_cap, \ + mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap) + +#define MLX5_CAP_ADV_VIRTUALIZATION_MAX(mdev, cap) \ + MLX5_GET(adv_virtualization_cap, \ + mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->max, cap) + #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) @@ -1446,6 +1453,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_DEV_SHAMPO(mdev, cap)\ MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap) +#define MLX5_CAP_MACSEC(mdev, cap)\ + MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c32de987fa71..a12929bc31b2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -698,6 +698,8 @@ struct mlx5_pps { struct work_struct out_work; u64 start[MAX_PIN_NUM]; u8 enabled; + u64 min_npps_period; + u64 min_out_pulse_duration_ns; }; struct mlx5_timer { @@ -855,11 +857,6 @@ struct mlx5_cmd_work_ent { refcount_t refcnt; }; -struct mlx5_pas { - u64 pa; - u8 log_sz; -}; - enum phy_port_state { MLX5_AAA_111 }; @@ -1016,11 +1013,11 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); -void mlx5_health_flush(struct mlx5_core_dev *dev); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, @@ -1085,8 +1082,6 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); -int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, - u8 port_num, void *out, size_t sz); int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -1153,6 +1148,7 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); @@ -1293,4 +1289,8 @@ static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) return mlx5_is_roce_on(dev); } +enum { + MLX5_OCTWORD = 16, +}; + #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8e73c377da2c..c7a91981cd5a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -79,6 +79,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, MLX5_FLOW_NAMESPACE_LAG, MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_ETHTOOL, @@ -92,7 +93,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_SNIFFER_RX, MLX5_FLOW_NAMESPACE_SNIFFER_TX, MLX5_FLOW_NAMESPACE_EGRESS, - MLX5_FLOW_NAMESPACE_EGRESS_KERNEL, + MLX5_FLOW_NAMESPACE_EGRESS_IPSEC, + MLX5_FLOW_NAMESPACE_EGRESS_MACSEC, MLX5_FLOW_NAMESPACE_RDMA_RX, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL, MLX5_FLOW_NAMESPACE_RDMA_TX, @@ -243,10 +245,10 @@ struct mlx5_flow_act { u32 action; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; - union { - u32 ipsec_obj_id; - uintptr_t esp_id; - }; + struct mlx5_flow_act_crypto_params { + u8 type; + u32 obj_id; + } crypto; u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h index 9db21cd0e92c..bc5125bc0561 100644 --- a/include/linux/mlx5/fs_helpers.h +++ b/include/linux/mlx5/fs_helpers.h @@ -38,46 +38,6 @@ #define MLX5_FS_IPV4_VERSION 4 #define MLX5_FS_IPV6_VERSION 6 -static inline bool mlx5_fs_is_ipsec_flow(const u32 *match_c) -{ - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters); - - return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); -} - -static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c, - const u32 *match_v, u8 match) -{ - const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - - return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff && - MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match; -} - -static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c, - const u32 *match_v) -{ - return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP); -} - -static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c, - const u32 *match_v) -{ - return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP); -} - -static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c) -{ - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters); - - return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni); -} - static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev, const u32 *match_c, const u32 *match_v, int version) @@ -131,12 +91,4 @@ mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c, MLX5_FS_IPV6_VERSION); } -static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c) -{ - void *misc_params_c = - MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); - - return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); -} - #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4acd5610e96b..5a4e914e2a6f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -68,6 +68,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, }; enum { @@ -82,6 +83,7 @@ enum { MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), }; enum { @@ -89,6 +91,7 @@ enum { MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c, MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, + MLX5_OBJ_TYPE_PAGE_TRACK = 0x46, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, MLX5_OBJ_TYPE_PSV = 0xff03, @@ -449,7 +452,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_60[0x2]; u8 reformat_insert[0x1]; u8 reformat_remove[0x1]; - u8 reserver_at_64[0x14]; + u8 macsec_encrypt[0x1]; + u8 macsec_decrypt[0x1]; + u8 reserved_at_66[0x2]; + u8 reformat_add_macsec[0x1]; + u8 reformat_remove_macsec[0x1]; + u8 reserved_at_6a[0xe]; u8 log_max_ft_num[0x8]; u8 reserved_at_80[0x10]; @@ -476,6 +484,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 reserved_at_6[0x1a]; }; +struct mlx5_ifc_ipv4_layout_bits { + u8 reserved_at_0[0x60]; + + u8 ipv4[0x20]; +}; + +struct mlx5_ifc_ipv6_layout_bits { + u8 ipv6[16][0x8]; +}; + +union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_layout_bits ipv6_layout; + struct mlx5_ifc_ipv4_layout_bits ipv4_layout; + u8 reserved_at_0[0x80]; +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -611,7 +635,11 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 reserved_at_1a0[0x60]; + u8 reserved_at_1a0[0x8]; + + u8 macsec_syndrome[0x8]; + + u8 reserved_at_1b0[0x50]; }; struct mlx5_ifc_fte_match_set_misc3_bits { @@ -813,7 +841,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_port_selection_cap_bits { u8 reserved_at_0[0x10]; u8 port_select_flow_table[0x1]; - u8 reserved_at_11[0xf]; + u8 reserved_at_11[0x1]; + u8 port_select_flow_table_bypass[0x1]; + u8 reserved_at_13[0xd]; u8 reserved_at_20[0x1e0]; @@ -1276,6 +1306,24 @@ struct mlx5_ifc_ipsec_cap_bits { u8 reserved_at_30[0x7d0]; }; +struct mlx5_ifc_macsec_cap_bits { + u8 macsec_epn[0x1]; + u8 reserved_at_1[0x2]; + u8 macsec_crypto_esp_aes_gcm_256_encrypt[0x1]; + u8 macsec_crypto_esp_aes_gcm_128_encrypt[0x1]; + u8 macsec_crypto_esp_aes_gcm_256_decrypt[0x1]; + u8 macsec_crypto_esp_aes_gcm_128_decrypt[0x1]; + u8 reserved_at_7[0x4]; + u8 log_max_macsec_offload[0x5]; + u8 reserved_at_10[0x10]; + + u8 min_log_macsec_full_replay_window[0x8]; + u8 max_log_macsec_full_replay_window[0x8]; + u8 reserved_at_30[0x10]; + + u8 reserved_at_40[0x7c0]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ -1443,7 +1491,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_120[0xa]; u8 log_max_ra_req_dc[0x6]; - u8 reserved_at_130[0x9]; + u8 reserved_at_130[0x2]; + u8 eth_wqe_too_small[0x1]; + u8 reserved_at_133[0x6]; u8 vnic_env_cq_overrun[0x1]; u8 log_max_ra_res_dc[0x6]; @@ -1707,7 +1757,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 steering_format_version[0x4]; u8 create_qp_start_hint[0x18]; - u8 reserved_at_460[0x3]; + u8 reserved_at_460[0x1]; + u8 ats[0x1]; + u8 reserved_at_462[0x1]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x2]; u8 ipsec_offload[0x1]; @@ -1733,7 +1785,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_geneve_tlv_options[0x8]; u8 reserved_at_568[0x3]; u8 max_geneve_tlv_option_data_len[0x5]; - u8 reserved_at_570[0x10]; + u8 reserved_at_570[0x9]; + u8 adv_virtualization[0x1]; + u8 reserved_at_57a[0x6]; u8 reserved_at_580[0xb]; u8 log_max_dci_stream_channels[0x5]; @@ -1828,7 +1882,13 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 max_reformat_remove_size[0x8]; u8 max_reformat_remove_offset[0x8]; - u8 reserved_at_c0[0x160]; + u8 reserved_at_c0[0xe0]; + + u8 reserved_at_1a0[0xb]; + u8 log_min_mkey_entity_size[0x5]; + u8 reserved_at_1b0[0x10]; + + u8 reserved_at_1c0[0x60]; u8 reserved_at_220[0x1]; u8 sw_vhca_id_valid[0x1]; @@ -3295,6 +3355,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; struct mlx5_ifc_shampo_cap_bits shampo_cap; + struct mlx5_ifc_macsec_cap_bits macsec_cap; u8 reserved_at_0[0x8000]; }; @@ -3310,8 +3371,8 @@ enum { MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100, MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 = 0x400, MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800, - MLX5_FLOW_CONTEXT_ACTION_IPSEC_DECRYPT = 0x1000, - MLX5_FLOW_CONTEXT_ACTION_IPSEC_ENCRYPT = 0x2000, + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT = 0x1000, + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT = 0x2000, MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO = 0x4000, }; @@ -3321,6 +3382,11 @@ enum { MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT = 0x2, }; +enum { + MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC = 0x0, + MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC = 0x1, +}; + struct mlx5_ifc_vlan_bits { u8 ethtype[0x10]; u8 prio[0x3]; @@ -3374,7 +3440,7 @@ struct mlx5_ifc_flow_context_bits { u8 extended_destination[0x1]; u8 reserved_at_81[0x1]; u8 flow_source[0x2]; - u8 reserved_at_84[0x4]; + u8 encrypt_decrypt_type[0x4]; u8 destination_list_size[0x18]; u8 reserved_at_a0[0x8]; @@ -3386,7 +3452,7 @@ struct mlx5_ifc_flow_context_bits { struct mlx5_ifc_vlan_bits push_vlan_2; - u8 ipsec_obj_id[0x20]; + u8 encrypt_decrypt_obj_id[0x20]; u8 reserved_at_140[0xc0]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -3475,7 +3541,9 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 cq_overrun[0x20]; - u8 reserved_at_220[0xde0]; + u8 eth_wqe_too_small[0x20]; + + u8 reserved_at_220[0xdc0]; }; struct mlx5_ifc_traffic_counter_bits { @@ -3873,7 +3941,9 @@ struct mlx5_ifc_mkc_bits { u8 lw[0x1]; u8 lr[0x1]; u8 access_mode_1_0[0x2]; - u8 reserved_at_18[0x8]; + u8 reserved_at_18[0x2]; + u8 ma_translation_mode[0x2]; + u8 reserved_at_1c[0x4]; u8 qpn[0x18]; u8 mkey_7_0[0x8]; @@ -6316,6 +6386,8 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, + MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, + MLX5_REFORMAT_TYPE_DEL_MACSEC = 0x12, }; struct mlx5_ifc_alloc_packet_reformat_context_in_bits { @@ -9789,7 +9861,9 @@ struct mlx5_ifc_pcam_reg_bits { struct mlx5_ifc_mcam_enhanced_features_bits { u8 reserved_at_0[0x5d]; u8 mcia_32dwords[0x1]; - u8 reserved_at_5e[0xc]; + u8 out_pulse_duration_ns[0x1]; + u8 npps_period[0x1]; + u8 reserved_at_60[0xa]; u8 reset_state[0x1]; u8 ptpcyc2realtime_modify[0x1]; u8 reserved_at_6c[0x2]; @@ -10289,7 +10363,12 @@ struct mlx5_ifc_mtpps_reg_bits { u8 reserved_at_18[0x4]; u8 cap_max_num_of_pps_out_pins[0x4]; - u8 reserved_at_20[0x24]; + u8 reserved_at_20[0x13]; + u8 cap_log_min_npps_period[0x5]; + u8 reserved_at_38[0x3]; + u8 cap_log_min_out_pulse_duration_ns[0x5]; + + u8 reserved_at_40[0x4]; u8 cap_pin_3_mode[0x4]; u8 reserved_at_48[0x4]; u8 cap_pin_2_mode[0x4]; @@ -10308,7 +10387,9 @@ struct mlx5_ifc_mtpps_reg_bits { u8 cap_pin_4_mode[0x4]; u8 field_select[0x20]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 npps_period[0x40]; u8 enable[0x1]; u8 reserved_at_101[0xb]; @@ -10317,7 +10398,8 @@ struct mlx5_ifc_mtpps_reg_bits { u8 pin_mode[0x4]; u8 pin[0x8]; - u8 reserved_at_120[0x20]; + u8 reserved_at_120[0x2]; + u8 out_pulse_duration_ns[0x1e]; u8 time_stamp[0x40]; @@ -10920,7 +11002,9 @@ struct mlx5_ifc_lagc_bits { u8 reserved_at_18[0x5]; u8 lag_state[0x3]; - u8 reserved_at_20[0x14]; + u8 reserved_at_20[0xc]; + u8 active_port[0x4]; + u8 reserved_at_30[0x4]; u8 tx_remap_affinity_2[0x4]; u8 reserved_at_38[0x4]; u8 tx_remap_affinity_1[0x4]; @@ -11134,7 +11218,8 @@ struct mlx5_ifc_dealloc_memic_out_bits { struct mlx5_ifc_umem_bits { u8 reserved_at_0[0x80]; - u8 reserved_at_80[0x1b]; + u8 ats[0x1]; + u8 reserved_at_81[0x1a]; u8 log_page_size[0x5]; u8 page_offset[0x20]; @@ -11471,6 +11556,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, + MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, }; enum { @@ -11521,6 +11607,96 @@ struct mlx5_ifc_modify_ipsec_obj_in_bits { struct mlx5_ifc_ipsec_obj_bits ipsec_object; }; +enum { + MLX5_MACSEC_ASO_REPLAY_PROTECTION = 0x1, +}; + +enum { + MLX5_MACSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_MACSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_MACSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_MACSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + +#define MLX5_MACSEC_ASO_INC_SN 0x2 +#define MLX5_MACSEC_ASO_REG_C_4_5 0x2 + +struct mlx5_ifc_macsec_aso_bits { + u8 valid[0x1]; + u8 reserved_at_1[0x1]; + u8 mode[0x2]; + u8 window_size[0x2]; + u8 soft_lifetime_arm[0x1]; + u8 hard_lifetime_arm[0x1]; + u8 remove_flow_enable[0x1]; + u8 epn_event_arm[0x1]; + u8 reserved_at_a[0x16]; + + u8 remove_flow_packet_count[0x20]; + + u8 remove_flow_soft_lifetime[0x20]; + + u8 reserved_at_60[0x80]; + + u8 mode_parameter[0x20]; + + u8 replay_protection_window[8][0x20]; +}; + +struct mlx5_ifc_macsec_offload_obj_bits { + u8 modify_field_select[0x40]; + + u8 confidentiality_en[0x1]; + u8 reserved_at_41[0x1]; + u8 epn_en[0x1]; + u8 epn_overlap[0x1]; + u8 reserved_at_44[0x2]; + u8 confidentiality_offset[0x2]; + u8 reserved_at_48[0x4]; + u8 aso_return_reg[0x4]; + u8 reserved_at_50[0x10]; + + u8 epn_msb[0x20]; + + u8 reserved_at_80[0x8]; + u8 dekn[0x18]; + + u8 reserved_at_a0[0x20]; + + u8 sci[0x40]; + + u8 reserved_at_100[0x8]; + u8 macsec_aso_access_pd[0x18]; + + u8 reserved_at_120[0x60]; + + u8 salt[3][0x20]; + + u8 reserved_at_1e0[0x20]; + + struct mlx5_ifc_macsec_aso_bits macsec_aso; +}; + +struct mlx5_ifc_create_macsec_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_macsec_offload_obj_bits macsec_object; +}; + +struct mlx5_ifc_modify_macsec_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_macsec_offload_obj_bits macsec_object; +}; + +enum { + MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP = BIT(0), + MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB = BIT(1), +}; + +struct mlx5_ifc_query_macsec_obj_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + struct mlx5_ifc_macsec_offload_obj_bits macsec_object; +}; + struct mlx5_ifc_encryption_key_obj_bits { u8 modify_field_select[0x40]; @@ -11638,6 +11814,7 @@ enum { enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS = 0x1, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC = 0x2, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC = 0x4, }; struct mlx5_ifc_tls_static_params_bits { @@ -11818,4 +11995,82 @@ struct mlx5_ifc_load_vhca_state_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_adv_virtualization_cap_bits { + u8 reserved_at_0[0x3]; + u8 pg_track_log_max_num[0x5]; + u8 pg_track_max_num_range[0x8]; + u8 pg_track_log_min_addr_space[0x8]; + u8 pg_track_log_max_addr_space[0x8]; + + u8 reserved_at_20[0x3]; + u8 pg_track_log_min_msg_size[0x5]; + u8 reserved_at_28[0x3]; + u8 pg_track_log_max_msg_size[0x5]; + u8 reserved_at_30[0x3]; + u8 pg_track_log_min_page_size[0x5]; + u8 reserved_at_38[0x3]; + u8 pg_track_log_max_page_size[0x5]; + + u8 reserved_at_40[0x7c0]; +}; + +struct mlx5_ifc_page_track_report_entry_bits { + u8 dirty_address_high[0x20]; + + u8 dirty_address_low[0x20]; +}; + +enum { + MLX5_PAGE_TRACK_STATE_TRACKING, + MLX5_PAGE_TRACK_STATE_REPORTING, + MLX5_PAGE_TRACK_STATE_ERROR, +}; + +struct mlx5_ifc_page_track_range_bits { + u8 start_address[0x40]; + + u8 length[0x40]; +}; + +struct mlx5_ifc_page_track_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x10]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x20]; + + u8 state[0x4]; + u8 track_type[0x4]; + u8 log_addr_space_size[0x8]; + u8 reserved_at_90[0x3]; + u8 log_page_size[0x5]; + u8 reserved_at_98[0x3]; + u8 log_msg_size[0x5]; + + u8 reserved_at_a0[0x8]; + u8 reporting_qpn[0x18]; + + u8 reserved_at_c0[0x18]; + u8 num_ranges[0x8]; + + u8 reserved_at_e0[0x20]; + + u8 range_start_address[0x40]; + + u8 length[0x40]; + + struct mlx5_ifc_page_track_range_bits track_range[0]; +}; + +struct mlx5_ifc_create_page_track_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_page_track_bits obj_context; +}; + +struct mlx5_ifc_modify_page_track_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_page_track_bits obj_context; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 45c7c0d67635..0596472923ad 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -32,30 +32,6 @@ #ifndef MLX5_IFC_FPGA_H #define MLX5_IFC_FPGA_H -struct mlx5_ifc_ipv4_layout_bits { - u8 reserved_at_0[0x60]; - - u8 ipv4[0x20]; -}; - -struct mlx5_ifc_ipv6_layout_bits { - u8 ipv6[16][0x8]; -}; - -union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { - struct mlx5_ifc_ipv6_layout_bits ipv6_layout; - struct mlx5_ifc_ipv4_layout_bits ipv4_layout; - u8 reserved_at_0[0x80]; -}; - -enum { - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9, -}; - -enum { - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, -}; - struct mlx5_ifc_fpga_shell_caps_bits { u8 max_num_qps[0x10]; u8 reserved_at_10[0x8]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 8bda3ba5b109..4657d5c54abe 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -162,6 +162,8 @@ enum { MLX5_SEND_WQE_MAX_WQEBBS = 16, }; +#define MLX5_SEND_WQE_MAX_SIZE (MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQE_BB) + enum { MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, @@ -252,6 +254,7 @@ enum { enum { MLX5_ETH_WQE_FT_META_IPSEC = BIT(0), + MLX5_ETH_WQE_FT_META_MACSEC = BIT(1), }; struct mlx5_wqe_eth_seg { @@ -475,6 +478,12 @@ struct mlx5_klm { __be64 va; }; +struct mlx5_ksm { + __be32 reserved; + __be32 key; + __be64 va; +}; + struct mlx5_stride_block_entry { __be16 stride; __be16 bcount; diff --git a/include/linux/mm.h b/include/linux/mm.h index 21f8b27bd9fd..8bbcccbc5565 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -661,6 +661,38 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & VM_ACCESS_FLAGS; } +static inline +struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) +{ + return mas_find(&vmi->mas, max); +} + +static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) +{ + /* + * Uses vma_find() to get the first VMA when the iterator starts. + * Calling mas_next() could skip the first entry. + */ + return vma_find(vmi, ULONG_MAX); +} + +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) +{ + return mas_prev(&vmi->mas, 0); +} + +static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) +{ + return vmi->mas.index; +} + +#define for_each_vma(__vmi, __vma) \ + while (((__vma) = vma_next(&(__vmi))) != NULL) + +/* The MM code likes to work with exclusive end addresses */ +#define for_each_vma_range(__vmi, __vma, __end) \ + while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL) + #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow @@ -697,7 +729,9 @@ static inline unsigned int compound_order(struct page *page) */ static inline unsigned int folio_order(struct folio *folio) { - return compound_order(&folio->page); + if (!folio_test_large(folio)) + return 0; + return folio->_folio_order; } #include <linux/huge_mm.h> @@ -1255,6 +1289,18 @@ static inline int folio_nid(const struct folio *folio) } #ifdef CONFIG_NUMA_BALANCING +/* page access time bits needs to hold at least 4 seconds */ +#define PAGE_ACCESS_TIME_MIN_BITS 12 +#if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS +#define PAGE_ACCESS_TIME_BUCKETS \ + (PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT) +#else +#define PAGE_ACCESS_TIME_BUCKETS 0 +#endif + +#define PAGE_ACCESS_TIME_MASK \ + (LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS) + static inline int cpu_pid_to_cpupid(int cpu, int pid) { return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK); @@ -1318,12 +1364,25 @@ static inline void page_cpupid_reset_last(struct page *page) page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ + +static inline int xchg_page_access_time(struct page *page, int time) +{ + int last_time; + + last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); + return last_time << PAGE_ACCESS_TIME_BUCKETS; +} #else /* !CONFIG_NUMA_BALANCING */ static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { return page_to_nid(page); /* XXX */ } +static inline int xchg_page_access_time(struct page *page, int time) +{ + return 0; +} + static inline int page_cpupid_last(struct page *page) { return page_to_nid(page); /* XXX */ @@ -1465,6 +1524,11 @@ static inline unsigned long folio_pfn(struct folio *folio) return page_to_pfn(&folio->page); } +static inline struct folio *pfn_folio(unsigned long pfn) +{ + return page_folio(pfn_to_page(pfn)); +} + static inline atomic_t *folio_pincount_ptr(struct folio *folio) { return &folio_page(folio, 1)->compound_pincount; @@ -1597,7 +1661,13 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ static inline long folio_nr_pages(struct folio *folio) { - return compound_nr(&folio->page); + if (!folio_test_large(folio)) + return 1; +#ifdef CONFIG_64BIT + return folio->_folio_nr_pages; +#else + return 1L << folio->_folio_order; +#endif } /** @@ -1776,7 +1846,11 @@ extern void pagefault_out_of_memory(void); */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ -extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); +extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); +static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask) +{ + __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); +} #ifdef CONFIG_MMU extern bool can_do_mlock(void); @@ -1795,8 +1869,9 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); -void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, - unsigned long start, unsigned long end); +void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, + struct vm_area_struct *start_vma, unsigned long start, + unsigned long end); struct mmu_notifier_range; @@ -2495,7 +2570,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); -extern unsigned long find_min_pfn_with_active_regions(void); #ifndef CONFIG_NUMA static inline int early_pfn_to_nid(unsigned long pfn) @@ -2516,7 +2590,12 @@ extern void calculate_min_free_kbytes(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); -extern void show_mem(unsigned int flags, nodemask_t *nodemask); + +extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); +static inline void show_mem(unsigned int flags, nodemask_t *nodemask) +{ + __show_mem(flags, nodemask, MAX_NR_ZONES - 1); +} extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -2593,14 +2672,15 @@ extern int __split_vma(struct mm_struct *, struct vm_area_struct *, extern int split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, - struct rb_node **, struct rb_node *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); +void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas); +void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas); + static inline int check_data_rlimit(unsigned long rlim, unsigned long new, unsigned long start, @@ -2648,8 +2728,9 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf); -extern int __do_munmap(struct mm_struct *, unsigned long, size_t, - struct list_head *uf, bool downgrade); +extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm, + unsigned long start, size_t len, struct list_head *uf, + bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); @@ -2716,26 +2797,12 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, struct vm_area_struct **pprev); -/** - * find_vma_intersection() - Look up the first VMA which intersects the interval - * @mm: The process address space. - * @start_addr: The inclusive start user address. - * @end_addr: The exclusive end user address. - * - * Returns: The first VMA within the provided range, %NULL otherwise. Assumes - * start_addr < end_addr. +/* + * Look up the first VMA which intersects the interval [start_addr, end_addr) + * NULL if none. Assume start_addr < end_addr. */ -static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, - unsigned long start_addr, - unsigned long end_addr) -{ - struct vm_area_struct *vma = find_vma(mm, start_addr); - - if (vma && end_addr <= vma->vm_start) - vma = NULL; - return vma; -} + unsigned long start_addr, unsigned long end_addr); /** * vma_lookup() - Find a VMA at a specific address @@ -2747,12 +2814,7 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, static inline struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = find_vma(mm, addr); - - if (vma && addr < vma->vm_start) - vma = NULL; - - return vma; + return mtree_load(&mm->mm_mt, addr); } static inline unsigned long vm_start_gap(struct vm_area_struct *vma) @@ -2788,7 +2850,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end) { - struct vm_area_struct *vma = find_vma(mm, vm_start); + struct vm_area_struct *vma = vma_lookup(mm, vm_start); if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end)) vma = NULL; @@ -2888,7 +2950,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * and return without waiting upon it */ #define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ @@ -2975,8 +3036,8 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq - * * GUP-fast and KSM or temporary unmapping (swap, migration): - * clear/invalidate+flush of the page table entry + * * GUP-fast and KSM or temporary unmapping (swap, migration): see + * page_try_share_anon_rmap() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a @@ -2997,6 +3058,11 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) */ if (!PageAnon(page)) return false; + + /* Paired with a memory barrier in page_try_share_anon_rmap(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_rmb(); + /* * Note that PageKsm() pages cannot be exclusive, and consequently, * cannot get pinned. @@ -3004,6 +3070,21 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) return !PageAnonExclusive(page); } +/* + * Indicates whether GUP can follow a PROT_NONE mapped page, or whether + * a (NUMA hinting) fault is required. + */ +static inline bool gup_can_follow_protnone(unsigned int flags) +{ + /* + * FOLL_FORCE has to be able to make progress even if the VMA is + * inaccessible. Further, FOLL_FORCE access usually does not represent + * application behaviour and we should avoid triggering NUMA hinting + * faults. + */ + return flags & FOLL_FORCE; +} + typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); @@ -3011,7 +3092,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); -extern void init_mem_debugging_and_hardening(void); +extern void __init init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING extern void __kernel_poison_pages(struct page *page, int numpages); extern void __kernel_unpoison_pages(struct page *page, int numpages); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 7b25b53c474a..e8ed225d8f7c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -34,15 +34,25 @@ static inline int page_is_file_lru(struct page *page) return folio_is_file_lru(page_folio(page)); } -static __always_inline void update_lru_size(struct lruvec *lruvec, +static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); + lockdep_assert_held(&lruvec->lru_lock); + WARN_ON_ONCE(nr_pages != (int)nr_pages); + __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); +} + +static __always_inline void update_lru_size(struct lruvec *lruvec, + enum lru_list lru, enum zone_type zid, + long nr_pages) +{ + __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif @@ -66,11 +76,6 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio) __folio_clear_unevictable(folio); } -static __always_inline void __clear_page_lru_flags(struct page *page) -{ - __folio_clear_lru_flags(page_folio(page)); -} - /** * folio_lru_list - Which LRU list should a folio be on? * @folio: The folio to test. @@ -94,11 +99,224 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return lru; } +#ifdef CONFIG_LRU_GEN + +#ifdef CONFIG_LRU_GEN_ENABLED +static inline bool lru_gen_enabled(void) +{ + DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); +} +#else +static inline bool lru_gen_enabled(void) +{ + DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); +} +#endif + +static inline bool lru_gen_in_fault(void) +{ + return current->in_lru_fault; +} + +static inline int lru_gen_from_seq(unsigned long seq) +{ + return seq % MAX_NR_GENS; +} + +static inline int lru_hist_from_seq(unsigned long seq) +{ + return seq % NR_HIST_GENS; +} + +static inline int lru_tier_from_refs(int refs) +{ + VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH)); + + /* see the comment in folio_lru_refs() */ + return order_base_2(refs + 1); +} + +static inline int folio_lru_refs(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); + bool workingset = flags & BIT(PG_workingset); + + /* + * Return the number of accesses beyond PG_referenced, i.e., N-1 if the + * total number of accesses is N>1, since N=0,1 both map to the first + * tier. lru_tier_from_refs() will account for this off-by-one. Also see + * the comment on MAX_NR_TIERS. + */ + return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; +} + +static inline int folio_lru_gen(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); + + return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} + +static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +{ + unsigned long max_seq = lruvec->lrugen.max_seq; + + VM_WARN_ON_ONCE(gen >= MAX_NR_GENS); + + /* see the comment on MIN_NR_GENS */ + return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); +} + +static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio, + int old_gen, int new_gen) +{ + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int delta = folio_nr_pages(folio); + enum lru_list lru = type * LRU_INACTIVE_FILE; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); + VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); + VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1); + + if (old_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], + lrugen->nr_pages[old_gen][type][zone] - delta); + if (new_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], + lrugen->nr_pages[new_gen][type][zone] + delta); + + /* addition */ + if (old_gen < 0) { + if (lru_gen_is_active(lruvec, new_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + return; + } + + /* deletion */ + if (new_gen < 0) { + if (lru_gen_is_active(lruvec, old_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, -delta); + return; + } + + /* promotion */ + if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { + __update_lru_size(lruvec, lru, zone, -delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); + } + + /* demotion requires isolation, e.g., lru_deactivate_fn() */ + VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + unsigned long seq; + unsigned long flags; + int gen = folio_lru_gen(folio); + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); + + if (folio_test_unevictable(folio) || !lrugen->enabled) + return false; + /* + * There are three common cases for this page: + * 1. If it's hot, e.g., freshly faulted in or previously hot and + * migrated, add it to the youngest generation. + * 2. If it's cold but can't be evicted immediately, i.e., an anon page + * not in swapcache or a dirty page pending writeback, add it to the + * second oldest generation. + * 3. Everything else (clean, cold) is added to the oldest generation. + */ + if (folio_test_active(folio)) + seq = lrugen->max_seq; + else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || + (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio)))) + seq = lrugen->min_seq[type] + 1; + else + seq = lrugen->min_seq[type]; + + gen = lru_gen_from_seq(seq); + flags = (gen + 1UL) << LRU_GEN_PGOFF; + /* see the comment on MIN_NR_GENS about PG_active */ + set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + + lru_gen_update_size(lruvec, folio, -1, gen); + /* for folio_rotate_reclaimable() */ + if (reclaiming) + list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + else + list_add(&folio->lru, &lrugen->lists[gen][type][zone]); + + return true; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + unsigned long flags; + int gen = folio_lru_gen(folio); + + if (gen < 0) + return false; + + VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); + + /* for folio_migrate_flags() */ + flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0; + flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); + gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + + lru_gen_update_size(lruvec, folio, gen, -1); + list_del(&folio->lru); + + return true; +} + +#else /* !CONFIG_LRU_GEN */ + +static inline bool lru_gen_enabled(void) +{ + return false; +} + +static inline bool lru_gen_in_fault(void) +{ + return false; +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +#endif /* CONFIG_LRU_GEN */ + static __always_inline void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, false)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); if (lru != LRU_UNEVICTABLE) @@ -116,23 +334,23 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, true)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); /* This is not expected to be used on LRU_UNEVICTABLE */ list_add_tail(&folio->lru, &lruvec->lists[lru]); } -static __always_inline void add_page_to_lru_list_tail(struct page *page, - struct lruvec *lruvec) -{ - lruvec_add_folio_tail(lruvec, page_folio(page)); -} - static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_del_folio(lruvec, folio, false)) + return; + if (lru != LRU_UNEVICTABLE) list_del(&folio->lru); update_lru_size(lruvec, lru, folio_zonenum(folio), diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cf97f3884fda..500e536796ca 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> +#include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> @@ -223,6 +224,18 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ +#ifdef CONFIG_KMSAN + /* + * KMSAN metadata for this page: + * - shadow page: every bit indicates whether the corresponding + * bit of the original page is initialized (0) or not (1); + * - origin page: every 4 bytes contain an id of the stack trace + * where the uninitialized value was created. + */ + struct page *kmsan_shadow; + struct page *kmsan_origin; +#endif + #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif @@ -244,6 +257,13 @@ struct page { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @_flags_1: For large folios, additional page flags. + * @__head: Points to the folio. Do not use. + * @_folio_dtor: Which destructor to use for this folio. + * @_folio_order: Do not use directly, call folio_order(). + * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). + * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -282,9 +302,17 @@ struct folio { }; struct page page; }; + unsigned long _flags_1; + unsigned long __head; + unsigned char _folio_dtor; + unsigned char _folio_order; + atomic_t _total_mapcount; + atomic_t _pincount; +#ifdef CONFIG_64BIT + unsigned int _folio_nr_pages; +#endif }; -static_assert(sizeof(struct page) == sizeof(struct folio)); #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); @@ -299,6 +327,19 @@ FOLIO_MATCH(_refcount, _refcount); FOLIO_MATCH(memcg_data, memcg_data); #endif #undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + sizeof(struct page)) +FOLIO_MATCH(flags, _flags_1); +FOLIO_MATCH(compound_head, __head); +FOLIO_MATCH(compound_dtor, _folio_dtor); +FOLIO_MATCH(compound_order, _folio_order); +FOLIO_MATCH(compound_mapcount, _total_mapcount); +FOLIO_MATCH(compound_pincount, _pincount); +#ifdef CONFIG_64BIT +FOLIO_MATCH(compound_nr, _folio_nr_pages); +#endif +#undef FOLIO_MATCH static inline atomic_t *folio_mapcount_ptr(struct folio *folio) { @@ -407,21 +448,6 @@ struct vm_area_struct { unsigned long vm_end; /* The first byte after our end address within vm_mm. */ - /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next, *vm_prev; - - struct rb_node vm_rb; - - /* - * Largest free memory gap in bytes to the left of this VMA. - * Either between this VMA and vma->vm_prev, or between one of the - * VMAs below us in the VMA rbtree and its ->vm_prev. This helps - * get_unmapped_area find a free area of the right size. - */ - unsigned long rb_subtree_gap; - - /* Second cache line starts here. */ - struct mm_struct *vm_mm; /* The address space we belong to. */ /* @@ -485,9 +511,7 @@ struct vm_area_struct { struct kioctx_table; struct mm_struct { struct { - struct vm_area_struct *mmap; /* list of VMAs */ - struct rb_root mm_rb; - u64 vmacache_seqnum; /* per-thread vmacache */ + struct maple_tree mm_mt; #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, @@ -501,7 +525,6 @@ struct mm_struct { unsigned long mmap_compat_legacy_base; #endif unsigned long task_size; /* size of task vm space */ - unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; #ifdef CONFIG_MEMBARRIER @@ -631,22 +654,22 @@ struct mm_struct { #endif #ifdef CONFIG_NUMA_BALANCING /* - * numa_next_scan is the next time that the PTEs will be marked - * pte_numa. NUMA hinting faults will gather statistics and - * migrate pages to new nodes if necessary. + * numa_next_scan is the next time that PTEs will be remapped + * PROT_NONE to trigger NUMA hinting faults; such faults gather + * statistics and migrate pages to new nodes if necessary. */ unsigned long numa_next_scan; - /* Restart point for scanning and setting pte_numa */ + /* Restart point for scanning and remapping PTEs. */ unsigned long numa_scan_offset; - /* numa_scan_seq prevents two threads setting pte_numa */ + /* numa_scan_seq prevents two threads remapping PTEs. */ int numa_scan_seq; #endif /* * An operation with batched TLB flushing is going on. Anything * that can move process memory needs to flush the TLB when - * moving a PROT_NONE or PROT_NUMA mapped page. + * moving a PROT_NONE mapped page. */ atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH @@ -671,7 +694,28 @@ struct mm_struct { * merging. */ unsigned long ksm_merging_pages; + /* + * Represent how many pages are checked for ksm merging + * including merged and not merged. + */ + unsigned long ksm_rmap_items; +#endif +#ifdef CONFIG_LRU_GEN + struct { + /* this mm_struct is on lru_gen_mm_list */ + struct list_head list; + /* + * Set when switching to this mm_struct, as a hint of + * whether it has been used since the last time per-node + * page table walkers cleared the corresponding bits. + */ + unsigned long bitmap; +#ifdef CONFIG_MEMCG + /* points to the memcg of "owner" above */ + struct mem_cgroup *memcg; #endif + } lru_gen; +#endif /* CONFIG_LRU_GEN */ } __randomize_layout; /* @@ -681,6 +725,7 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ @@ -698,6 +743,87 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } +#ifdef CONFIG_LRU_GEN + +struct lru_gen_mm_list { + /* mm_struct list for page table walkers */ + struct list_head fifo; + /* protects the list above */ + spinlock_t lock; +}; + +void lru_gen_add_mm(struct mm_struct *mm); +void lru_gen_del_mm(struct mm_struct *mm); +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm); +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ + INIT_LIST_HEAD(&mm->lru_gen.list); + mm->lru_gen.bitmap = 0; +#ifdef CONFIG_MEMCG + mm->lru_gen.memcg = NULL; +#endif +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ + /* + * When the bitmap is set, page reclaim knows this mm_struct has been + * used since the last time it cleared the bitmap. So it might be worth + * walking the page tables of this mm_struct to clear the accessed bit. + */ + WRITE_ONCE(mm->lru_gen.bitmap, -1); +} + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_add_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_del_mm(struct mm_struct *mm) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_migrate_mm(struct mm_struct *mm) +{ +} +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ +} + +#endif /* CONFIG_LRU_GEN */ + +struct vma_iterator { + struct ma_state mas; +}; + +#define VMA_ITERATOR(name, __mm, __addr) \ + struct vma_iterator name = { \ + .mas = { \ + .tree = &(__mm)->mm_mt, \ + .index = __addr, \ + .node = MAS_START, \ + }, \ + } + +static inline void vma_iter_init(struct vma_iterator *vmi, + struct mm_struct *mm, unsigned long addr) +{ + vmi->mas.tree = &mm->mm_mt; + vmi->mas.index = addr; + vmi->mas.node = MAS_START; +} + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index c1bc6731125c..0bb4b6da9993 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -25,18 +25,6 @@ #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* - * The per task VMA cache array: - */ -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -struct vmacache { - u64 seqnum; - struct vm_area_struct *vmas[VMACACHE_SIZE]; -}; - -/* * When updating this, please also update struct resident_page_types[] in * kernel/fork.c */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 8a30de08e913..c726ea781255 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -293,6 +293,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ +#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb8bc5b9b0b7..8fdd3cf971a3 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -476,7 +476,7 @@ struct mmc_host { unsigned int sdio_irqs; struct task_struct *sdio_irq_thread; - struct delayed_work sdio_irq_work; + struct work_struct sdio_irq_work; bool sdio_irq_pending; atomic_t sdio_irq_thread_abort; diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 53f0efa0bccf..74f9d9a6d330 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -74,6 +74,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 +#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43439 0xa9af #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8 diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 15ae78cd2853..b8728d11c949 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm); #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif +#ifdef CONFIG_DEBUG_VM_IRQSOFF +#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled()) +#else +#define VM_WARN_ON_IRQS_ENABLED() do { } while (0) +#endif + #ifdef CONFIG_DEBUG_VIRTUAL #define VIRTUAL_BUG_ON(cond) BUG_ON(cond) #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e24b40c52468..5f74891556f3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -24,10 +24,10 @@ #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ -#ifndef CONFIG_FORCE_MAX_ZONEORDER +#ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_ORDER 11 #else -#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER +#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) @@ -121,20 +121,6 @@ static inline bool free_area_empty(struct free_area *area, int migratetype) struct pglist_data; -/* - * Add a wild amount of padding here to ensure data fall into separate - * cachelines. There are very few zone structures in the machine, so space - * consumption is not a concern here. - */ -#if defined(CONFIG_SMP) -struct zone_padding { - char x[0]; -} ____cacheline_internodealigned_in_smp; -#define ZONE_PADDING(name) struct zone_padding name; -#else -#define ZONE_PADDING(name) -#endif - #ifdef CONFIG_NUMA enum numa_stat_item { NUMA_HIT, /* allocated in intended node */ @@ -216,11 +202,13 @@ enum node_stat_item { NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ + NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */ #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ + PGPROMOTE_CANDIDATE, /* candidate pages to promote */ #endif NR_VM_NODE_STAT_ITEMS }; @@ -306,6 +294,8 @@ static inline bool is_active_lru(enum lru_list lru) return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } +#define WORKINGSET_ANON 0 +#define WORKINGSET_FILE 1 #define ANON_AND_FILE 2 enum lruvec_flags { @@ -314,6 +304,207 @@ enum lruvec_flags { */ }; +#endif /* !__GENERATING_BOUNDS_H */ + +/* + * Evictable pages are divided into multiple generations. The youngest and the + * oldest generation numbers, max_seq and min_seq, are monotonically increasing. + * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An + * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the + * corresponding generation. The gen counter in folio->flags stores gen+1 while + * a page is on one of lrugen->lists[]. Otherwise it stores 0. + * + * A page is added to the youngest generation on faulting. The aging needs to + * check the accessed bit at least twice before handing this page over to the + * eviction. The first check takes care of the accessed bit set on the initial + * fault; the second check makes sure this page hasn't been used since then. + * This process, AKA second chance, requires a minimum of two generations, + * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive + * LRU, e.g., /proc/vmstat, these two generations are considered active; the + * rest of generations, if they exist, are considered inactive. See + * lru_gen_is_active(). + * + * PG_active is always cleared while a page is on one of lrugen->lists[] so that + * the aging needs not to worry about it. And it's set again when a page + * considered active is isolated for non-reclaiming purposes, e.g., migration. + * See lru_gen_add_folio() and lru_gen_del_folio(). + * + * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the + * number of categories of the active/inactive LRU when keeping track of + * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits + * in folio->flags. + */ +#define MIN_NR_GENS 2U +#define MAX_NR_GENS 4U + +/* + * Each generation is divided into multiple tiers. A page accessed N times + * through file descriptors is in tier order_base_2(N). A page in the first tier + * (N=0,1) is marked by PG_referenced unless it was faulted in through page + * tables or read ahead. A page in any other tier (N>1) is marked by + * PG_referenced and PG_workingset. This implies a minimum of two tiers is + * supported without using additional bits in folio->flags. + * + * In contrast to moving across generations which requires the LRU lock, moving + * across tiers only involves atomic operations on folio->flags and therefore + * has a negligible cost in the buffered access path. In the eviction path, + * comparisons of refaulted/(evicted+protected) from the first tier and the + * rest infer whether pages accessed multiple times through file descriptors + * are statistically hot and thus worth protecting. + * + * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the + * number of categories of the active/inactive LRU when keeping track of + * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in + * folio->flags. + */ +#define MAX_NR_TIERS 4U + +#ifndef __GENERATING_BOUNDS_H + +struct lruvec; +struct page_vma_mapped_walk; + +#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) +#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) + +#ifdef CONFIG_LRU_GEN + +enum { + LRU_GEN_ANON, + LRU_GEN_FILE, +}; + +enum { + LRU_GEN_CORE, + LRU_GEN_MM_WALK, + LRU_GEN_NONLEAF_YOUNG, + NR_LRU_GEN_CAPS +}; + +#define MIN_LRU_BATCH BITS_PER_LONG +#define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) + +/* whether to keep historical stats from evicted generations */ +#ifdef CONFIG_LRU_GEN_STATS +#define NR_HIST_GENS MAX_NR_GENS +#else +#define NR_HIST_GENS 1U +#endif + +/* + * The youngest generation number is stored in max_seq for both anon and file + * types as they are aged on an equal footing. The oldest generation numbers are + * stored in min_seq[] separately for anon and file types as clean file pages + * can be evicted regardless of swap constraints. + * + * Normally anon and file min_seq are in sync. But if swapping is constrained, + * e.g., out of swap space, file min_seq is allowed to advance and leave anon + * min_seq behind. + * + * The number of pages in each generation is eventually consistent and therefore + * can be transiently negative when reset_batch_size() is pending. + */ +struct lru_gen_struct { + /* the aging increments the youngest generation number */ + unsigned long max_seq; + /* the eviction increments the oldest generation numbers */ + unsigned long min_seq[ANON_AND_FILE]; + /* the birth time of each generation in jiffies */ + unsigned long timestamps[MAX_NR_GENS]; + /* the multi-gen LRU lists, lazily sorted on eviction */ + struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the multi-gen LRU sizes, eventually consistent */ + long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the exponential moving average of refaulted */ + unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; + /* the exponential moving average of evicted+protected */ + unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; + /* the first tier doesn't need protection, hence the minus one */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can be modified without holding the LRU lock */ + atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + /* whether the multi-gen LRU is enabled */ + bool enabled; +}; + +enum { + MM_LEAF_TOTAL, /* total leaf entries */ + MM_LEAF_OLD, /* old leaf entries */ + MM_LEAF_YOUNG, /* young leaf entries */ + MM_NONLEAF_TOTAL, /* total non-leaf entries */ + MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ + MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ + NR_MM_STATS +}; + +/* double-buffering Bloom filters */ +#define NR_BLOOM_FILTERS 2 + +struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; + /* where the current iteration continues (inclusive) */ + struct list_head *head; + /* where the last iteration ended (exclusive) */ + struct list_head *tail; + /* to wait for the last page table walker to finish */ + struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; + /* the number of concurrent page table walkers */ + int nr_walkers; +}; + +struct lru_gen_mm_walk { + /* the lruvec under reclaim */ + struct lruvec *lruvec; + /* unstable max_seq from lru_gen_struct */ + unsigned long max_seq; + /* the next address within an mm to scan */ + unsigned long next_addr; + /* to batch promoted pages */ + int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* to batch the mm stats */ + int mm_stats[NR_MM_STATS]; + /* total batched items */ + int batched; + bool can_swap; + bool force_scan; +}; + +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg); +void lru_gen_exit_memcg(struct mem_cgroup *memcg); +#endif + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_init_lruvec(struct lruvec *lruvec) +{ +} + +static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ +} +#endif + +#endif /* CONFIG_LRU_GEN */ + struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ @@ -331,6 +522,12 @@ struct lruvec { unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* evictable pages divided into generations */ + struct lru_gen_struct lrugen; + /* to concurrently iterate lru_gen_mm_list */ + struct lru_gen_mm_state mm_state; +#endif #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif @@ -368,13 +565,6 @@ enum zone_watermarks { #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) -/* - * Shift to encode migratetype and order in the same integer, with order - * in the least significant bits. - */ -#define NR_PCP_ORDER_WIDTH 8 -#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1) - #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) @@ -627,7 +817,7 @@ struct zone { int initialized; /* Write-intensive fields used from the page allocator */ - ZONE_PADDING(_pad1_) + CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ struct free_area free_area[MAX_ORDER]; @@ -639,7 +829,7 @@ struct zone { spinlock_t lock; /* Write-intensive fields used by compaction and vmstats. */ - ZONE_PADDING(_pad2_) + CACHELINE_PADDING(_pad2_); /* * When free pages are below this point, additional steps are taken @@ -676,7 +866,7 @@ struct zone { bool contiguous; - ZONE_PADDING(_pad3_) + CACHELINE_PADDING(_pad3_); /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; @@ -746,6 +936,8 @@ static inline bool zone_is_empty(struct zone *zone) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) +#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) +#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -953,8 +1145,10 @@ typedef struct pglist_data { atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */ unsigned long nr_reclaim_start; /* nr pages written while throttled * when throttling started. */ - struct task_struct *kswapd; /* Protected by - mem_hotplug_begin/done() */ +#ifdef CONFIG_MEMORY_HOTPLUG + struct mutex kswapd_lock; +#endif + struct task_struct *kswapd; /* Protected by kswapd_lock */ int kswapd_order; enum zone_type kswapd_highest_zoneidx; @@ -982,7 +1176,7 @@ typedef struct pglist_data { #endif /* CONFIG_NUMA */ /* Write-intensive fields used by page reclaim */ - ZONE_PADDING(_pad1_) + CACHELINE_PADDING(_pad1_); #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -996,6 +1190,21 @@ typedef struct pglist_data { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_NUMA_BALANCING + /* start time in ms of current promote rate limit period */ + unsigned int nbp_rl_start; + /* number of promote candidate pages at start time of current rate limit period */ + unsigned long nbp_rl_nr_cand; + /* promote threshold in ms */ + unsigned int nbp_threshold; + /* start time in ms of current promote threshold adjustment period */ + unsigned int nbp_th_start; + /* + * number of promote candidate pages at stat time of current promote + * threshold adjustment period + */ + unsigned long nbp_th_nr_cand; +#endif /* Fields commonly accessed by the page reclaim scanner */ /* @@ -1007,11 +1216,19 @@ typedef struct pglist_data { unsigned long flags; - ZONE_PADDING(_pad2_) +#ifdef CONFIG_LRU_GEN + /* kswap mm walk data */ + struct lru_gen_mm_walk mm_walk; +#endif + + CACHELINE_PADDING(_pad2_); /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA + struct memory_tier __rcu *memtier; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) @@ -1025,11 +1242,6 @@ static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) return pgdat->node_start_pfn + pgdat->node_spanned_pages; } -static inline bool pgdat_is_empty(pg_data_t *pgdat) -{ - return !pgdat->node_start_pfn && !pgdat->node_spanned_pages; -} - #include <linux/memory_hotplug.h> void build_all_zonelists(pg_data_t *pgdat); diff --git a/include/linux/module.h b/include/linux/module.h index 518296ea7f73..ec61fb53979a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -27,7 +27,6 @@ #include <linux/tracepoint-defs.h> #include <linux/srcu.h> #include <linux/static_call_types.h> -#include <linux/cfi.h> #include <linux/percpu.h> #include <asm/module.h> @@ -132,7 +131,7 @@ extern void cleanup_module(void); { return initfn; } \ int init_module(void) __copy(initfn) \ __attribute__((alias(#initfn))); \ - __CFI_ADDRESSABLE(init_module, __initdata); + ___ADDRESSABLE(init_module, __initdata); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ @@ -140,7 +139,7 @@ extern void cleanup_module(void); { return exitfn; } \ void cleanup_module(void) __copy(exitfn) \ __attribute__((alias(#exitfn))); \ - __CFI_ADDRESSABLE(cleanup_module, __exitdata); + ___ADDRESSABLE(cleanup_module, __exitdata); #endif @@ -387,8 +386,9 @@ struct module { const s32 *crcs; unsigned int num_syms; -#ifdef CONFIG_CFI_CLANG - cfi_check_fn cfi_check; +#ifdef CONFIG_ARCH_USES_CFI_TRAPS + s32 *kcfi_traps; + s32 *kcfi_traps_end; #endif /* Kernel parameters. */ diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 6cbbfe94348c..80b8400ab8b2 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -17,7 +17,7 @@ static inline int ip_mroute_opt(int opt) } int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); -int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +int ip_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); @@ -29,8 +29,8 @@ static inline int ip_mroute_setsockopt(struct sock *sock, int optname, return -ENOPROTOOPT; } -static inline int ip_mroute_getsockopt(struct sock *sock, int optname, - char __user *optval, int __user *optlen) +static inline int ip_mroute_getsockopt(struct sock *sk, int optname, + sockptr_t optval, sockptr_t optlen) { return -ENOPROTOOPT; } diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index bc351a85ce9b..8f2b307fb124 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -27,7 +27,7 @@ struct sock; #ifdef CONFIG_IPV6_MROUTE extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int); -extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); +extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t); extern int ip6_mr_input(struct sk_buff *skb); extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); @@ -42,7 +42,7 @@ static inline int ip6_mroute_setsockopt(struct sock *sock, int optname, static inline int ip6_mroute_getsockopt(struct sock *sock, - int optname, char __user *optval, int __user *optlen) + int optname, sockptr_t optval, sockptr_t optlen) { return -ENOPROTOOPT; } diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 955aee14b0f7..7c58c44662b8 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -40,6 +40,12 @@ struct mtd_erase_region_info { unsigned long *lockmap; /* If keeping bitmap of locks */ }; +struct mtd_req_stats { + unsigned int uncorrectable_errors; + unsigned int corrected_bitflips; + unsigned int max_bitflips; +}; + /** * struct mtd_oob_ops - oob operation operands * @mode: operation mode @@ -70,6 +76,7 @@ struct mtd_oob_ops { uint32_t ooboffs; uint8_t *datbuf; uint8_t *oobbuf; + struct mtd_req_stats *stats; }; /** @@ -677,6 +684,7 @@ extern int mtd_device_unregister(struct mtd_info *master); extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num); extern int __get_mtd_device(struct mtd_info *mtd); extern void __put_mtd_device(struct mtd_info *mtd); +extern struct mtd_info *of_get_mtd_device_by_node(struct device_node *np); extern struct mtd_info *get_mtd_device_nm(const char *name); extern void put_mtd_device(struct mtd_info *mtd); diff --git a/include/linux/namei.h b/include/linux/namei.h index caeb08a98536..00fee52df842 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -83,7 +83,7 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); -extern int __must_check nd_jump_link(struct path *path); +extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05d6f3facd5a..a36edb0ec199 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -253,11 +253,17 @@ struct netdev_hw_addr_list { #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc) #define netdev_for_each_uc_addr(ha, dev) \ netdev_hw_addr_list_for_each(ha, &(dev)->uc) +#define netdev_for_each_synced_uc_addr(_ha, _dev) \ + netdev_for_each_uc_addr((_ha), (_dev)) \ + if ((_ha)->sync_cnt) #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc) #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc) #define netdev_for_each_mc_addr(ha, dev) \ netdev_hw_addr_list_for_each(ha, &(dev)->mc) +#define netdev_for_each_synced_mc_addr(_ha, _dev) \ + netdev_for_each_mc_addr((_ha), (_dev)) \ + if ((_ha)->sync_cnt) struct hh_cache { unsigned int hh_len; @@ -546,8 +552,8 @@ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) { unsigned long val, new; + val = READ_ONCE(n->state); do { - val = READ_ONCE(n->state); if (val & NAPIF_STATE_DISABLE) return true; @@ -555,7 +561,7 @@ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) return false; new = val | NAPIF_STATE_MISSED; - } while (cmpxchg(&n->state, val, new) != val); + } while (!try_cmpxchg(&n->state, &val, new)); return true; } @@ -934,6 +940,7 @@ struct net_device_path_ctx { }; enum tc_setup_type { + TC_QUERY_CAPS, TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, @@ -1851,7 +1858,6 @@ enum netdev_ml_priv_type { * @tipc_ptr: TIPC specific data * @atalk_ptr: AppleTalk link * @ip_ptr: IPv4 specific data - * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering @@ -2147,9 +2153,6 @@ struct net_device { #if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif -#if IS_ENABLED(CONFIG_DECNET) - struct dn_dev __rcu *dn_ptr; -#endif #if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; #endif @@ -2551,16 +2554,15 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, * @dev: network device * @napi: NAPI context * @poll: polling function - * @weight: default weight * * netif_napi_add() must be used to initialize a NAPI context prior to calling * *any* of the other NAPI-related functions. */ static inline void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) + int (*poll)(struct napi_struct *, int)) { - netif_napi_add_weight(dev, napi, poll, weight); + netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } static inline void @@ -2573,8 +2575,6 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } -#define netif_tx_napi_add netif_napi_add_tx_weight - /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device @@ -3357,6 +3357,16 @@ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_qu #endif } +/** + * netdev_tx_sent_queue - report the number of bytes queued to a given tx queue + * @dev_queue: network device queue + * @bytes: number of bytes queued to the device queue + * + * Report the number of bytes queued for sending/completion to the network + * device hardware queue. @bytes should be a good approximation and should + * exactly match netdev_completed_queue() @bytes. + * This is typically called once per packet, from ndo_start_xmit(). + */ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { @@ -3402,13 +3412,14 @@ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue, } /** - * netdev_sent_queue - report the number of bytes queued to hardware - * @dev: network device - * @bytes: number of bytes queued to the hardware device queue + * netdev_sent_queue - report the number of bytes queued to hardware + * @dev: network device + * @bytes: number of bytes queued to the hardware device queue * - * Report the number of bytes queued for sending/completion to the network - * device hardware queue. @bytes should be a good approximation and should - * exactly match netdev_completed_queue() @bytes + * Report the number of bytes queued for sending/completion to the network + * device hardware queue#0. @bytes should be a good approximation and should + * exactly match netdev_completed_queue() @bytes. + * This is typically called once per packet, from ndo_start_xmit(). */ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) { @@ -3423,6 +3434,15 @@ static inline bool __netdev_sent_queue(struct net_device *dev, xmit_more); } +/** + * netdev_tx_completed_queue - report number of packets/bytes at TX completion. + * @dev_queue: network device queue + * @pkts: number of packets (currently ignored) + * @bytes: number of bytes dequeued from the device queue + * + * Must be called at most once per TX completion round (and not per + * individual packet), so that BQL can adjust its limits appropriately. + */ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsigned int pkts, unsigned int bytes) { @@ -3643,9 +3663,8 @@ static inline bool netif_attr_test_online(unsigned long j, static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) { - /* -1 is a legal arg here. */ - if (n != -1) - cpu_max_bits_warn(n, nr_bits); + /* n is a prior cpu */ + cpu_max_bits_warn(n + 1, nr_bits); if (srcp) return find_next_bit(srcp, nr_bits, n + 1); @@ -3666,9 +3685,8 @@ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, unsigned int nr_bits) { - /* -1 is a legal arg here. */ - if (n != -1) - cpu_max_bits_warn(n, nr_bits); + /* n is a prior cpu */ + cpu_max_bits_warn(n + 1, nr_bits); if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); @@ -3802,6 +3820,7 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); +void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); struct packet_offload *gro_find_receive_by_type(__be16 type); struct packet_offload *gro_find_complete_by_type(__be16 type); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c2c6f332fb90..d8817d381c14 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -243,11 +243,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; -#if IS_ENABLED(CONFIG_DECNET) - case NFPROTO_DECNET: - hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); - break; -#endif default: WARN_ON_ONCE(1); break; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h index 8dddfb151f00..a5f7bef1b3a4 100644 --- a/include/linux/netfilter_defs.h +++ b/include/linux/netfilter_defs.h @@ -7,14 +7,6 @@ /* in/out/forward only */ #define NF_ARP_NUMHOOKS 3 -/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */ -#define NF_DN_NUMHOOKS 7 - -#if IS_ENABLED(CONFIG_DECNET) -/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ -#define NF_MAX_HOOKS NF_DN_NUMHOOKS -#else #define NF_MAX_HOOKS NF_INET_NUMHOOKS -#endif #endif diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bda1c385cffb..d51e041d2242 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -71,6 +71,8 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) * %NL_SET_ERR_MSG * @bad_attr: attribute with error * @policy: policy for a bad attribute + * @miss_type: attribute type which was missing + * @miss_nest: nest missing an attribute (%NULL if missing top level attr) * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length */ @@ -78,6 +80,8 @@ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; const struct nla_policy *policy; + const struct nlattr *miss_nest; + u16 miss_type; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; }; @@ -126,6 +130,26 @@ struct netlink_ext_ack { #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) +#define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) { \ + __extack->miss_nest = (nest); \ + __extack->miss_type = (type); \ + } \ +} while (0) + +#define NL_REQ_ATTR_CHECK(extack, nest, tb, type) ({ \ + struct nlattr **__tb = (tb); \ + u32 __attr = (type); \ + int __retval; \ + \ + __retval = !__tb[__attr]; \ + if (__retval) \ + NL_SET_ERR_ATTR_MISS((extack), (nest), __attr); \ + __retval; \ +}) + static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { diff --git a/include/linux/node.h b/include/linux/node.h index 40d641a8bfb0..427a5975cf40 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -2,15 +2,15 @@ /* * include/linux/node.h - generic node definition * - * This is mainly for topological representation. We define the - * basic 'struct node' here, which can be embedded in per-arch + * This is mainly for topological representation. We define the + * basic 'struct node' here, which can be embedded in per-arch * definitions of processors. * * Basic handling of the devices is done in drivers/base/node.c - * and system devices are handled in drivers/base/sys.c. + * and system devices are handled in drivers/base/sys.c. * * Nodes are exported via driverfs in the class/node/devices/ - * directory. + * directory. */ #ifndef _LINUX_NODE_H_ #define _LINUX_NODE_H_ @@ -18,7 +18,6 @@ #include <linux/device.h> #include <linux/cpumask.h> #include <linux/list.h> -#include <linux/workqueue.h> /** * struct node_hmem_attrs - heterogeneous memory performance attributes @@ -84,10 +83,6 @@ static inline void node_set_perf_attrs(unsigned int nid, struct node { struct device dev; struct list_head access_list; - -#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) - struct work_struct node_work; -#endif #ifdef CONFIG_HMEM_REPORTING struct list_head cache_attrs; struct device *cache_dev; @@ -96,7 +91,6 @@ struct node { struct memory_block; extern struct node *node_devices[]; -typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) void register_memory_blocks_under_node(int nid, unsigned long start_pfn, @@ -144,11 +138,6 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, unsigned access); - -#ifdef CONFIG_HUGETLBFS -extern void register_hugetlbfs_with_node(node_registration_func_t doregister, - node_registration_func_t unregister); -#endif #else static inline void node_dev_init(void) { @@ -176,18 +165,8 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { } - -static inline void register_hugetlbfs_with_node(node_registration_func_t reg, - node_registration_func_t unreg) -{ -} #endif #define to_node(device) container_of(device, struct node, dev) -static inline bool node_is_toptier(int node) -{ - return node_state(node, N_CPU); -} - #endif /* _LINUX_NODE_H_ */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 4b71a96190a8..efef68c9352a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -493,6 +493,7 @@ static inline int num_node_state(enum node_states state) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) +#define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U @@ -504,12 +505,20 @@ static inline int num_node_state(enum node_states state) static inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) - int w, bit = NUMA_NO_NODE; + int w, bit; w = nodes_weight(*maskp); - if (w) - bit = bitmap_ord_to_pos(maskp->bits, - get_random_int() % w, MAX_NUMNODES); + switch (w) { + case 0: + bit = NUMA_NO_NODE; + break; + case 1: + bit = first_node(*maskp); + break; + default: + bit = find_nth_bit(maskp->bits, MAX_NUMNODES, prandom_u32_max(w)); + break; + } return bit; #else return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index ae53d74f3696..050d7d0cd81b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1482,8 +1482,8 @@ struct nvmf_connect_command { }; enum { - NVME_CONNECT_AUTHREQ_ASCR = (1 << 2), - NVME_CONNECT_AUTHREQ_ATR = (1 << 1), + NVME_CONNECT_AUTHREQ_ASCR = (1U << 18), + NVME_CONNECT_AUTHREQ_ATR = (1U << 17), }; struct nvmf_connect_data { diff --git a/include/linux/of.h b/include/linux/of.h index 766d002bddb9..6b79ef9a6541 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -342,7 +342,7 @@ extern int of_property_read_string_helper(const struct device_node *np, const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); -extern int of_device_compatible_match(struct device_node *device, +extern int of_device_compatible_match(const struct device_node *device, const char *const *compat); extern bool of_device_is_available(const struct device_node *device); extern bool of_device_is_big_endian(const struct device_node *device); @@ -562,7 +562,7 @@ static inline int of_device_is_compatible(const struct device_node *device, return 0; } -static inline int of_device_compatible_match(struct device_node *device, +static inline int of_device_compatible_match(const struct device_node *device, const char *const *compat) { return 0; diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 1d7992a02e36..1a803e4335d3 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } static inline int of_dma_configure_id(struct device *dev, - struct device_node *np, - bool force_dma) + struct device_node *np, + bool force_dma, + const u32 *id) { return 0; } diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 83fccd0c9bba..d6d3eae2f145 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -37,9 +37,8 @@ extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data); extern int of_irq_to_resource(struct device_node *dev, int index, struct resource *r); -extern void of_irq_init(const struct of_device_id *matches); - #ifdef CONFIG_OF_IRQ +extern void of_irq_init(const struct of_device_id *matches); extern int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq); extern int of_irq_count(struct device_node *dev); @@ -57,6 +56,9 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, extern void of_msi_configure(struct device *dev, struct device_node *np); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else +static inline void of_irq_init(const struct of_device_id *matches) +{ +} static inline int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq) { diff --git a/include/linux/once.h b/include/linux/once.h index b14d8b309d52..bc714d414448 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -5,10 +5,18 @@ #include <linux/types.h> #include <linux/jump_label.h> +/* Helpers used from arbitrary contexts. + * Hard irqs are blocked, be cautious. + */ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, unsigned long *flags, struct module *mod); +/* Variant for process contexts only. */ +bool __do_once_sleepable_start(bool *done); +void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, + struct module *mod); + /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only * once, where DO_ONCE() can live in the fast-path. After @func has @@ -52,7 +60,27 @@ void __do_once_done(bool *done, struct static_key_true *once_key, ___ret; \ }) +/* Variant of DO_ONCE() for process/sleepable contexts. */ +#define DO_ONCE_SLEEPABLE(func, ...) \ + ({ \ + bool ___ret = false; \ + static bool __section(".data.once") ___done = false; \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ + ___ret = __do_once_sleepable_start(&___done); \ + if (unlikely(___ret)) { \ + func(__VA_ARGS__); \ + __do_once_sleepable_done(&___done, &___once_key,\ + THIS_MODULE); \ + } \ + } \ + ___ret; \ + }) + #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) +#define get_random_sleepable_once(buf, nbytes) \ + DO_ONCE_SLEEPABLE(get_random_bytes, (buf), (nbytes)) + #endif /* _LINUX_ONCE_H */ diff --git a/include/linux/oom.h b/include/linux/oom.h index 02d1e7bbd8cd..7d0c9c48a0c5 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -78,15 +78,6 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) } /* - * Use this helper if tsk->mm != mm and the victim mm needs a special - * handling. This is guaranteed to stay true after once set. - */ -static inline bool mm_is_oom_victim(struct mm_struct *mm) -{ - return test_bit(MMF_OOM_VICTIM, &mm->flags); -} - -/* * Checks whether a page fault on the given mm is still reliable. * This is no longer true if the oom reaper started to reap the * address space which is reflected by MMF_UNSTABLE flag set in @@ -106,8 +97,6 @@ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) return 0; } -bool __oom_reap_task_mm(struct mm_struct *mm); - long oom_badness(struct task_struct *p, unsigned long totalpages); diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0eb3b192f07a..19dfdd74835e 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -51,40 +51,50 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -/* - * For simplicity and code hygiene, the fallback code below insists on - * a, b and *d having the same type (similar to the min() and max() - * macros), whereas gcc's type-generic overflow checkers accept - * different types. Hence we don't just make check_add_overflow an - * alias for __builtin_add_overflow, but add type checks similar to - * below. +/** check_add_overflow() - Calculate addition with overflow checking + * + * @a: first addend + * @b: second addend + * @d: pointer to store sum + * + * Returns 0 on success. + * + * *@d holds the results of the attempted addition, but is not considered + * "safe for use" on a non-zero return value, which indicates that the + * sum has overflowed or been truncated. */ -#define check_add_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_add_overflow(__a, __b, __d); \ -})) +#define check_add_overflow(a, b, d) \ + __must_check_overflow(__builtin_add_overflow(a, b, d)) -#define check_sub_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_sub_overflow(__a, __b, __d); \ -})) +/** check_sub_overflow() - Calculate subtraction with overflow checking + * + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * @d: pointer to store difference + * + * Returns 0 on success. + * + * *@d holds the results of the attempted subtraction, but is not considered + * "safe for use" on a non-zero return value, which indicates that the + * difference has underflowed or been truncated. + */ +#define check_sub_overflow(a, b, d) \ + __must_check_overflow(__builtin_sub_overflow(a, b, d)) -#define check_mul_overflow(a, b, d) __must_check_overflow(({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - __builtin_mul_overflow(__a, __b, __d); \ -})) +/** check_mul_overflow() - Calculate multiplication with overflow checking + * + * @a: first factor + * @b: second factor + * @d: pointer to store product + * + * Returns 0 on success. + * + * *@d holds the results of the attempted multiplication, but is not + * considered "safe for use" on a non-zero return value, which indicates + * that the product has overflowed or been truncated. + */ +#define check_mul_overflow(a, b, d) \ + __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** check_shl_overflow() - Calculate a left-shifted value and check overflow * diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index ef1e3e736e14..7d79818dc065 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -55,7 +55,8 @@ #define SECTIONS_WIDTH 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT #elif defined(CONFIG_SPARSEMEM_VMEMMAP) #error "Vmemmap: No space for nodes field in page flags" @@ -89,8 +90,8 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \ - <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 @@ -100,10 +101,15 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \ - > BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS #error "Not enough bits in page flags" #endif +/* see the comment on MAX_NR_TIERS */ +#define LRU_REFS_WIDTH min(__LRU_REFS_WIDTH, BITS_PER_LONG - NR_PAGEFLAGS - \ + ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \ + NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH) + #endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 465ff35a8c00..0b0ae5084e60 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1058,7 +1058,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) + 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. @@ -1069,7 +1069,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ - (PAGEFLAGS_MASK & ~__PG_HWPOISON) + ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 679591301994..c141ea9a95ef 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -3,15 +3,17 @@ #define _LINUX_PAGE_COUNTER_H #include <linux/atomic.h> +#include <linux/cache.h> #include <linux/kernel.h> #include <asm/page.h> struct page_counter { + /* + * Make sure 'usage' does not share cacheline with any other field. The + * memcg->memory.usage is a hot member of struct mem_cgroup. + */ atomic_long_t usage; - unsigned long min; - unsigned long low; - unsigned long high; - unsigned long max; + CACHELINE_PADDING(_pad1_); /* effective memory.min and memory.min usage tracking */ unsigned long emin; @@ -23,18 +25,18 @@ struct page_counter { atomic_long_t low_usage; atomic_long_t children_low_usage; - /* legacy */ unsigned long watermark; unsigned long failcnt; - /* - * 'parent' is placed here to be far from 'usage' to reduce - * cache false sharing, as 'usage' is written mostly while - * parent is frequently read for cgroup's hierarchical - * counting nature. - */ + /* Keep all the read most fields in a separete cacheline. */ + CACHELINE_PADDING(_pad2_); + + unsigned long min; + unsigned long low; + unsigned long high; + unsigned long max; struct page_counter *parent; -}; +} ____cacheline_internodealigned_in_smp; #if BITS_PER_LONG == 32 #define PAGE_COUNTER_MAX LONG_MAX diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index fabb2e1e087f..22be4582faae 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -36,9 +36,15 @@ struct page_ext { unsigned long flags; }; +extern bool early_page_ext; extern unsigned long page_ext_size; extern void pgdat_page_ext_init(struct pglist_data *pgdat); +static inline bool early_page_ext_enabled(void) +{ + return early_page_ext; +} + #ifdef CONFIG_SPARSEMEM static inline void page_ext_init_flatmem(void) { @@ -55,7 +61,8 @@ static inline void page_ext_init(void) } #endif -struct page_ext *lookup_page_ext(const struct page *page); +extern struct page_ext *page_ext_get(struct page *page); +extern void page_ext_put(struct page_ext *page_ext); static inline struct page_ext *page_ext_next(struct page_ext *curr) { @@ -67,13 +74,13 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr) #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; -static inline void pgdat_page_ext_init(struct pglist_data *pgdat) +static inline bool early_page_ext_enabled(void) { + return false; } -static inline struct page_ext *lookup_page_ext(const struct page *page) +static inline void pgdat_page_ext_init(struct pglist_data *pgdat) { - return NULL; } static inline void page_ext_init(void) @@ -87,5 +94,14 @@ static inline void page_ext_init_flatmem_late(void) static inline void page_ext_init_flatmem(void) { } + +static inline struct page_ext *page_ext_get(struct page *page) +{ + return NULL; +} + +static inline void page_ext_put(struct page_ext *page_ext) +{ +} #endif /* CONFIG_PAGE_EXTENSION */ #endif /* __LINUX_PAGE_EXT_H */ diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 4663dfed1293..5cb7bd2078ec 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -13,65 +13,79 @@ * If there is not enough space to store Idle and Young bits in page flags, use * page ext flags instead. */ - static inline bool folio_test_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); + bool page_young; if (unlikely(!page_ext)) return false; - return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_young = test_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); + + return page_young; } static inline void folio_set_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); if (unlikely(!page_ext)) return; set_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); } static inline bool folio_test_clear_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); + bool page_young; if (unlikely(!page_ext)) return false; - return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_young = test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); + page_ext_put(page_ext); + + return page_young; } static inline bool folio_test_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); + bool page_idle; if (unlikely(!page_ext)) return false; - return test_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); + + return page_idle; } static inline void folio_set_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); if (unlikely(!page_ext)) return; set_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); } static inline void folio_clear_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(&folio->page); + struct page_ext *page_ext = page_ext_get(&folio->page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); + page_ext_put(page_ext); } #endif /* !CONFIG_64BIT */ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 83c7248053a1..5f1ae07d724b 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -53,6 +53,10 @@ extern unsigned int pageblock_order; #endif /* CONFIG_HUGETLB_PAGE */ #define pageblock_nr_pages (1UL << pageblock_order) +#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) +#define pageblock_aligned(pfn) IS_ALIGNED((pfn), pageblock_nr_pages) +#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) +#define pageblock_end_pfn(pfn) ALIGN((pfn) + 1, pageblock_nr_pages) /* Forward declaration */ struct page; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0178b2040ea3..bbccb4044222 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -718,8 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); -unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, - unsigned int nr_pages, struct page **pages); +unsigned filemap_get_folios_contig(struct address_space *mapping, + pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned int nr_pages, struct page **pages); @@ -989,19 +989,16 @@ static inline int lock_page_killable(struct page *page) } /* - * lock_page_or_retry - Lock the page, unless this would block and the + * folio_lock_or_retry - Lock the folio, unless this would block and the * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see * __folio_lock_or_retry(). */ -static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, - unsigned int flags) +static inline bool folio_lock_or_retry(struct folio *folio, + struct mm_struct *mm, unsigned int flags) { - struct folio *folio; might_sleep(); - - folio = page_folio(page); return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); } @@ -1042,7 +1039,6 @@ static inline int wait_on_page_locked_killable(struct page *page) return folio_wait_locked_killable(page_folio(page)); } -int folio_put_wait_locked(struct folio *folio, int state); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); @@ -1173,6 +1169,8 @@ struct readahead_control { pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; + bool _workingset; + unsigned long _pflags; }; #define DEFINE_READAHEAD(ractl, f, r, m, i) \ diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index ac7b38ad5903..f3fafb731ffd 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -15,12 +15,12 @@ struct mm_walk; * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to * split_huge_page() instead of handling it explicitly. - * @pte_entry: if set, called for each non-empty PTE (lowest-level) - * entry + * @pte_entry: if set, called for each PTE (lowest-level) entry, + * including empty ones * @pte_hole: if set, called for each hole at all levels, - * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD - * 4:PTE. Any folded depths (where PTRS_PER_P?D is equal - * to 1) are skipped. + * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. + * Any folded depths (where PTRS_PER_P?D is equal to 1) + * are skipped. * @hugetlb_entry: if set, called for each hugetlb entry * @test_walk: caller specific callback function to determine whether * we walk over the current vma or not. Returning 0 means diff --git a/include/linux/pci.h b/include/linux/pci.h index 060af91bafcd..2bda4a4e47e8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -475,6 +475,7 @@ struct pci_dev { unsigned int broken_cmd_compl:1; /* No compl for some cmds */ #endif #ifdef CONFIG_PCIE_PTM + u16 ptm_cap; /* PTM Capability */ unsigned int ptm_root:1; unsigned int ptm_enabled:1; u8 ptm_granularity; @@ -1677,10 +1678,12 @@ bool pci_ats_disabled(void); #ifdef CONFIG_PCIE_PTM int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); +void pci_disable_ptm(struct pci_dev *dev); bool pcie_ptm_enabled(struct pci_dev *dev); #else static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) { return -EINVAL; } +static inline void pci_disable_ptm(struct pci_dev *dev) { } static inline bool pcie_ptm_enabled(struct pci_dev *dev) { return false; } #endif @@ -2019,8 +2022,8 @@ enum pci_fixup_pass { #ifdef CONFIG_LTO_CLANG #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ class_shift, hook, stub) \ - void __cficanonical stub(struct pci_dev *dev); \ - void __cficanonical stub(struct pci_dev *dev) \ + void stub(struct pci_dev *dev); \ + void stub(struct pci_dev *dev) \ { \ hook(dev); \ } \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6feade66efdb..b362d90eb9b0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -75,6 +75,9 @@ #define PCI_CLASS_COMMUNICATION_MODEM 0x0703 #define PCI_CLASS_COMMUNICATION_OTHER 0x0780 +/* Interface for SERIAL/MODEM */ +#define PCI_SERIAL_16550_COMPATIBLE 0x02 + #define PCI_BASE_CLASS_SYSTEM 0x08 #define PCI_CLASS_SYSTEM_PIC 0x0800 #define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 @@ -2079,6 +2082,9 @@ #define PCI_DEVICE_ID_ICE_1712 0x1712 #define PCI_DEVICE_ID_VT1724 0x1724 +#define PCI_VENDOR_ID_MICROSOFT 0x1414 +#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353 + #define PCI_VENDOR_ID_OXSEMI 0x1415 #define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403 #define PCI_DEVICE_ID_OXSEMI_PCIe840 0xC000 diff --git a/include/linux/pcs-altera-tse.h b/include/linux/pcs-altera-tse.h new file mode 100644 index 000000000000..92ab9f08e835 --- /dev/null +++ b/include/linux/pcs-altera-tse.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Bootlin + * + * Maxime Chevallier <maxime.chevallier@bootlin.com> + */ + +#ifndef __LINUX_PCS_ALTERA_TSE_H +#define __LINUX_PCS_ALTERA_TSE_H + +struct phylink_pcs; +struct net_device; + +struct phylink_pcs *alt_tse_pcs_create(struct net_device *ndev, + void __iomem *pcs_base, int reg_width); + +#endif /* __LINUX_PCS_ALTERA_TSE_H */ diff --git a/include/linux/pe.h b/include/linux/pe.h index daf09ffffe38..1d3836ef9d92 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -65,6 +65,8 @@ #define IMAGE_FILE_MACHINE_SH5 0x01a8 #define IMAGE_FILE_MACHINE_THUMB 0x01c2 #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 +#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232 +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 /* flags */ #define IMAGE_FILE_RELOCS_STRIPPED 0x0001 diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5fda40f97fe9..36b942b67b7d 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) preempt_enable(); } +extern bool percpu_is_read_locked(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); +static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem) +{ + return atomic_read(&sem->block); +} + extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 01861eebed79..8ed5fba6d156 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -15,6 +15,9 @@ #include <linux/types.h> #include <linux/gfp.h> +/* percpu_counter batch for local add or sub */ +#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX + #ifdef CONFIG_SMP struct percpu_counter { @@ -56,6 +59,22 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } +/* + * With percpu_counter_add_local() and percpu_counter_sub_local(), counts + * are accumulated in local per cpu counter and not in fbc->count until + * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter + * write efficient. + * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be + * used to add up the counts from each CPU to account for all the local + * counts. So percpu_counter_add_local() and percpu_counter_sub_local() + * should be used when a counter is updated frequently and read rarely. + */ +static inline void +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH); +} + static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { s64 ret = __percpu_counter_sum(fbc); @@ -138,6 +157,13 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) preempt_enable(); } +/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ +static inline void +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add(fbc, amount); +} + static inline void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { @@ -193,4 +219,10 @@ static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount) percpu_counter_add(fbc, -amount); } +static inline void +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add_local(fbc, -amount); +} + #endif /* _LINUX_PERCPU_COUNTER_H */ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 0407a38b470a..0356cb6a215d 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -24,10 +24,11 @@ /* * ARM PMU hw_event flags */ -/* Event uses a 64bit counter */ -#define ARMPMU_EVT_64BIT 1 -/* Event uses a 47bit counter */ -#define ARMPMU_EVT_47BIT 2 +#define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */ +#define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */ + +static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT); +static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT); #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index bf66fe011fa8..e17e86ad6f3a 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -45,7 +45,7 @@ struct riscv_pmu { irqreturn_t (*handle_irq)(int irq_num, void *dev); - int num_counters; + unsigned long cmask; u64 (*ctr_read)(struct perf_event *event); int (*ctr_get_idx)(struct perf_event *event); int (*ctr_get_width)(int idx); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ee8b9ecdc03b..853f64b6c8c2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -36,6 +36,7 @@ struct perf_guest_info_callbacks { }; #ifdef CONFIG_HAVE_HW_BREAKPOINT +#include <linux/rhashtable-types.h> #include <asm/hw_breakpoint.h> #endif @@ -60,6 +61,7 @@ struct perf_guest_info_callbacks { #include <linux/refcount.h> #include <linux/security.h> #include <linux/static_call.h> +#include <linux/lockdep.h> #include <asm/local.h> struct perf_callchain_entry { @@ -137,9 +139,11 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ -#define PERF_EVENT_FLAG_ARCH 0x0000ffff +#define PERF_EVENT_FLAG_ARCH 0x000fffff #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 +static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); + /** * struct hw_perf_event - performance event hardware details: */ @@ -178,7 +182,7 @@ struct hw_perf_event { * creation and event initalization. */ struct arch_hw_breakpoint info; - struct list_head bp_list; + struct rhlist_head bp_list; }; #endif struct { /* amd_iommu */ @@ -631,7 +635,23 @@ struct pmu_event_list { struct list_head list; }; +/* + * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex + * as such iteration must hold either lock. However, since ctx->lock is an IRQ + * safe lock, and is only held by the CPU doing the modification, having IRQs + * disabled is sufficient since it will hold-off the IPIs. + */ +#ifdef CONFIG_PROVE_LOCKING +#define lockdep_assert_event_ctx(event) \ + WARN_ON_ONCE(__lockdep_enabled && \ + (this_cpu_read(hardirqs_enabled) && \ + lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) +#else +#define lockdep_assert_event_ctx(event) +#endif + #define for_each_sibling_event(sibling, event) \ + lockdep_assert_event_ctx(event); \ if ((event)->group_leader == (event)) \ list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) @@ -1007,18 +1027,20 @@ struct perf_sample_data { * Fields set by perf_sample_data_init(), group so as to * minimize the cachelines touched. */ - u64 addr; - struct perf_raw_record *raw; - struct perf_branch_stack *br_stack; + u64 sample_flags; u64 period; - union perf_sample_weight weight; - u64 txn; - union perf_mem_data_src data_src; /* * The other fields, optionally {set,used} by * perf_{prepare,output}_sample(). */ + struct perf_branch_stack *br_stack; + union perf_sample_weight weight; + union perf_mem_data_src data_src; + u64 txn; + u64 addr; + struct perf_raw_record *raw; + u64 type; u64 ip; struct { @@ -1056,13 +1078,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { /* remaining struct members initialized in perf_prepare_sample() */ - data->addr = addr; - data->raw = NULL; - data->br_stack = NULL; + data->sample_flags = PERF_SAMPLE_PERIOD; data->period = period; - data->weight.full = 0; - data->data_src.val = PERF_MEM_NA; - data->txn = 0; + + if (addr) { + data->addr = addr; + data->sample_flags |= PERF_SAMPLE_ADDR; + } } /* @@ -1078,6 +1100,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b br->abort = 0; br->cycles = 0; br->type = 0; + br->spec = PERF_BR_SPEC_NA; br->reserved = 0; } @@ -1684,4 +1707,30 @@ static inline void perf_lopwr_cb(bool mode) } #endif +#ifdef CONFIG_PERF_EVENTS +static inline bool branch_sample_no_flags(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; +} + +static inline bool branch_sample_no_cycles(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; +} + +static inline bool branch_sample_type(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; +} + +static inline bool branch_sample_hw_index(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + +static inline bool branch_sample_priv(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; +} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 014ee8f0fbaa..a108b60a6962 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, BUILD_BUG(); return 0; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -260,6 +260,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_pte_young +/* + * Return whether the accessed bit is supported on the local CPU. + * + * This stub assumes accessing through an old PTE triggers a page fault. + * Architectures that automatically set the access bit should overwrite it. + */ +static inline bool arch_has_hw_pte_young(void) +{ + return false; +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, @@ -1276,8 +1289,7 @@ static inline int pgd_devmap(pgd_t pgd) #endif #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ - (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ - !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) + !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) static inline int pud_trans_huge(pud_t pud) { return 0; @@ -1598,11 +1610,7 @@ typedef unsigned int pgtbl_mod_mask; #endif #ifndef has_transparent_hugepage -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define has_transparent_hugepage() 1 -#else -#define has_transparent_hugepage() 0 -#endif +#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE) #endif /* diff --git a/include/linux/phy.h b/include/linux/phy.h index 87638c55d844..ddf66198f751 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -115,6 +115,8 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_25GBASER: 25G BaseR * @PHY_INTERFACE_MODE_USXGMII: Universal Serial 10GE MII * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN + * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII + * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -152,6 +154,8 @@ typedef enum { PHY_INTERFACE_MODE_USXGMII, /* 10GBASE-KR - with Clause 73 AN */ PHY_INTERFACE_MODE_10GKR, + PHY_INTERFACE_MODE_QUSGMII, + PHY_INTERFACE_MODE_1000BASEKX, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -249,6 +253,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "trgmii"; case PHY_INTERFACE_MODE_1000BASEX: return "1000base-x"; + case PHY_INTERFACE_MODE_1000BASEKX: + return "1000base-kx"; case PHY_INTERFACE_MODE_2500BASEX: return "2500base-x"; case PHY_INTERFACE_MODE_5GBASER: @@ -267,12 +273,13 @@ static inline const char *phy_modes(phy_interface_t interface) return "10gbase-kr"; case PHY_INTERFACE_MODE_100BASEX: return "100base-x"; + case PHY_INTERFACE_MODE_QUSGMII: + return "qusgmii"; default: return "unknown"; } } - #define PHY_INIT_TIMEOUT 100000 #define PHY_FORCE_TIMEOUT 10 @@ -564,8 +571,10 @@ struct macsec_ops; * @advertising: Currently advertised linkmodes * @adv_old: Saved advertised while power saving for WoL * @lp_advertising: Current link partner advertised linkmodes + * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited * @autoneg: Flag autoneg being used + * @rate_matching: Current rate matching mode * @link: Current link state * @autoneg_complete: Flag auto negotiation of the link has completed * @mdix: Current crossover @@ -588,6 +597,7 @@ struct macsec_ops; * @master_slave_get: Current master/slave advertisement * @master_slave_state: Current master/slave configuration * @mii_ts: Pointer to time stamper callbacks + * @psec: Pointer to Power Sourcing Equipment control struct * @lock: Mutex for serialization access to PHY * @state_queue: Work queue for state machine * @shared: Pointer to private data shared by phys in one package @@ -633,6 +643,8 @@ struct phy_device { unsigned irq_suspended:1; unsigned irq_rerun:1; + int rate_matching; + enum phy_state state; u32 dev_flags; @@ -660,6 +672,9 @@ struct phy_device { /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); + /* Host supported PHY interface types. Should be ignored if empty. */ + DECLARE_PHY_INTERFACE_MASK(host_interfaces); + /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; @@ -701,6 +716,7 @@ struct phy_device { struct phylink *phylink; struct net_device *attached_dev; struct mii_timestamper *mii_ts; + struct pse_control *psec; u8 mdix; u8 mdix_ctrl; @@ -797,6 +813,21 @@ struct phy_driver { */ int (*get_features)(struct phy_device *phydev); + /** + * @get_rate_matching: Get the supported type of rate matching for a + * particular phy interface. This is used by phy consumers to determine + * whether to advertise lower-speed modes for that interface. It is + * assumed that if a rate matching mode is supported on an interface, + * then that interface's rate can be adapted to all slower link speeds + * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy + * supports any kind of rate matching for any interface, then it must + * return that rate matching mode (preferring %RATE_MATCH_PAUSE to + * %RATE_MATCH_CRS). If the interface is not supported, this should + * return %RATE_MATCH_NONE. + */ + int (*get_rate_matching)(struct phy_device *phydev, + phy_interface_t iface); + /* PHY Power Management */ /** @suspend: Suspend the hardware, saving state if needed */ int (*suspend)(struct phy_device *phydev); @@ -963,6 +994,9 @@ struct phy_fixup { const char *phy_speed_to_str(int speed); const char *phy_duplex_to_str(unsigned int duplex); +const char *phy_rate_matching_to_str(int rate_matching); + +int phy_interface_num_ports(phy_interface_t interface); /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value @@ -1677,6 +1711,8 @@ int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); +int phy_get_rate_matching(struct phy_device *phydev, + phy_interface_t iface); void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); diff --git a/include/linux/phy/pcie.h b/include/linux/phy/pcie.h new file mode 100644 index 000000000000..e7ac81764576 --- /dev/null +++ b/include/linux/phy/pcie.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Rockchip Electronics Co., Ltd. + */ +#ifndef __PHY_PCIE_H +#define __PHY_PCIE_H + +#define PHY_MODE_PCIE_RC 20 +#define PHY_MODE_PCIE_EP 21 +#define PHY_MODE_PCIE_BIFURCATION 22 + +#endif diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 3a35e74cdc61..70998e6dd6fd 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. */ #ifndef PHY_TEGRA_XUSB_H @@ -21,6 +21,8 @@ int tegra_xusb_padctl_usb3_set_lfps_detect(struct tegra_xusb_padctl *padctl, unsigned int port, bool enable); int tegra_xusb_padctl_set_vbus_override(struct tegra_xusb_padctl *padctl, bool val); +void tegra_phy_xusb_utmi_pad_power_on(struct phy *phy); +void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 6d06896fc20d..664dd409feb9 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -21,6 +21,35 @@ enum { MLO_AN_FIXED, /* Fixed-link mode */ MLO_AN_INBAND, /* In-band protocol */ + /* MAC_SYM_PAUSE and MAC_ASYM_PAUSE are used when configuring our + * autonegotiation advertisement. They correspond to the PAUSE and + * ASM_DIR bits defined by 802.3, respectively. + * + * The following table lists the values of tx_pause and rx_pause which + * might be requested in mac_link_up. The exact values depend on either + * the results of autonegotation (if MLO_PAUSE_AN is set) or user + * configuration (if MLO_PAUSE_AN is not set). + * + * MAC_SYM_PAUSE MAC_ASYM_PAUSE MLO_PAUSE_AN tx_pause/rx_pause + * ============= ============== ============ ================== + * 0 0 0 0/0 + * 0 0 1 0/0 + * 0 1 0 0/0, 0/1, 1/0, 1/1 + * 0 1 1 0/0, 1/0 + * 1 0 0 0/0, 1/1 + * 1 0 1 0/0, 1/1 + * 1 1 0 0/0, 0/1, 1/0, 1/1 + * 1 1 1 0/0, 0/1, 1/1 + * + * If you set MAC_ASYM_PAUSE, the user may request any combination of + * tx_pause and rx_pause. You do not have to support these + * combinations. + * + * However, you should support combinations of tx_pause and rx_pause + * which might be the result of autonegotation. For example, don't set + * MAC_SYM_PAUSE unless your device can support tx_pause and rx_pause + * at the same time. + */ MAC_SYM_PAUSE = BIT(0), MAC_ASYM_PAUSE = BIT(1), MAC_10HD = BIT(2), @@ -59,6 +88,10 @@ static inline bool phylink_autoneg_inband(unsigned int mode) * @speed: link speed, one of the SPEED_* constants. * @duplex: link duplex mode, one of DUPLEX_* constants. * @pause: link pause state, described by MLO_PAUSE_* constants. + * @rate_matching: rate matching being performed, one of the RATE_MATCH_* + * constants. If rate matching is taking place, then the speed/duplex of + * the medium link mode (@speed and @duplex) and the speed/duplex of the phy + * interface mode (@interface) are different. * @link: true if the link is up. * @an_enabled: true if autonegotiation is enabled/desired. * @an_complete: true if autonegotiation has completed. @@ -70,6 +103,7 @@ struct phylink_link_state { int speed; int duplex; int pause; + int rate_matching; unsigned int link:1; unsigned int an_enabled:1; unsigned int an_complete:1; @@ -518,8 +552,10 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, phy_interface_t interface, int speed, int duplex); #endif -void phylink_get_linkmodes(unsigned long *linkmodes, phy_interface_t interface, - unsigned long mac_capabilities); +void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); +unsigned long phylink_get_capabilities(phy_interface_t interface, + unsigned long mac_capabilities, + int rate_matching); void phylink_generic_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); diff --git a/include/linux/platform_data/adp5588.h b/include/linux/platform_data/adp5588.h deleted file mode 100644 index 6d3f7d911a92..000000000000 --- a/include/linux/platform_data/adp5588.h +++ /dev/null @@ -1,171 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller - * - * Copyright 2009-2010 Analog Devices Inc. - */ - -#ifndef _ADP5588_H -#define _ADP5588_H - -#define DEV_ID 0x00 /* Device ID */ -#define CFG 0x01 /* Configuration Register1 */ -#define INT_STAT 0x02 /* Interrupt Status Register */ -#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ -#define Key_EVENTA 0x04 /* Key Event Register A */ -#define Key_EVENTB 0x05 /* Key Event Register B */ -#define Key_EVENTC 0x06 /* Key Event Register C */ -#define Key_EVENTD 0x07 /* Key Event Register D */ -#define Key_EVENTE 0x08 /* Key Event Register E */ -#define Key_EVENTF 0x09 /* Key Event Register F */ -#define Key_EVENTG 0x0A /* Key Event Register G */ -#define Key_EVENTH 0x0B /* Key Event Register H */ -#define Key_EVENTI 0x0C /* Key Event Register I */ -#define Key_EVENTJ 0x0D /* Key Event Register J */ -#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ -#define UNLOCK1 0x0F /* Unlock Key1 */ -#define UNLOCK2 0x10 /* Unlock Key2 */ -#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ -#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ -#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ -#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ -#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ -#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ -#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ -#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ -#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ -#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ -#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ -#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ -#define GPI_EM1 0x20 /* GPI Event Mode 1 */ -#define GPI_EM2 0x21 /* GPI Event Mode 2 */ -#define GPI_EM3 0x22 /* GPI Event Mode 3 */ -#define GPIO_DIR1 0x23 /* GPIO Data Direction */ -#define GPIO_DIR2 0x24 /* GPIO Data Direction */ -#define GPIO_DIR3 0x25 /* GPIO Data Direction */ -#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ -#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ -#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ -#define Debounce_DIS1 0x29 /* Debounce Disable */ -#define Debounce_DIS2 0x2A /* Debounce Disable */ -#define Debounce_DIS3 0x2B /* Debounce Disable */ -#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ -#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ -#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ -#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ -#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ -#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ -#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ -#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ -#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ -#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ -#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */ -#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ -#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ -#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ -#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ -#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ -#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ -#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ - -#define ADP5588_DEVICE_ID_MASK 0xF - - /* Configuration Register1 */ -#define ADP5588_AUTO_INC (1 << 7) -#define ADP5588_GPIEM_CFG (1 << 6) -#define ADP5588_OVR_FLOW_M (1 << 5) -#define ADP5588_INT_CFG (1 << 4) -#define ADP5588_OVR_FLOW_IEN (1 << 3) -#define ADP5588_K_LCK_IM (1 << 2) -#define ADP5588_GPI_IEN (1 << 1) -#define ADP5588_KE_IEN (1 << 0) - -/* Interrupt Status Register */ -#define ADP5588_CMP2_INT (1 << 5) -#define ADP5588_CMP1_INT (1 << 4) -#define ADP5588_OVR_FLOW_INT (1 << 3) -#define ADP5588_K_LCK_INT (1 << 2) -#define ADP5588_GPI_INT (1 << 1) -#define ADP5588_KE_INT (1 << 0) - -/* Key Lock and Event Counter Register */ -#define ADP5588_K_LCK_EN (1 << 6) -#define ADP5588_LCK21 0x30 -#define ADP5588_KEC 0xF - -#define ADP5588_MAXGPIO 18 -#define ADP5588_BANK(offs) ((offs) >> 3) -#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) - -/* Put one of these structures in i2c_board_info platform_data */ - -#define ADP5588_KEYMAPSIZE 80 - -#define GPI_PIN_ROW0 97 -#define GPI_PIN_ROW1 98 -#define GPI_PIN_ROW2 99 -#define GPI_PIN_ROW3 100 -#define GPI_PIN_ROW4 101 -#define GPI_PIN_ROW5 102 -#define GPI_PIN_ROW6 103 -#define GPI_PIN_ROW7 104 -#define GPI_PIN_COL0 105 -#define GPI_PIN_COL1 106 -#define GPI_PIN_COL2 107 -#define GPI_PIN_COL3 108 -#define GPI_PIN_COL4 109 -#define GPI_PIN_COL5 110 -#define GPI_PIN_COL6 111 -#define GPI_PIN_COL7 112 -#define GPI_PIN_COL8 113 -#define GPI_PIN_COL9 114 - -#define GPI_PIN_ROW_BASE GPI_PIN_ROW0 -#define GPI_PIN_ROW_END GPI_PIN_ROW7 -#define GPI_PIN_COL_BASE GPI_PIN_COL0 -#define GPI_PIN_COL_END GPI_PIN_COL9 - -#define GPI_PIN_BASE GPI_PIN_ROW_BASE -#define GPI_PIN_END GPI_PIN_COL_END - -#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1) - -struct adp5588_gpi_map { - unsigned short pin; - unsigned short sw_evt; -}; - -struct adp5588_kpad_platform_data { - int rows; /* Number of rows */ - int cols; /* Number of columns */ - const unsigned short *keymap; /* Pointer to keymap */ - unsigned short keymapsize; /* Keymap size */ - unsigned repeat:1; /* Enable key repeat */ - unsigned en_keylock:1; /* Enable Key Lock feature */ - unsigned short unlock_key1; /* Unlock Key 1 */ - unsigned short unlock_key2; /* Unlock Key 2 */ - const struct adp5588_gpi_map *gpimap; - unsigned short gpimapsize; - const struct adp5588_gpio_platform_data *gpio_data; -}; - -struct i2c_client; /* forward declaration */ - -struct adp5588_gpio_platform_data { - int gpio_start; /* GPIO Chip base # */ - const char *const *names; - unsigned irq_base; /* interrupt base # */ - unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - int (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void *context; -}; - -#endif diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 8b1b795867a1..5744a2d746aa 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5724,8 +5724,21 @@ enum typec_control_command { TYPEC_CONTROL_COMMAND_EXIT_MODES, TYPEC_CONTROL_COMMAND_CLEAR_EVENTS, TYPEC_CONTROL_COMMAND_ENTER_MODE, + TYPEC_CONTROL_COMMAND_TBT_UFP_REPLY, + TYPEC_CONTROL_COMMAND_USB_MUX_SET, }; +/* Replies the AP may specify to the TBT EnterMode command as a UFP */ +enum typec_tbt_ufp_reply { + TYPEC_TBT_UFP_REPLY_NAK, + TYPEC_TBT_UFP_REPLY_ACK, +}; + +struct typec_usb_mux_set { + uint8_t mux_index; /* Index of the mux to set in the chain */ + uint8_t mux_flags; /* USB_PD_MUX_*-encoded USB mux state to set */ +} __ec_align1; + struct ec_params_typec_control { uint8_t port; uint8_t command; /* enum typec_control_command */ @@ -5739,6 +5752,8 @@ struct ec_params_typec_control { union { uint32_t clear_events_mask; uint8_t mode_to_enter; /* enum typec_mode */ + uint8_t tbt_ufp_reply; /* enum typec_tbt_ufp_reply */ + struct typec_usb_mux_set mux_params; uint8_t placeholder[128]; }; } __ec_align1; @@ -5817,6 +5832,9 @@ enum tcpc_cc_polarity { #define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0) #define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1) #define PD_STATUS_EVENT_HARD_RESET BIT(2) +#define PD_STATUS_EVENT_DISCONNECTED BIT(3) +#define PD_STATUS_EVENT_MUX_0_SET_DONE BIT(4) +#define PD_STATUS_EVENT_MUX_1_SET_DONE BIT(5) struct ec_params_typec_status { uint8_t port; diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 408b29ca4004..e43107e0bee1 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -169,6 +169,7 @@ struct cros_ec_device { int event_size; u32 host_event_wake_mask; u32 last_resume_result; + u16 suspend_timeout_ms; ktime_t last_event_time; struct notifier_block notifier_ready; diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h index c65b412b2b33..611bae193c1c 100644 --- a/include/linux/platform_data/dma-hsu.h +++ b/include/linux/platform_data/dma-hsu.h @@ -8,7 +8,7 @@ #ifndef _PLATFORM_DATA_DMA_HSU_H #define _PLATFORM_DATA_DMA_HSU_H -#include <linux/device.h> +struct device; struct hsu_dma_slave { struct device *dma_dev; diff --git a/include/linux/platform_data/emc2305.h b/include/linux/platform_data/emc2305.h new file mode 100644 index 000000000000..54d672dd6f7d --- /dev/null +++ b/include/linux/platform_data/emc2305.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_PLATFORM_DATA_EMC2305__ +#define __LINUX_PLATFORM_DATA_EMC2305__ + +#define EMC2305_PWM_MAX 5 + +/** + * struct emc2305_platform_data - EMC2305 driver platform data + * @max_state: maximum cooling state of the cooling device; + * @pwm_num: number of active channels; + * @pwm_separate: separate PWM settings for every channel; + * @pwm_min: array of minimum PWM per channel; + */ +struct emc2305_platform_data { + u8 max_state; + u8 pwm_num; + bool pwm_separate; + u8 pwm_min[EMC2305_PWM_MAX]; +}; + +#endif diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h index 4eb53e023997..96c1a14ab365 100644 --- a/include/linux/platform_data/pca953x.h +++ b/include/linux/platform_data/pca953x.h @@ -22,7 +22,7 @@ struct pca953x_platform_data { int (*setup)(struct i2c_client *client, unsigned gpio, unsigned ngpio, void *context); - int (*teardown)(struct i2c_client *client, + void (*teardown)(struct i2c_client *client, unsigned gpio, unsigned ngpio, void *context); const char *const *names; diff --git a/include/linux/platform_data/tps68470.h b/include/linux/platform_data/tps68470.h index 126d082c3f2e..e605a2cab07f 100644 --- a/include/linux/platform_data/tps68470.h +++ b/include/linux/platform_data/tps68470.h @@ -27,9 +27,14 @@ struct tps68470_regulator_platform_data { const struct regulator_init_data *reg_init_data[TPS68470_NUM_REGULATORS]; }; -struct tps68470_clk_platform_data { +struct tps68470_clk_consumer { const char *consumer_dev_name; const char *consumer_con_id; }; +struct tps68470_clk_platform_data { + unsigned int n_consumers; + struct tps68470_clk_consumer consumers[]; +}; + #endif diff --git a/include/linux/platform_data/usb-s3c2410_udc.h b/include/linux/platform_data/usb-s3c2410_udc.h index 07394819d03b..c0fbe1fe3426 100644 --- a/include/linux/platform_data/usb-s3c2410_udc.h +++ b/include/linux/platform_data/usb-s3c2410_udc.h @@ -22,12 +22,6 @@ enum s3c2410_udc_cmd_e { struct s3c2410_udc_mach_info { void (*udc_command)(enum s3c2410_udc_cmd_e); void (*vbus_draw)(unsigned int ma); - - unsigned int pullup_pin; - unsigned int pullup_pin_inverted; - - unsigned int vbus_pin; - unsigned char vbus_pin_inverted; }; extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *); diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 98f2b2f20f3e..28234dc9fa6a 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -65,6 +65,7 @@ #define ASUS_WMI_DEVID_PANEL_OD 0x00050019 #define ASUS_WMI_DEVID_CAMERA 0x00060013 #define ASUS_WMI_DEVID_LID_FLIP 0x00060062 +#define ASUS_WMI_DEVID_LID_FLIP_ROG 0x00060077 /* Storage */ #define ASUS_WMI_DEVID_CARDREADER 0x00080013 @@ -78,6 +79,7 @@ #define ASUS_WMI_DEVID_THERMAL_CTRL 0x00110011 #define ASUS_WMI_DEVID_FAN_CTRL 0x00110012 /* deprecated */ #define ASUS_WMI_DEVID_CPU_FAN_CTRL 0x00110013 +#define ASUS_WMI_DEVID_GPU_FAN_CTRL 0x00110014 #define ASUS_WMI_DEVID_CPU_FAN_CURVE 0x00110024 #define ASUS_WMI_DEVID_GPU_FAN_CURVE 0x00110025 @@ -99,6 +101,15 @@ /* dgpu on/off */ #define ASUS_WMI_DEVID_DGPU 0x00090020 +/* gpu mux switch, 0 = dGPU, 1 = Optimus */ +#define ASUS_WMI_DEVID_GPU_MUX 0x00090016 + +/* TUF laptop RGB modes/colours */ +#define ASUS_WMI_DEVID_TUF_RGB_MODE 0x00100056 + +/* TUF laptop RGB power/state */ +#define ASUS_WMI_DEVID_TUF_RGB_STATE 0x00100057 + /* DSTS masks */ #define ASUS_WMI_DSTS_STATUS_BIT 0x00000001 #define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002 diff --git a/include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h b/include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h new file mode 100644 index 000000000000..23d60130272c --- /dev/null +++ b/include/linux/platform_data/x86/nvidia-wmi-ec-backlight.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + */ + +#ifndef __PLATFORM_DATA_X86_NVIDIA_WMI_EC_BACKLIGHT_H +#define __PLATFORM_DATA_X86_NVIDIA_WMI_EC_BACKLIGHT_H + +#define WMI_BRIGHTNESS_GUID "603E9613-EF25-4338-A3D0-C46177516DB7" + +/** + * enum wmi_brightness_method - WMI method IDs + * @WMI_BRIGHTNESS_METHOD_LEVEL: Get/Set EC brightness level status + * @WMI_BRIGHTNESS_METHOD_SOURCE: Get/Set EC Brightness Source + */ +enum wmi_brightness_method { + WMI_BRIGHTNESS_METHOD_LEVEL = 1, + WMI_BRIGHTNESS_METHOD_SOURCE = 2, + WMI_BRIGHTNESS_METHOD_MAX +}; + +/** + * enum wmi_brightness_mode - Operation mode for WMI-wrapped method + * @WMI_BRIGHTNESS_MODE_GET: Get the current brightness level/source. + * @WMI_BRIGHTNESS_MODE_SET: Set the brightness level. + * @WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL: Get the maximum brightness level. This + * is only valid when the WMI method is + * %WMI_BRIGHTNESS_METHOD_LEVEL. + */ +enum wmi_brightness_mode { + WMI_BRIGHTNESS_MODE_GET = 0, + WMI_BRIGHTNESS_MODE_SET = 1, + WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL = 2, + WMI_BRIGHTNESS_MODE_MAX +}; + +/** + * enum wmi_brightness_source - Backlight brightness control source selection + * @WMI_BRIGHTNESS_SOURCE_GPU: Backlight brightness is controlled by the GPU. + * @WMI_BRIGHTNESS_SOURCE_EC: Backlight brightness is controlled by the + * system's Embedded Controller (EC). + * @WMI_BRIGHTNESS_SOURCE_AUX: Backlight brightness is controlled over the + * DisplayPort AUX channel. + */ +enum wmi_brightness_source { + WMI_BRIGHTNESS_SOURCE_GPU = 1, + WMI_BRIGHTNESS_SOURCE_EC = 2, + WMI_BRIGHTNESS_SOURCE_AUX = 3, + WMI_BRIGHTNESS_SOURCE_MAX +}; + +/** + * struct wmi_brightness_args - arguments for the WMI-wrapped ACPI method + * @mode: Pass in an &enum wmi_brightness_mode value to select between + * getting or setting a value. + * @val: In parameter for value to set when using %WMI_BRIGHTNESS_MODE_SET + * mode. Not used in conjunction with %WMI_BRIGHTNESS_MODE_GET or + * %WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL mode. + * @ret: Out parameter returning retrieved value when operating in + * %WMI_BRIGHTNESS_MODE_GET or %WMI_BRIGHTNESS_MODE_GET_MAX_LEVEL + * mode. Not used in %WMI_BRIGHTNESS_MODE_SET mode. + * @ignored: Padding; not used. The ACPI method expects a 24 byte params struct. + * + * This is the parameters structure for the WmiBrightnessNotify ACPI method as + * wrapped by WMI. The value passed in to @val or returned by @ret will be a + * brightness value when the WMI method ID is %WMI_BRIGHTNESS_METHOD_LEVEL, or + * an &enum wmi_brightness_source value with %WMI_BRIGHTNESS_METHOD_SOURCE. + */ +struct wmi_brightness_args { + u32 mode; + u32 val; + u32 ret; + u32 ignored[3]; +}; + +#endif diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index dd81f510e4cf..b8a701c77fd0 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Intel Atom SOC Power Management Controller Header File - * Copyright (c) 2014, Intel Corporation. + * Intel Atom SoC Power Management Controller Header File + * Copyright (c) 2014-2015,2022 Intel Corporation. */ #ifndef PMC_ATOM_H diff --git a/include/linux/platform_data/x86/simatic-ipc-base.h b/include/linux/platform_data/x86/simatic-ipc-base.h index 39fefd48cf4d..57d6a10dfc9e 100644 --- a/include/linux/platform_data/x86/simatic-ipc-base.h +++ b/include/linux/platform_data/x86/simatic-ipc-base.h @@ -19,6 +19,7 @@ #define SIMATIC_IPC_DEVICE_427E 2 #define SIMATIC_IPC_DEVICE_127E 3 #define SIMATIC_IPC_DEVICE_227E 4 +#define SIMATIC_IPC_DEVICE_227G 5 struct simatic_ipc_platform { u8 devmode; diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index f3b76b39776b..632320ec8f08 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -31,6 +31,8 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC427E = 0x00000A01, SIMATIC_IPC_IPC477E = 0x00000A02, SIMATIC_IPC_IPC127E = 0x00000D01, + SIMATIC_IPC_IPC227G = 0x00000F01, + SIMATIC_IPC_IPC427G = 0x00001001, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) diff --git a/include/linux/pm.h b/include/linux/pm.h index 871c9c49ec9d..93cd34f00822 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -375,19 +375,20 @@ const struct dev_pm_ops name = { \ } #ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec, ns) \ - _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn); \ - __EXPORT_SYMBOL(name, sec, ns) +#define _EXPORT_DEV_PM_OPS(name, sec, ns) \ + const struct dev_pm_ops name; \ + __EXPORT_SYMBOL(name, sec, ns); \ + const struct dev_pm_ops name #else -#define _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn, sec, ns) \ -static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ - resume_fn, runtime_suspend_fn, \ - runtime_resume_fn, idle_fn) +#define _EXPORT_DEV_PM_OPS(name, sec, ns) \ + static __maybe_unused const struct dev_pm_ops __static_##name #endif +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "_gpl", "") +#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "_gpl", #ns) + /* * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. @@ -399,13 +400,21 @@ static __maybe_unused _DEFINE_DEV_PM_OPS(__static_##name, suspend_fn, \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", "") + EXPORT_DEV_PM_OPS(name) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", "") + EXPORT_GPL_DEV_PM_OPS(name) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "", #ns) + EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL, "_gpl", #ns) + EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + } /* Deprecated. Use DEFINE_SIMPLE_DEV_PM_OPS() instead. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 0a41b2dcccad..9a8151a2bdea 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -40,17 +40,21 @@ resume_fn, idle_fn) #define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "", "") + EXPORT_DEV_PM_OPS(name) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "_gpl", "") + EXPORT_GPL_DEV_PM_OPS(name) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "", #ns) + EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ - _EXPORT_DEV_PM_OPS(name, pm_runtime_force_suspend, pm_runtime_force_resume, \ - suspend_fn, resume_fn, idle_fn, "_gpl", #ns) + EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + } #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; diff --git a/include/linux/poison.h b/include/linux/poison.h index d62ef5a6b4e9..2d3249eb0e62 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -81,4 +81,7 @@ /********** net/core/page_pool.c **********/ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) +/********** kernel/bpf/ **********/ +#define BPF_PTR_POISON ((void *)(0xeB9FUL + POISON_POINTER_DELTA)) + #endif diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index b6bd3eac2bcc..8163dd48c430 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -38,9 +38,6 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size); void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, const struct inode *inode, void *value, size_t size); -void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size); #else static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) { @@ -54,18 +51,15 @@ posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, size_t size) { } -static inline void -posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, void *value, - size_t size) -{ -} #endif struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); +struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, + struct user_namespace *fs_userns, + const void *value, size_t size); extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index cb380c1d9459..aa2c4a7c4826 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -374,9 +374,37 @@ struct power_supply_vbat_ri_table { * These timers should be chosen to align with the typical discharge curve * for the battery. * - * When the main CC/CV charging is complete the battery can optionally be - * maintenance charged at the voltages from this table: a table of settings is - * traversed using a slightly lower current and voltage than what is used for + * Ordinary CC/CV charging will stop charging when the charge current goes + * below charge_term_current_ua, and then restart it (if the device is still + * plugged into the charger) at charge_restart_voltage_uv. This happens in most + * consumer products because the power usage while connected to a charger is + * not zero, and devices are not manufactured to draw power directly from the + * charger: instead they will at all times dissipate the battery a little, like + * the power used in standby mode. This will over time give a charge graph + * such as this: + * + * Energy + * ^ ... ... ... ... ... ... ... + * | . . . . . . . . . . . . . + * | .. . .. . .. . .. . .. . .. . .. + * |. .. .. .. .. .. .. + * +-------------------------------------------------------------------> t + * + * Practically this means that the Li-ions are wandering back and forth in the + * battery and this causes degeneration of the battery anode and cathode. + * To prolong the life of the battery, maintenance charging is applied after + * reaching charge_term_current_ua to hold up the charge in the battery while + * consuming power, thus lowering the wear on the battery: + * + * Energy + * ^ ....................................... + * | . ...................... + * | .. + * |. + * +-------------------------------------------------------------------> t + * + * Maintenance charging uses the voltages from this table: a table of settings + * is traversed using a slightly lower current and voltage than what is used for * CC/CV charging. The maintenance charging will for safety reasons not go on * indefinately: we lower the current and voltage with successive maintenance * settings, then disable charging completely after we reach the last one, @@ -385,14 +413,22 @@ struct power_supply_vbat_ri_table { * ordinary CC/CV charging from there. * * As an example, a Samsung EB425161LA Lithium-Ion battery is CC/CV charged - * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for - * 60 hours, then maintenance charged at 600mA and 4100mV for 200 hours. + * at 900mA to 4340mV, then maintenance charged at 600mA and 4150mV for up to + * 60 hours, then maintenance charged at 600mA and 4100mV for up to 200 hours. * After this the charge cycle is restarted waiting for * charge_restart_voltage_uv. * * For most mobile electronics this type of maintenance charging is enough for * the user to disconnect the device and make use of it before both maintenance - * charging cycles are complete. + * charging cycles are complete, if the current and voltage has been chosen + * appropriately. These need to be determined from battery discharge curves + * and expected standby current. + * + * If the voltage anyway drops to charge_restart_voltage_uv during maintenance + * charging, ordinary CC/CV charging is restarted. This can happen if the + * device is e.g. actively used during charging, so more current is drawn than + * the expected stand-by current. Also overvoltage protection will be applied + * as usual. */ struct power_supply_maintenance_charge_table { int charge_current_max_ua; diff --git a/include/linux/prandom.h b/include/linux/prandom.h index deace5fb4e62..e0a0759dd09c 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -12,16 +12,6 @@ #include <linux/percpu.h> #include <linux/random.h> -static inline u32 prandom_u32(void) -{ - return get_random_u32(); -} - -static inline void prandom_bytes(void *buf, size_t nbytes) -{ - return get_random_bytes(buf, nbytes); -} - struct rnd_state { __u32 s1, s2, s3, s4; }; @@ -37,17 +27,20 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) * @ep_ro: right open interval endpoint * - * Returns a pseudo-random number that is in interval [0, ep_ro). Note - * that the result depends on PRNG being well distributed in [0, ~0U] - * u32 space. Here we use maximally equidistributed combined Tausworthe - * generator, that is, prandom_u32(). This is useful when requesting a - * random index of an array containing ep_ro elements, for example. + * Returns a pseudo-random number that is in interval [0, ep_ro). This is + * useful when requesting a random index of an array containing ep_ro elements, + * for example. The result is somewhat biased when ep_ro is not a power of 2, + * so do not use this for cryptographic purposes. * * Returns: pseudo-random number in interval [0, ep_ro) */ static inline u32 prandom_u32_max(u32 ep_ro) { - return (u32)(((u64) prandom_u32() * ep_ro) >> 32); + if (__builtin_constant_p(ep_ro <= 1U << 8) && ep_ro <= 1U << 8) + return (get_random_u8() * ep_ro) >> 8; + if (__builtin_constant_p(ep_ro <= 1U << 16) && ep_ro <= 1U << 16) + return (get_random_u16() * ep_ro) >> 16; + return ((u64)get_random_u32() * ep_ro) >> 32; } /* diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b4381f255a5c..0df425bf9bd7 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -421,4 +421,46 @@ static inline void migrate_enable(void) { } #endif /* CONFIG_SMP */ +/** + * preempt_disable_nested - Disable preemption inside a normally preempt disabled section + * + * Use for code which requires preemption protection inside a critical + * section which has preemption disabled implicitly on non-PREEMPT_RT + * enabled kernels, by e.g.: + * - holding a spinlock/rwlock + * - soft interrupt context + * - regular interrupt handlers + * + * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft + * interrupt context and regular interrupt handlers are preemptible and + * only prevent migration. preempt_disable_nested() ensures that preemption + * is disabled for cases which require CPU local serialization even on + * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP. + * + * The use cases are code sequences which are not serialized by a + * particular lock instance, e.g.: + * - seqcount write side critical sections where the seqcount is not + * associated to a particular lock and therefore the automatic + * protection mechanism does not work. This prevents a live lock + * against a preempting high priority reader. + * - RMW per CPU variable updates like vmstat. + */ +/* Macro to avoid header recursion hell vs. lockdep */ +#define preempt_disable_nested() \ +do { \ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) \ + preempt_disable(); \ + else \ + lockdep_assert_preemption_disabled(); \ +} while (0) + +/** + * preempt_enable_nested - Undo the effect of preempt_disable_nested() + */ +static __always_inline void preempt_enable_nested(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); +} + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index cf7d666ab1f8..8c81806c2e99 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -169,8 +169,6 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit -extern bool pr_flush(int timeout_ms, bool reset_on_progress); - /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use @@ -221,11 +219,6 @@ static inline void printk_deferred_exit(void) { } -static inline bool pr_flush(int timeout_ms, bool reset_on_progress) -{ - return true; -} - static inline int printk_ratelimit(void) { return 0; diff --git a/include/linux/property.h b/include/linux/property.h index a5b429d623f6..117cc200c656 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -32,7 +32,7 @@ enum dev_dma_attr { DEV_DMA_COHERENT, }; -struct fwnode_handle *dev_fwnode(struct device *dev); +struct fwnode_handle *dev_fwnode(const struct device *dev); bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, @@ -387,7 +387,7 @@ bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); -const void *device_get_match_data(struct device *dev); +const void *device_get_match_data(const struct device *dev); int device_get_phy_mode(struct device *dev); int fwnode_get_phy_mode(struct fwnode_handle *fwnode); diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h new file mode 100644 index 000000000000..fb724c65c77b --- /dev/null +++ b/include/linux/pse-pd/pse.h @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* +// Copyright (c) 2022 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + */ +#ifndef _LINUX_PSE_CONTROLLER_H +#define _LINUX_PSE_CONTROLLER_H + +#include <linux/ethtool.h> +#include <linux/list.h> +#include <uapi/linux/ethtool.h> + +struct phy_device; +struct pse_controller_dev; + +/** + * struct pse_control_config - PSE control/channel configuration. + * + * @admin_cotrol: set PoDL PSE admin control as described in + * IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl + */ +struct pse_control_config { + enum ethtool_podl_pse_admin_state admin_cotrol; +}; + +/** + * struct pse_control_status - PSE control/channel status. + * + * @podl_admin_state: operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @podl_pw_status: power detection status of the PoDL PSE. + * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + */ +struct pse_control_status { + enum ethtool_podl_pse_admin_state podl_admin_state; + enum ethtool_podl_pse_pw_d_status podl_pw_status; +}; + +/** + * struct pse_controller_ops - PSE controller driver callbacks + * + * @ethtool_get_status: get PSE control status for ethtool interface + * @ethtool_set_config: set PSE control configuration over ethtool interface + */ +struct pse_controller_ops { + int (*ethtool_get_status)(struct pse_controller_dev *pcdev, + unsigned long id, struct netlink_ext_ack *extack, + struct pse_control_status *status); + int (*ethtool_set_config)(struct pse_controller_dev *pcdev, + unsigned long id, struct netlink_ext_ack *extack, + const struct pse_control_config *config); +}; + +struct module; +struct device_node; +struct of_phandle_args; +struct pse_control; + +/** + * struct pse_controller_dev - PSE controller entity that might + * provide multiple PSE controls + * @ops: a pointer to device specific struct pse_controller_ops + * @owner: kernel module of the PSE controller driver + * @list: internal list of PSE controller devices + * @pse_control_head: head of internal list of requested PSE controls + * @dev: corresponding driver model device struct + * @of_pse_n_cells: number of cells in PSE line specifiers + * @of_xlate: translation function to translate from specifier as found in the + * device tree to id as given to the PSE control ops + * @nr_lines: number of PSE controls in this controller device + * @lock: Mutex for serialization access to the PSE controller + */ +struct pse_controller_dev { + const struct pse_controller_ops *ops; + struct module *owner; + struct list_head list; + struct list_head pse_control_head; + struct device *dev; + int of_pse_n_cells; + int (*of_xlate)(struct pse_controller_dev *pcdev, + const struct of_phandle_args *pse_spec); + unsigned int nr_lines; + struct mutex lock; +}; + +#if IS_ENABLED(CONFIG_PSE_CONTROLLER) +int pse_controller_register(struct pse_controller_dev *pcdev); +void pse_controller_unregister(struct pse_controller_dev *pcdev); +struct device; +int devm_pse_controller_register(struct device *dev, + struct pse_controller_dev *pcdev); + +struct pse_control *of_pse_control_get(struct device_node *node); +void pse_control_put(struct pse_control *psec); + +int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status); +int pse_ethtool_set_config(struct pse_control *psec, + struct netlink_ext_ack *extack, + const struct pse_control_config *config); + +#else + +static inline struct pse_control *of_pse_control_get(struct device_node *node) +{ + return ERR_PTR(-ENOENT); +} + +static inline void pse_control_put(struct pse_control *psec) +{ +} + +static inline int pse_ethtool_get_status(struct pse_control *psec, + struct netlink_ext_ack *extack, + struct pse_control_status *status) +{ + return -ENOTSUPP; +} + +static inline int pse_ethtool_set_config(struct pse_control *psec, + struct netlink_ext_ack *extack, + const struct pse_control_config *config) +{ + return -ENOTSUPP; +} + +#endif + +#endif diff --git a/include/linux/psi.h b/include/linux/psi.h index dd74411ac21d..b029a847def1 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/poll.h> #include <linux/cgroup-defs.h> +#include <linux/cgroup.h> struct seq_file; struct css_set; @@ -18,10 +19,6 @@ extern struct psi_group psi_system; void psi_init(void); -void psi_task_change(struct task_struct *task, int clear, int set); -void psi_task_switch(struct task_struct *prev, struct task_struct *next, - bool sleep); - void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); @@ -34,9 +31,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); #ifdef CONFIG_CGROUPS +static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) +{ + return cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi; +} + int psi_cgroup_alloc(struct cgroup *cgrp); void psi_cgroup_free(struct cgroup *cgrp); void cgroup_move_task(struct task_struct *p, struct css_set *to); +void psi_cgroup_restart(struct psi_group *group); #endif #else /* CONFIG_PSI */ @@ -58,6 +61,7 @@ static inline void cgroup_move_task(struct task_struct *p, struct css_set *to) { rcu_assign_pointer(p->cgroups, to); } +static inline void psi_cgroup_restart(struct psi_group *group) {} #endif #endif /* CONFIG_PSI */ diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index c7fe7c089718..6e4372735068 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -16,13 +16,6 @@ enum psi_task_count { NR_MEMSTALL, NR_RUNNING, /* - * This can't have values other than 0 or 1 and could be - * implemented as a bit flag. But for now we still have room - * in the first cacheline of psi_group_cpu, and this way we - * don't have to special case any state tracking for it. - */ - NR_ONCPU, - /* * For IO and CPU stalls the presence of running/oncpu tasks * in the domain means a partial rather than a full stall. * For memory it's not so simple because of page reclaimers: @@ -32,22 +25,27 @@ enum psi_task_count { * threads and memstall ones. */ NR_MEMSTALL_RUNNING, - NR_PSI_TASK_COUNTS = 5, + NR_PSI_TASK_COUNTS = 4, }; /* Task state bitmasks */ #define TSK_IOWAIT (1 << NR_IOWAIT) #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) -#define TSK_ONCPU (1 << NR_ONCPU) #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) +/* Only one task can be scheduled, no corresponding task count */ +#define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS) + /* Resources that workloads could be stalled on */ enum psi_res { PSI_IO, PSI_MEM, PSI_CPU, - NR_PSI_RESOURCES = 3, +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + PSI_IRQ, +#endif + NR_PSI_RESOURCES, }; /* @@ -63,11 +61,17 @@ enum psi_states { PSI_MEM_FULL, PSI_CPU_SOME, PSI_CPU_FULL, +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + PSI_IRQ_FULL, +#endif /* Only per-CPU, to weigh the CPU in the global average: */ PSI_NONIDLE, - NR_PSI_STATES = 7, + NR_PSI_STATES, }; +/* Use one bit in the state mask to track TSK_ONCPU */ +#define PSI_ONCPU (1 << NR_PSI_STATES) + enum psi_aggregators { PSI_AVGS = 0, PSI_POLL, @@ -147,6 +151,9 @@ struct psi_trigger { }; struct psi_group { + struct psi_group *parent; + bool enabled; + /* Protects data used by the aggregator */ struct mutex avgs_lock; @@ -188,6 +195,8 @@ struct psi_group { #else /* CONFIG_PSI */ +#define NR_PSI_RESOURCES 0 + struct psi_group { }; #endif /* CONFIG_PSI */ diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 9429930c5566..d70c6e5a839d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -403,13 +403,9 @@ struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); -struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, - const char *con_id); void pwm_put(struct pwm_device *pwm); struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id); -struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, - const char *con_id); struct pwm_device *devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, const char *con_id); @@ -497,14 +493,6 @@ static inline struct pwm_device *pwm_get(struct device *dev, return ERR_PTR(-ENODEV); } -static inline struct pwm_device *of_pwm_get(struct device *dev, - struct device_node *np, - const char *con_id) -{ - might_sleep(); - return ERR_PTR(-ENODEV); -} - static inline void pwm_put(struct pwm_device *pwm) { might_sleep(); @@ -517,14 +505,6 @@ static inline struct pwm_device *devm_pwm_get(struct device *dev, return ERR_PTR(-ENODEV); } -static inline struct pwm_device *devm_of_pwm_get(struct device *dev, - struct device_node *np, - const char *con_id) -{ - might_sleep(); - return ERR_PTR(-ENODEV); -} - static inline struct pwm_device * devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, const char *con_id) diff --git a/include/linux/random.h b/include/linux/random.h index 3fec206487f6..147a5e0d0b8e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -38,12 +38,10 @@ static inline int unregister_random_vmfork_notifier(struct notifier_block *nb) { #endif void get_random_bytes(void *buf, size_t len); +u8 get_random_u8(void); +u16 get_random_u16(void); u32 get_random_u32(void); u64 get_random_u64(void); -static inline unsigned int get_random_int(void) -{ - return get_random_u32(); -} static inline unsigned long get_random_long(void) { #if BITS_PER_LONG == 64 @@ -72,7 +70,8 @@ static inline unsigned long get_random_canary(void) return get_random_long() & CANARY_MASK; } -int __init random_init(const char *command_line); +void __init random_init_early(const char *command_line); +void __init random_init(void); bool rng_is_initialized(void); int wait_for_random_bytes(void); @@ -93,9 +92,10 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes) *out = get_random_ ## name(); \ return 0; \ } +declare_get_random_var_wait(u8, u8) +declare_get_random_var_wait(u16, u16) declare_get_random_var_wait(u32, u32) declare_get_random_var_wait(u64, u32) -declare_get_random_var_wait(int, unsigned int) declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f527f27e6438..08605ce7379d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -42,7 +42,31 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); + +struct rcu_gp_oldstate; unsigned long get_completed_synchronize_rcu(void); +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); + +// Maximum number of unsigned long values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_OLDSTATE 2 + +/** + * same_state_synchronize_rcu - Are two old-state values identical? + * @oldstate1: First old-state value. + * @oldstate2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or + * get_completed_synchronize_rcu(). Returns @true if the two values are + * identical and @false otherwise. This allows structures whose lifetimes + * are tracked by old-state values to push these values to a list header, + * allowing those structures to be slightly smaller. + */ +static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned long oldstate2) +{ + return oldstate1 == oldstate2; +} #ifdef CONFIG_PREEMPT_RCU @@ -496,13 +520,21 @@ do { \ * against NULL. Although rcu_access_pointer() may also be used in cases * where update-side locks prevent the value of the pointer from changing, * you should instead use rcu_dereference_protected() for this use case. + * Within an RCU read-side critical section, there is little reason to + * use rcu_access_pointer(). + * + * It is usually best to test the rcu_access_pointer() return value + * directly in order to avoid accidental dereferences being introduced + * by later inattentive changes. In other words, assigning the + * rcu_access_pointer() return value to a local variable results in an + * accident waiting to happen. * * It is also permissible to use rcu_access_pointer() when read-side - * access to the pointer was removed at least one grace period ago, as - * is the case in the context of the RCU callback that is freeing up - * the data, or after a synchronize_rcu() returns. This can be useful - * when tearing down multi-linked structures after a grace period - * has elapsed. + * access to the pointer was removed at least one grace period ago, as is + * the case in the context of the RCU callback that is freeing up the data, + * or after a synchronize_rcu() returns. This can be useful when tearing + * down multi-linked structures after a grace period has elapsed. However, + * rcu_dereference_protected() is normally preferred for this use case. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 62815c0a2dce..768196a5f39d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -14,25 +14,75 @@ #include <asm/param.h> /* for HZ */ +struct rcu_gp_oldstate { + unsigned long rgos_norm; +}; + +// Maximum number of rcu_gp_oldstate values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 2 + +/* + * Are the two oldstate values the same? See the Tree RCU version for + * docbook header. + */ +static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, + struct rcu_gp_oldstate *rgosp2) +{ + return rgosp1->rgos_norm == rgosp2->rgos_norm; +} + unsigned long get_state_synchronize_rcu(void); + +static inline void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = get_state_synchronize_rcu(); +} + unsigned long start_poll_synchronize_rcu(void); + +static inline void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu(); +} + bool poll_state_synchronize_rcu(unsigned long oldstate); +static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + return poll_state_synchronize_rcu(rgosp->rgos_norm); +} + static inline void cond_synchronize_rcu(unsigned long oldstate) { might_sleep(); } +static inline void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu(rgosp->rgos_norm); +} + static inline unsigned long start_poll_synchronize_rcu_expedited(void) { return start_poll_synchronize_rcu(); } +static inline void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = start_poll_synchronize_rcu_expedited(); +} + static inline void cond_synchronize_rcu_expedited(unsigned long oldstate) { cond_synchronize_rcu(oldstate); } +static inline void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + cond_synchronize_rcu_expedited(rgosp->rgos_norm); +} + extern void rcu_barrier(void); static inline void synchronize_rcu_expedited(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 47eaa4cb0df7..5efb51486e8a 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -40,12 +40,52 @@ bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); + +struct rcu_gp_oldstate { + unsigned long rgos_norm; + unsigned long rgos_exp; +}; + +// Maximum number of rcu_gp_oldstate values corresponding to +// not-yet-completed RCU grace periods. +#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 4 + +/** + * same_state_synchronize_rcu_full - Are two old-state values identical? + * @rgosp1: First old-state value. + * @rgosp2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or get_completed_synchronize_rcu_full(). Returns @true if the two + * values are identical and @false otherwise. This allows structures + * whose lifetimes are tracked by old-state values to push these values + * to a list header, allowing those structures to be slightly smaller. + * + * Note that equality is judged on a bitwise basis, so that an + * @rcu_gp_oldstate structure with an already-completed state in one field + * will compare not-equal to a structure with an already-completed state + * in the other field. After all, the @rcu_gp_oldstate structure is opaque + * so how did such a situation come to pass in the first place? + */ +static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, + struct rcu_gp_oldstate *rgosp2) +{ + return rgosp1->rgos_norm == rgosp2->rgos_norm && rgosp1->rgos_exp == rgosp2->rgos_exp; +} + unsigned long start_poll_synchronize_rcu_expedited(void); +void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu_expedited(unsigned long oldstate); +void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); unsigned long get_state_synchronize_rcu(void); +void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); +void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool poll_state_synchronize_rcu(unsigned long oldstate); +bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); +void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool rcu_is_idle_cpu(int cpu); diff --git a/include/linux/reboot.h b/include/linux/reboot.h index e5d9ef886179..2b6bb593be5b 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -106,6 +106,14 @@ enum sys_off_mode { SYS_OFF_MODE_POWER_OFF, /** + * @SYS_OFF_MODE_RESTART_PREPARE: + * + * Handlers prepare system to be restarted. Handlers are + * allowed to sleep. + */ + SYS_OFF_MODE_RESTART_PREPARE, + + /** * @SYS_OFF_MODE_RESTART: * * Handlers restart system. Handlers are disallowed to sleep. diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 7cf2157134ac..ca3434dca3a0 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -311,6 +311,8 @@ typedef void (*regmap_unlock)(void *); * This field is a duplicate of a similar file in * 'struct regmap_bus' and serves exact same purpose. * Use it only for "no-bus" cases. + * @io_port: Support IO port accessors. Makes sense only when MMIO vs. IO port + * access can be distinguished. * @max_register: Optional, specifies the maximum valid register address. * @wr_table: Optional, points to a struct regmap_access_table specifying * valid ranges for write access. @@ -399,6 +401,7 @@ struct regmap_config { size_t max_raw_write; bool fast_io; + bool io_port; unsigned int max_register; const struct regmap_access_table *wr_table; @@ -489,8 +492,12 @@ typedef int (*regmap_hw_read)(void *context, void *val_buf, size_t val_size); typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); +typedef int (*regmap_hw_reg_noinc_read)(void *context, unsigned int reg, + void *val, size_t val_count); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_noinc_write)(void *context, unsigned int reg, + const void *val, size_t val_count); typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, unsigned int mask, unsigned int val); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); @@ -511,6 +518,8 @@ typedef void (*regmap_hw_free_context)(void *context); * must serialise with respect to non-async I/O. * @reg_write: Write a single register value to the given register address. This * write operation has to complete when returning from the function. + * @reg_write_noinc: Write multiple register value to the same register. This + * write operation has to complete when returning from the function. * @reg_update_bits: Update bits operation to be used against volatile * registers, intended for devices supporting some mechanism * for setting clearing bits without having to @@ -538,9 +547,11 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_noinc_write reg_noinc_write; regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; + regmap_hw_reg_noinc_read reg_noinc_read; regmap_hw_free_context free_context; regmap_hw_async_alloc async_alloc; u8 read_flag_mask; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index bc6cda706d1f..ee3b4a014611 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -207,6 +207,8 @@ struct regulator *__must_check regulator_get_optional(struct device *dev, const char *id); struct regulator *__must_check devm_regulator_get_optional(struct device *dev, const char *id); +int devm_regulator_get_enable(struct device *dev, const char *id); +int devm_regulator_get_enable_optional(struct device *dev, const char *id); void regulator_put(struct regulator *regulator); void devm_regulator_put(struct regulator *regulator); @@ -244,12 +246,15 @@ int __must_check regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); int __must_check devm_regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); +void devm_regulator_bulk_put(struct regulator_bulk_data *consumers); int __must_check devm_regulator_bulk_get_const( struct device *dev, int num_consumers, const struct regulator_bulk_data *in_consumers, struct regulator_bulk_data **out_consumers); int __must_check regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers); +int devm_regulator_bulk_get_enable(struct device *dev, int num_consumers, + const char * const *id); int regulator_bulk_disable(int num_consumers, struct regulator_bulk_data *consumers); int regulator_bulk_force_disable(int num_consumers, @@ -354,6 +359,17 @@ devm_regulator_get_exclusive(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline int devm_regulator_get_enable(struct device *dev, const char *id) +{ + return -ENODEV; +} + +static inline int devm_regulator_get_enable_optional(struct device *dev, + const char *id) +{ + return -ENODEV; +} + static inline struct regulator *__must_check regulator_get_optional(struct device *dev, const char *id) { @@ -375,6 +391,10 @@ static inline void devm_regulator_put(struct regulator *regulator) { } +static inline void devm_regulator_bulk_put(struct regulator_bulk_data *consumers) +{ +} + static inline int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, @@ -465,6 +485,13 @@ static inline int regulator_bulk_enable(int num_consumers, return 0; } +static inline int devm_regulator_bulk_get_enable(struct device *dev, + int num_consumers, + const char * const *id) +{ + return 0; +} + static inline int regulator_bulk_disable(int num_consumers, struct regulator_bulk_data *consumers) { diff --git a/include/linux/regulator/gpio-regulator.h b/include/linux/regulator/gpio-regulator.h index fdeb312cdabd..c223e50ff9f7 100644 --- a/include/linux/regulator/gpio-regulator.h +++ b/include/linux/regulator/gpio-regulator.h @@ -42,6 +42,7 @@ struct gpio_regulator_state { /** * struct gpio_regulator_config - config structure * @supply_name: Name of the regulator supply + * @input_supply: Name of the input regulator supply * @enabled_at_boot: Whether regulator has been enabled at * boot or not. 1 = Yes, 0 = No * This is used to keep the regulator at @@ -62,6 +63,7 @@ struct gpio_regulator_state { */ struct gpio_regulator_config { const char *supply_name; + const char *input_supply; unsigned enabled_at_boot:1; unsigned startup_delay; diff --git a/include/linux/regulator/mt6331-regulator.h b/include/linux/regulator/mt6331-regulator.h new file mode 100644 index 000000000000..2801a9879c14 --- /dev/null +++ b/include/linux/regulator/mt6331-regulator.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Collabora Ltd. + * Author: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> + */ + +#ifndef __LINUX_REGULATOR_MT6331_H +#define __LINUX_REGULATOR_MT6331_H + +enum { + /* BUCK */ + MT6331_ID_VDVFS11 = 0, + MT6331_ID_VDVFS12, + MT6331_ID_VDVFS13, + MT6331_ID_VDVFS14, + MT6331_ID_VCORE2, + MT6331_ID_VIO18, + /* LDO */ + MT6331_ID_VTCXO1, + MT6331_ID_VTCXO2, + MT6331_ID_AVDD32_AUD, + MT6331_ID_VAUXA32, + MT6331_ID_VCAMA, + MT6331_ID_VIO28, + MT6331_ID_VCAM_AF, + MT6331_ID_VMC, + MT6331_ID_VMCH, + MT6331_ID_VEMC33, + MT6331_ID_VGP1, + MT6331_ID_VSIM1, + MT6331_ID_VSIM2, + MT6331_ID_VMIPI, + MT6331_ID_VIBR, + MT6331_ID_VGP4, + MT6331_ID_VCAMD, + MT6331_ID_VUSB10, + MT6331_ID_VCAM_IO, + MT6331_ID_VSRAM_DVFS1, + MT6331_ID_VGP2, + MT6331_ID_VGP3, + MT6331_ID_VRTC, + MT6331_ID_VDIG18, + MT6331_ID_VREG_MAX +}; + +#endif /* __LINUX_REGULATOR_MT6331_H */ diff --git a/include/linux/regulator/mt6332-regulator.h b/include/linux/regulator/mt6332-regulator.h new file mode 100644 index 000000000000..af5e3ed31029 --- /dev/null +++ b/include/linux/regulator/mt6332-regulator.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Collabora Ltd. + * Author: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> + */ + +#ifndef __LINUX_REGULATOR_MT6332_H +#define __LINUX_REGULATOR_MT6332_H + +enum { + /* BUCK */ + MT6332_ID_VDRAM = 0, + MT6332_ID_VDVFS2, + MT6332_ID_VPA, + MT6332_ID_VRF1, + MT6332_ID_VRF2, + MT6332_ID_VSBST, + /* LDO */ + MT6332_ID_VAUXB32, + MT6332_ID_VBIF28, + MT6332_ID_VDIG18, + MT6332_ID_VSRAM_DVFS2, + MT6332_ID_VUSB33, + MT6332_ID_VREG_MAX +}; + +#endif /* __LINUX_REGULATOR_MT6332_H */ diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index aea79c77db0f..fe8978eb69f1 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -490,6 +490,20 @@ struct rproc_dump_segment { }; /** + * enum rproc_features - features supported + * + * @RPROC_FEAT_ATTACH_ON_RECOVERY: The remote processor does not need help + * from Linux to recover, such as firmware + * loading. Linux just needs to attach after + * recovery. + */ + +enum rproc_features { + RPROC_FEAT_ATTACH_ON_RECOVERY, + RPROC_MAX_FEATURES, +}; + +/** * struct rproc - represents a physical remote processor device * @node: list node of this rproc object * @domain: iommu domain @@ -530,6 +544,7 @@ struct rproc_dump_segment { * @elf_machine: firmware ELF machine * @cdev: character device of the rproc * @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release + * @features: indicate remoteproc features */ struct rproc { struct list_head node; @@ -570,6 +585,7 @@ struct rproc { u16 elf_machine; struct cdev cdev; bool cdev_put_on_release; + DECLARE_BITMAP(features, RPROC_MAX_FEATURES); }; /** @@ -616,9 +632,8 @@ struct rproc_vring { /** * struct rproc_vdev - remoteproc state for a supported virtio device - * @refcount: reference counter for the vdev and vring allocations * @subdev: handle for registering the vdev as a rproc subdevice - * @dev: device struct used for reference count semantics + * @pdev: remoteproc virtio platform device * @id: virtio device id (as in virtio_ids.h) * @node: list node * @rproc: the rproc handle @@ -627,10 +642,9 @@ struct rproc_vring { * @index: vdev position versus other vdev declared in resource table */ struct rproc_vdev { - struct kref refcount; struct rproc_subdev subdev; - struct device dev; + struct platform_device *pdev; unsigned int id; struct list_head node; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 21deb5212bbd..0cf5b20c6ddf 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -15,6 +15,9 @@ int proc_resctrl_show(struct seq_file *m, #endif +/* max value for struct rdt_domain's mbps_val */ +#define MBA_MAX_MBPS U32_MAX + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -29,6 +32,16 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded @@ -53,6 +66,9 @@ struct resctrl_staged_config { * @cqm_work_cpu: worker CPU for CQM h/w counters * @plr: pseudo-locked region (if any) associated with domain * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid */ struct rdt_domain { struct list_head list; @@ -67,6 +83,7 @@ struct rdt_domain { int cqm_work_cpu; struct pseudo_lock_region *plr; struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; }; /** @@ -130,8 +147,6 @@ struct resctrl_schema; /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine * @num_rmid: Number of RMIDs available @@ -150,8 +165,6 @@ struct resctrl_schema; */ struct rdt_resource { int rid; - bool alloc_enabled; - bool mon_enabled; bool alloc_capable; bool mon_capable; int num_rmid; @@ -194,7 +207,50 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); + +/* + * Update the ctrl_val and apply this config right now. + * Must be called on one of the domain's CPUs. + */ +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val); + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); + +/** + * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid + * for this resource and domain. + * @r: resource that the counter should be read from. + * @d: domain that the counter should be read from. + * @rmid: rmid of the counter to read. + * @eventid: eventid to read, e.g. L3 occupancy. + * @val: result of the counter read in bytes. + * + * Call from process context on a CPU that belongs to domain @d. + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_rmid() - Reset any private state associated with rmid + * and eventid. + * @r: The domain's resource. + * @d: The rmid's domain. + * @rmid: The rmid whose counter values should be reset. + * @eventid: The eventid whose counter values should be reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid); + +extern unsigned int resctrl_rmid_realloc_threshold; +extern unsigned int resctrl_rmid_realloc_limit; #endif /* _RESCTRL_H */ diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index dac53fd3afea..2504df9a0453 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -101,7 +101,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); - +void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b89b4b86951f..bd3504d11b15 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -166,7 +166,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, unlink_anon_vmas(next); } -struct anon_vma *page_get_anon_vma(struct page *page); +struct anon_vma *folio_get_anon_vma(struct folio *folio); /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -270,7 +270,7 @@ dup: * @page: the exclusive anonymous page to try marking possibly shared * * The caller needs to hold the PT lock and has to have the page table entry - * cleared/invalidated+flushed, to properly sync against GUP-fast. + * cleared/invalidated. * * This is similar to page_try_dup_anon_rmap(), however, not used during fork() * to duplicate a mapping, but instead to prepare for KSM or temporarily @@ -286,12 +286,68 @@ static inline int page_try_share_anon_rmap(struct page *page) { VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page); - /* See page_try_dup_anon_rmap(). */ - if (likely(!is_device_private_page(page) && - unlikely(page_maybe_dma_pinned(page)))) - return -EBUSY; + /* device private pages cannot get pinned via GUP. */ + if (unlikely(is_device_private_page(page))) { + ClearPageAnonExclusive(page); + return 0; + } + + /* + * We have to make sure that when we clear PageAnonExclusive, that + * the page is not pinned and that concurrent GUP-fast won't succeed in + * concurrently pinning the page. + * + * Conceptually, PageAnonExclusive clearing consists of: + * (A1) Clear PTE + * (A2) Check if the page is pinned; back off if so. + * (A3) Clear PageAnonExclusive + * (A4) Restore PTE (optional, but certainly not writable) + * + * When clearing PageAnonExclusive, we cannot possibly map the page + * writable again, because anon pages that may be shared must never + * be writable. So in any case, if the PTE was writable it cannot + * be writable anymore afterwards and there would be a PTE change. Only + * if the PTE wasn't writable, there might not be a PTE change. + * + * Conceptually, GUP-fast pinning of an anon page consists of: + * (B1) Read the PTE + * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so. + * (B3) Pin the mapped page + * (B4) Check if the PTE changed by re-reading it; back off if so. + * (B5) If the original PTE is not writable, check if + * PageAnonExclusive is not set; back off if so. + * + * If the PTE was writable, we only have to make sure that GUP-fast + * observes a PTE change and properly backs off. + * + * If the PTE was not writable, we have to make sure that GUP-fast either + * detects a (temporary) PTE change or that PageAnonExclusive is cleared + * and properly backs off. + * + * Consequently, when clearing PageAnonExclusive(), we have to make + * sure that (A1), (A2)/(A3) and (A4) happen in the right memory + * order. In GUP-fast pinning code, we have to make sure that (B3),(B4) + * and (B5) happen in the right memory order. + * + * We assume that there might not be a memory barrier after + * clearing/invalidating the PTE (A1) and before restoring the PTE (A4), + * so we use explicit ones here. + */ + /* Paired with the memory barrier in try_grab_folio(). */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb(); + + if (unlikely(page_maybe_dma_pinned(page))) + return -EBUSY; ClearPageAnonExclusive(page); + + /* + * This is conceptually a smp_wmb() paired with the smp_rmb() in + * gup_must_unshare(). + */ + if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + smp_mb__after_atomic(); return 0; } @@ -405,13 +461,8 @@ struct rmap_walk_control { void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); - -/* - * Called by memory-failure.c to kill processes. - */ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, struct rmap_walk_control *rwc); -void page_unlock_anon_vma_read(struct anon_vma *anon_vma); #else /* !CONFIG_MMU */ diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 8f416c5e929e..c0ef596f340b 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -1,7 +1,7 @@ #ifndef __LINUX_RWLOCK_H #define __LINUX_RWLOCK_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 8f5a86e210b9..4d2d5205ab58 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -575,8 +575,9 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue * on a &struct sbitmap_queue. * @sbq: Bitmap queue to wake up. + * @nr: Number of bits cleared. */ -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr); /** * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct diff --git a/include/linux/sched.h b/include/linux/sched.h index e7b2f8a5c711..ffb6eb55cd13 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,6 +14,7 @@ #include <linux/pid.h> #include <linux/sem.h> #include <linux/shm.h> +#include <linux/kmsan_types.h> #include <linux/mutex.h> #include <linux/plist.h> #include <linux/hrtimer.h> @@ -81,25 +82,34 @@ struct task_group; */ /* Used in tsk->state: */ -#define TASK_RUNNING 0x0000 -#define TASK_INTERRUPTIBLE 0x0001 -#define TASK_UNINTERRUPTIBLE 0x0002 -#define __TASK_STOPPED 0x0004 -#define __TASK_TRACED 0x0008 +#define TASK_RUNNING 0x00000000 +#define TASK_INTERRUPTIBLE 0x00000001 +#define TASK_UNINTERRUPTIBLE 0x00000002 +#define __TASK_STOPPED 0x00000004 +#define __TASK_TRACED 0x00000008 /* Used in tsk->exit_state: */ -#define EXIT_DEAD 0x0010 -#define EXIT_ZOMBIE 0x0020 +#define EXIT_DEAD 0x00000010 +#define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->state again: */ -#define TASK_PARKED 0x0040 -#define TASK_DEAD 0x0080 -#define TASK_WAKEKILL 0x0100 -#define TASK_WAKING 0x0200 -#define TASK_NOLOAD 0x0400 -#define TASK_NEW 0x0800 -/* RT specific auxilliary flag to mark RT lock waiters */ -#define TASK_RTLOCK_WAIT 0x1000 -#define TASK_STATE_MAX 0x2000 +#define TASK_PARKED 0x00000040 +#define TASK_DEAD 0x00000080 +#define TASK_WAKEKILL 0x00000100 +#define TASK_WAKING 0x00000200 +#define TASK_NOLOAD 0x00000400 +#define TASK_NEW 0x00000800 +#define TASK_RTLOCK_WAIT 0x00001000 +#define TASK_FREEZABLE 0x00002000 +#define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) +#define TASK_FROZEN 0x00008000 +#define TASK_STATE_MAX 0x00010000 + +#define TASK_ANY (TASK_STATE_MAX-1) + +/* + * DO NOT ADD ANY NEW USERS ! + */ +#define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -860,9 +870,6 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; - /* Per-thread vma caching: */ - struct vmacache vmacache; - #ifdef SPLIT_RSS_COUNTING struct task_rss_stat rss_stat; #endif @@ -914,6 +921,10 @@ struct task_struct { #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif +#ifdef CONFIG_LRU_GEN + /* whether the LRU algorithm may apply to this access */ + unsigned in_lru_fault:1; +#endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif @@ -936,7 +947,7 @@ struct task_struct { #endif #ifdef CONFIG_EVENTFD /* Recursion prevention for eventfd_signal() */ - unsigned in_eventfd_signal:1; + unsigned in_eventfd:1; #endif #ifdef CONFIG_IOMMU_SVA unsigned pasid_activated:1; @@ -944,6 +955,10 @@ struct task_struct { #ifdef CONFIG_CPU_SUP_INTEL unsigned reported_split_lock:1; #endif +#ifdef CONFIG_TASK_DELAY_ACCT + /* delay due to memory thrashing */ + unsigned in_thrashing:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1355,6 +1370,10 @@ struct task_struct { #endif #endif +#ifdef CONFIG_KMSAN + struct kmsan_ctx kmsan_ctx; +#endif + #if IS_ENABLED(CONFIG_KUNIT) struct kunit *kunit_test; #endif @@ -1381,9 +1400,6 @@ struct task_struct { #endif #ifdef CONFIG_TRACING - /* State flags for use by tracers: */ - unsigned long trace; - /* Bitmask and counter of trace recursion: */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ @@ -1713,8 +1729,9 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF__HOLE__00004000 0x00004000 #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ @@ -1722,10 +1739,14 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF__HOLE__00800000 0x00800000 +#define PF__HOLE__01000000 0x01000000 +#define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF__HOLE__20000000 0x20000000 +#define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 4d0a5be28b70..8270ad7ae14c 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,9 +71,8 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_OOM_VICTIM 25 /* mm is the oom victim */ -#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ -#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ +#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either * replaced in the future by mm.pinned_vm when it becomes stable, or grow into @@ -81,7 +80,7 @@ static inline int get_dumpable(struct mm_struct *mm) * pinned pages were unpinned later on, we'll still keep this bit set for the * lifecycle of this mm, just for simplicity. */ -#define MMF_HAS_PINNED 28 /* FOLL_PIN has run, never cleared */ +#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index cafbe03eed01..20099268fa25 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -94,6 +94,7 @@ struct signal_struct { refcount_t sigcnt; atomic_t live; int nr_threads; + int quick_threads; struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index e650946816d0..303ee7dd0c7e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -27,6 +27,7 @@ enum sched_tunable_scaling { #ifdef CONFIG_NUMA_BALANCING extern int sysctl_numa_balancing_mode; +extern unsigned int sysctl_numa_balancing_promote_rate_limit; #else #define sysctl_numa_balancing_mode 0 #endif diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 81cab4b01edc..d6c48163c6de 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -127,6 +127,9 @@ static inline void put_task_struct_many(struct task_struct *t, int nr) void put_task_struct_rcu_user(struct task_struct *task); +/* Free all architecture-specific resources held by a thread. */ +void release_thread(struct task_struct *dead_task); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; #else diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index a193884ecf2b..4f765bc788ff 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -84,7 +84,7 @@ struct scmi_protocol_handle; struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); - const struct scmi_clock_info *(*info_get) + const struct scmi_clock_info __must_check *(*info_get) (const struct scmi_protocol_handle *ph, u32 clk_id); int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 *rate); @@ -466,7 +466,7 @@ enum scmi_sensor_class { */ struct scmi_sensor_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); - const struct scmi_sensor_info *(*info_get) + const struct scmi_sensor_info __must_check *(*info_get) (const struct scmi_protocol_handle *ph, u32 sensor_id); int (*trip_point_config)(const struct scmi_protocol_handle *ph, u32 sensor_id, u8 trip_id, u64 trip_value); diff --git a/include/linux/security.h b/include/linux/security.h index 7bd0c490703d..ca1b7109c0db 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -114,6 +114,7 @@ enum lockdown_reason { LOCKDOWN_IOPORT, LOCKDOWN_MSR, LOCKDOWN_ACPI_TABLES, + LOCKDOWN_DEVICE_TREE, LOCKDOWN_PCMCIA_CIS, LOCKDOWN_TIOCSSERIAL, LOCKDOWN_MODULE_PARAMETERS, @@ -122,6 +123,7 @@ enum lockdown_reason { LOCKDOWN_XMON_WR, LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_DBG_WRITE_KERNEL, + LOCKDOWN_RTAS_ERROR_INJECTION, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, @@ -437,6 +439,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); +int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); int security_msg_msg_alloc(struct msg_msg *msg); @@ -461,7 +464,7 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); -int security_getprocattr(struct task_struct *p, const char *lsm, char *name, +int security_getprocattr(struct task_struct *p, const char *lsm, const char *name, char **value); int security_setprocattr(const char *lsm, const char *name, void *value, size_t size); @@ -1194,6 +1197,11 @@ static inline int security_task_prctl(int option, unsigned long arg2, static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) { } +static inline int security_create_user_ns(const struct cred *cred) +{ + return 0; +} + static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { @@ -1301,7 +1309,7 @@ static inline void security_d_instantiate(struct dentry *dentry, { } static inline int security_getprocattr(struct task_struct *p, const char *lsm, - char *name, char **value) + const char *name, char **value) { return -EINVAL; } diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 1ac0d712a9c3..6f837bb6c715 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -43,6 +43,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_MBR_DONE: case IOC_OPAL_WRITE_SHADOW_MBR: case IOC_OPAL_GENERIC_TABLE_RW: + case IOC_OPAL_GET_STATUS: return true; } return false; diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 3368c261ab62..66f624fc618c 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/uaccess.h> #include <linux/termios.h> #include <linux/delay.h> diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 8c7b793aa4d7..19376bee9667 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -32,7 +32,7 @@ struct plat_serial8250_port { void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, - struct ktermios *old); + const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); @@ -74,6 +74,7 @@ struct uart_8250_port; struct uart_8250_ops { int (*setup_irq)(struct uart_8250_port *); void (*release_irq)(struct uart_8250_port *); + void (*setup_timer)(struct uart_8250_port *); }; struct uart_8250_em485 { @@ -157,7 +158,7 @@ extern int early_serial8250_setup(struct earlycon_device *device, extern void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk); extern void serial8250_do_set_termios(struct uart_port *port, - struct ktermios *termios, struct ktermios *old); + struct ktermios *termios, const struct ktermios *old); extern void serial8250_do_set_ldisc(struct uart_port *port, struct ktermios *termios); extern unsigned int serial8250_do_get_mctrl(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 6e4f4765d209..d657f2a42a7b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -387,7 +387,7 @@ struct uart_ops { void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, - struct ktermios *old); + const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); @@ -422,7 +422,7 @@ struct uart_icount { __u32 buf_overrun; }; -typedef unsigned int __bitwise upf_t; +typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { @@ -433,7 +433,7 @@ struct uart_port { void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, - struct ktermios *old); + const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); @@ -513,23 +513,24 @@ struct uart_port { #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) -#define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19)) +#define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19)) /* Port has hardware-assisted h/w flow control */ -#define UPF_AUTO_CTS ((__force upf_t) (1 << 20)) -#define UPF_AUTO_RTS ((__force upf_t) (1 << 21)) +#define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20)) +#define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ -#define UPF_SOFT_FLOW ((__force upf_t) (1 << 22)) -#define UPF_CONS_FLOW ((__force upf_t) (1 << 23)) -#define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) -#define UPF_EXAR_EFR ((__force upf_t) (1 << 25)) -#define UPF_BUG_THRE ((__force upf_t) (1 << 26)) +#define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22)) +#define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23)) +#define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24)) +#define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25)) +#define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26)) /* The exact UART type is known and should not be probed. */ -#define UPF_FIXED_TYPE ((__force upf_t) (1 << 27)) -#define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28)) -#define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) -#define UPF_DEAD ((__force upf_t) (1 << 30)) -#define UPF_IOREMAP ((__force upf_t) (1 << 31)) +#define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27)) +#define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28)) +#define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29)) +#define UPF_DEAD ((__force upf_t) BIT_ULL(30)) +#define UPF_IOREMAP ((__force upf_t) BIT_ULL(31)) +#define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) @@ -624,6 +625,23 @@ struct uart_state { /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 +/** + * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars + * @up: uart_port structure describing the port + * @chars: number of characters sent + * + * This function advances the tail of circular xmit buffer by the number of + * @chars transmitted and handles accounting of transmitted bytes (into + * @up's icount.tx). + */ +static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) +{ + struct circ_buf *xmit = &up->state->xmit; + + xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); + up->icount.tx += chars; +} + struct module; struct tty_driver; @@ -652,7 +670,7 @@ void uart_write_wakeup(struct uart_port *port); void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, - struct ktermios *old, unsigned int min, + const struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); @@ -933,5 +951,4 @@ static inline int uart_handle_break(struct uart_port *port) !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); -int uart_rs485_config(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */ diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 302094b855fb..d1f343853b6c 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -535,7 +535,7 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support); + unsigned long *support, unsigned long *interfaces); phy_interface_t sfp_select_interface(struct sfp_bus *bus, unsigned long *link_modes); @@ -568,7 +568,8 @@ static inline bool sfp_may_have_phy(struct sfp_bus *bus, static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support) + unsigned long *support, + unsigned long *interfaces) { } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index ff0b990de83d..d500ea967dc7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -92,17 +92,19 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); -extern bool shmem_is_huge(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index); -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) +extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, + pgoff_t index, bool shmem_huge_force); +static inline bool shmem_huge_enabled(struct vm_area_struct *vma, + bool shmem_huge_force) { - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff); + return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, + shmem_huge_force); } extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); -/* Flag allocation requirements to shmem_getpage */ +/* Flag allocation requirements to shmem_get_folio */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_NOALLOC, /* similar, but fail on hole or use fallocated page */ @@ -111,8 +113,8 @@ enum sgp_type { SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; -extern int shmem_getpage(struct inode *inode, pgoff_t index, - struct page **pagep, enum sgp_type sgp); +int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, + enum sgp_type sgp); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18e163a3460d..7be5bb4c94b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -533,6 +533,13 @@ enum { struct ubuf_info { void (*callback)(struct sk_buff *, struct ubuf_info *, bool zerocopy_success); + refcount_t refcnt; + u8 flags; +}; + +struct ubuf_info_msgzc { + struct ubuf_info ubuf; + union { struct { unsigned long desc; @@ -545,8 +552,6 @@ struct ubuf_info { u32 bytelen; }; }; - refcount_t refcnt; - u8 flags; struct mmpin { struct user_struct *user; @@ -555,6 +560,8 @@ struct ubuf_info { }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) +#define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \ + ubuf) int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); @@ -796,6 +803,7 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -975,6 +983,7 @@ struct sk_buff { #endif __u8 slow_gro:1; __u8 csum_not_inet:1; + __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -1195,7 +1204,8 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } -void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); +void __fix_address +kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason @@ -1460,8 +1470,8 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, unsigned int key_count); struct bpf_flow_dissector; -bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen, unsigned int flags); +u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen, unsigned int flags); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, @@ -2608,20 +2618,6 @@ void *skb_pull_data(struct sk_buff *skb, size_t len); void *__pskb_pull_tail(struct sk_buff *skb, int delta); -static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb_headlen(skb) && - !__pskb_pull_tail(skb, len - skb_headlen(skb))) - return NULL; - skb->len -= len; - return skb->data += len; -} - -static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); -} - static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { if (likely(len <= skb_headlen(skb))) @@ -2631,6 +2627,15 @@ static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } +static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (!pskb_may_pull(skb, len)) + return NULL; + + skb->len -= len; + return skb->data += len; +} + void skb_condense(struct sk_buff *skb); /** diff --git a/include/linux/slab.h b/include/linux/slab.h index 0fefdf528e0d..90877fcde70b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -29,6 +29,8 @@ #define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) /* DEBUG: Poison objects */ #define SLAB_POISON ((slab_flags_t __force)0x00000800U) +/* Indicate a kmalloc slab */ +#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U) /* Align objs on cache lines */ #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) /* Use GFP_DMA memory */ @@ -106,7 +108,7 @@ # define SLAB_ACCOUNT 0 #endif -#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_GENERIC #define SLAB_KASAN ((slab_flags_t __force)0x08000000U) #else #define SLAB_KASAN 0 @@ -119,6 +121,12 @@ */ #define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) +#ifdef CONFIG_KFENCE +#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U) +#else +#define SLAB_SKIP_KFENCE 0 +#endif + /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) @@ -184,11 +192,25 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2); +void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2); void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); + +/** + * ksize - Report actual allocation size of associated object + * + * @objp: Pointer returned from a prior kmalloc()-family allocation. + * + * This should not be used for writing beyond the originally requested + * allocation size. Either use krealloc() or round up the allocation size + * with kmalloc_size_roundup() prior to allocation. If this is used to + * access beyond the originally requested allocation size, UBSAN_BOUNDS + * and/or FORTIFY_SOURCE may trip, since they only know about the + * originally allocated size via the __alloc_size attribute. + */ size_t ksize(const void *objp); + #ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); @@ -243,27 +265,17 @@ static inline unsigned int arch_slab_minalign(void) #ifdef CONFIG_SLAB /* - * The largest kmalloc size supported by the SLAB allocators is - * 32 megabyte (2^25) or the maximum allocatable page order if that is - * less than 32 MB. - * - * WARNING: Its not easy to increase this value since the allocators have - * to do various tricks to work around compiler limitations in order to - * ensure proper constant folding. + * SLAB and SLUB directly allocates requests fitting in to an order-1 page + * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ -#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ - (MAX_ORDER + PAGE_SHIFT - 1) : 25) -#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif #endif #ifdef CONFIG_SLUB -/* - * SLUB directly allocates requests fitting in to an order-1 page - * (PAGE_SIZE*2). Larger requests are passed to the page allocator. - */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW @@ -415,10 +427,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size, if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; - if (size <= 4 * 1024 * 1024) return 22; - if (size <= 8 * 1024 * 1024) return 23; - if (size <= 16 * 1024 * 1024) return 24; - if (size <= 32 * 1024 * 1024) return 25; if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); @@ -428,6 +436,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size, /* Will never be reached. Needed because the compiler may complain */ return -1; } +static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) #endif /* !CONFIG_SLOB */ @@ -456,42 +465,22 @@ static __always_inline void kfree_bulk(size_t size, void **p) kmem_cache_free_bulk(NULL, size, p); } -#ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; -#else -static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __kmalloc(size, flags); -} - -static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) -{ - return kmem_cache_alloc(s, flags); -} -#endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) - __assume_slab_alignment __alloc_size(3); - -#ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment - __alloc_size(4); -#else -static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, int node, size_t size) -{ - return kmem_cache_alloc_trace(s, gfpflags, size); -} -#endif /* CONFIG_NUMA */ +void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_kmalloc_alignment __alloc_size(3); +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_kmalloc_alignment + __alloc_size(4); #else /* CONFIG_TRACING */ -static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s, - gfp_t flags, size_t size) +/* Save a function call when CONFIG_TRACING=n */ +static __always_inline __alloc_size(3) +void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { void *ret = kmem_cache_alloc(s, flags); @@ -499,8 +488,9 @@ static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_ return ret; } -static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) +static __always_inline __alloc_size(4) +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = kmem_cache_alloc_node(s, gfpflags, node); @@ -509,25 +499,11 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment - __alloc_size(1); +void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment + __alloc_size(1); -#ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) - __assume_page_alignment __alloc_size(1); -#else -static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags, - unsigned int order) -{ - return kmalloc_order(size, flags, order); -} -#endif - -static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags) -{ - unsigned int order = get_order(size); - return kmalloc_order_trace(size, flags, order); -} +void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment + __alloc_size(1); /** * kmalloc - allocate memory @@ -597,7 +573,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace( + return kmalloc_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, size); #endif @@ -605,23 +581,35 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } +#ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { -#ifndef CONFIG_SLOB - if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE) { - unsigned int i = kmalloc_index(size); + if (__builtin_constant_p(size)) { + unsigned int index; - if (!i) + if (size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large_node(size, flags, node); + + index = kmalloc_index(size); + + if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace( - kmalloc_caches[kmalloc_type(flags)][i], - flags, node, size); + return kmalloc_node_trace( + kmalloc_caches[kmalloc_type(flags)][index], + flags, node, size); } -#endif return __kmalloc_node(size, flags, node); } +#else +static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large_node(size, flags, node); + + return __kmalloc_node(size, flags, node); +} +#endif /** * kmalloc_array - allocate memory for an array. @@ -647,10 +635,10 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_ * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p, - size_t new_n, - size_t new_size, - gfp_t flags) +static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, + size_t new_n, + size_t new_size, + gfp_t flags) { size_t bytes; @@ -671,6 +659,12 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag return kmalloc_array(n, size, flags | __GFP_ZERO); } +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, + unsigned long caller) __alloc_size(1); +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node_track_caller(size, flags, node, \ + _RET_IP_) + /* * kmalloc_track_caller is a special version of kmalloc that records the * calling function of the routine calling it for slab leak tracking instead @@ -679,9 +673,9 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ - __kmalloc_track_caller(size, flags, _RET_IP_) + __kmalloc_node_track_caller(size, flags, \ + NUMA_NO_NODE, _RET_IP_) static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, int node) @@ -700,21 +694,6 @@ static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); } - -#ifdef CONFIG_NUMA -extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, - unsigned long caller) __alloc_size(1); -#define kmalloc_node_track_caller(size, flags, node) \ - __kmalloc_node_track_caller(size, flags, node, \ - _RET_IP_) - -#else /* CONFIG_NUMA */ - -#define kmalloc_node_track_caller(size, flags, node) \ - kmalloc_track_caller(size, flags) - -#endif /* CONFIG_NUMA */ - /* * Shortcuts */ @@ -774,11 +753,28 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla } extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) - __alloc_size(3); + __realloc_size(3); extern void kvfree(const void *addr); extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); + +/** + * kmalloc_size_roundup - Report allocation bucket size for the given size + * + * @size: Number of bytes to round up from. + * + * This returns the number of bytes that would be available in a kmalloc() + * allocation of @size bytes. For example, a 126 byte request would be + * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly + * for the general-purpose kmalloc()-based allocations, and is not for the + * pre-sized kmem_cache_alloc()-based allocations.) + * + * Use this to kmalloc() the full bucket size ahead of time instead of using + * ksize() to query the size after an allocation. + */ +size_t kmalloc_size_roundup(size_t size); + void __init kmem_cache_init_late(void); #if defined(CONFIG_SMP) && defined(CONFIG_SLAB) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index e24c9aff6fed..f0ffad6a3365 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -33,7 +33,6 @@ struct kmem_cache { size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ - struct kmem_cache *freelist_cache; unsigned int freelist_size; /* constructor func */ diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index 88eb832eac7b..c9cabb679cd1 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -152,4 +152,16 @@ int apple_rtkit_send_message(struct apple_rtkit *rtk, u8 ep, u64 message, int apple_rtkit_send_message_wait(struct apple_rtkit *rtk, u8 ep, u64 message, unsigned long timeout, bool atomic); +/* + * Process incoming messages in atomic context. + * This only guarantees that messages arrive as far as the recv_message_early + * callback; drivers expecting to handle incoming messages synchronously + * by calling this function must do it that way. + * Will return 1 if some data was processed, 0 if none was, or a + * negative error code on failure. + * + * @rtk: RTKit reference + */ +int apple_rtkit_poll(struct apple_rtkit *rtk); + #endif /* _LINUX_APPLE_RTKIT_H_ */ diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 59117d970daf..d2b02bb43768 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -65,4 +65,6 @@ void mtk_mmsys_ddp_disconnect(struct device *dev, enum mtk_ddp_comp_id cur, enum mtk_ddp_comp_id next); +void mtk_mmsys_ddp_dpi_fmt_config(struct device *dev, u32 val); + #endif /* __MTK_MMSYS_H */ diff --git a/include/linux/soc/mediatek/mtk-mutex.h b/include/linux/soc/mediatek/mtk-mutex.h index a0f4f51a3b45..b335c2837cd8 100644 --- a/include/linux/soc/mediatek/mtk-mutex.h +++ b/include/linux/soc/mediatek/mtk-mutex.h @@ -20,6 +20,8 @@ enum mtk_mutex_mod_index { MUTEX_MOD_IDX_MDP_WDMA, MUTEX_MOD_IDX_MDP_AAL0, MUTEX_MOD_IDX_MDP_CCORR0, + MUTEX_MOD_IDX_MDP_HDR0, + MUTEX_MOD_IDX_MDP_COLOR0, MUTEX_MOD_IDX_MAX /* ALWAYS keep at the end */ }; diff --git a/include/linux/soc/mediatek/mtk_sip_svc.h b/include/linux/soc/mediatek/mtk_sip_svc.h index 082398e0cfb1..0761128b4354 100644 --- a/include/linux/soc/mediatek/mtk_sip_svc.h +++ b/include/linux/soc/mediatek/mtk_sip_svc.h @@ -22,4 +22,7 @@ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, MTK_SIP_SMC_CONVENTION, \ ARM_SMCCC_OWNER_SIP, fn_id) +/* IOMMU related SMC call */ +#define MTK_SIP_KERNEL_IOMMU_CONTROL MTK_SIP_SMC_CMD(0x514) + #endif diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 7e00cca06709..4450c8b7a1cb 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -11,9 +11,15 @@ struct mtk_wed_hw; struct mtk_wdma_desc; +enum mtk_wed_bus_tye { + MTK_WED_BUS_PCIE, + MTK_WED_BUS_AXI, +}; + struct mtk_wed_ring { struct mtk_wdma_desc *desc; dma_addr_t desc_phys; + u32 desc_size; int size; u32 reg_base; @@ -42,13 +48,24 @@ struct mtk_wed_device { /* filled by driver: */ struct { - struct pci_dev *pci_dev; + union { + struct platform_device *platform_dev; + struct pci_dev *pci_dev; + }; + enum mtk_wed_bus_tye bus_type; u32 wpdma_phys; + u32 wpdma_int; + u32 wpdma_mask; + u32 wpdma_tx; + u32 wpdma_txfree; u16 token_start; unsigned int nbuf; + u8 tx_tbit[MTK_WED_TX_QUEUES]; + u8 txfree_tbit; + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); void (*offload_disable)(struct mtk_wed_device *wed); diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9ed5384c5ca1..bc2fb8343a94 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -78,11 +78,40 @@ struct llcc_edac_reg_data { u8 ways_shift; }; +struct llcc_edac_reg_offset { + /* LLCC TRP registers */ + u32 trp_ecc_error_status0; + u32 trp_ecc_error_status1; + u32 trp_ecc_sb_err_syn0; + u32 trp_ecc_db_err_syn0; + u32 trp_ecc_error_cntr_clear; + u32 trp_interrupt_0_status; + u32 trp_interrupt_0_clear; + u32 trp_interrupt_0_enable; + + /* LLCC Common registers */ + u32 cmn_status0; + u32 cmn_interrupt_0_enable; + u32 cmn_interrupt_2_enable; + + /* LLCC DRP registers */ + u32 drp_ecc_error_cfg; + u32 drp_ecc_error_cntr_clear; + u32 drp_interrupt_status; + u32 drp_interrupt_clear; + u32 drp_interrupt_enable; + u32 drp_ecc_error_status0; + u32 drp_ecc_error_status1; + u32 drp_ecc_sb_err_syn0; + u32 drp_ecc_db_err_syn0; +}; + /** * struct llcc_drv_data - Data associated with the llcc driver * @regmap: regmap associated with the llcc device * @bcast_regmap: regmap associated with llcc broadcast offset * @cfg: pointer to the data structure for slice configuration + * @edac_reg_offset: Offset of the LLCC EDAC registers * @lock: mutex associated with each slice * @cfg_size: size of the config data table * @max_slices: max slices as read from device tree @@ -96,6 +125,7 @@ struct llcc_drv_data { struct regmap *regmap; struct regmap *bcast_regmap; const struct llcc_slice_config *cfg; + const struct llcc_edac_reg_offset *edac_reg_offset; struct mutex lock; u32 cfg_size; u32 max_slices; diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index b1f80e756d2a..469e02d2aa0d 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -75,7 +75,7 @@ struct qmi_elem_info { enum qmi_array_type array_type; u8 tlv_type; u32 offset; - struct qmi_elem_info *ei_array; + const struct qmi_elem_info *ei_array; }; #define QMI_RESULT_SUCCESS_V01 0 @@ -102,7 +102,7 @@ struct qmi_response_type_v01 { u16 error; }; -extern struct qmi_elem_info qmi_response_type_v01_ei[]; +extern const struct qmi_elem_info qmi_response_type_v01_ei[]; /** * struct qmi_service - context to track lookup-results @@ -173,7 +173,7 @@ struct qmi_txn { struct completion completion; int result; - struct qmi_elem_info *ei; + const struct qmi_elem_info *ei; void *dest; }; @@ -189,7 +189,7 @@ struct qmi_msg_handler { unsigned int type; unsigned int msg_id; - struct qmi_elem_info *ei; + const struct qmi_elem_info *ei; size_t decoded_size; void (*fn)(struct qmi_handle *qmi, struct sockaddr_qrtr *sq, @@ -249,23 +249,23 @@ void qmi_handle_release(struct qmi_handle *qmi); ssize_t qmi_send_request(struct qmi_handle *qmi, struct sockaddr_qrtr *sq, struct qmi_txn *txn, int msg_id, size_t len, - struct qmi_elem_info *ei, const void *c_struct); + const struct qmi_elem_info *ei, const void *c_struct); ssize_t qmi_send_response(struct qmi_handle *qmi, struct sockaddr_qrtr *sq, struct qmi_txn *txn, int msg_id, size_t len, - struct qmi_elem_info *ei, const void *c_struct); + const struct qmi_elem_info *ei, const void *c_struct); ssize_t qmi_send_indication(struct qmi_handle *qmi, struct sockaddr_qrtr *sq, - int msg_id, size_t len, struct qmi_elem_info *ei, + int msg_id, size_t len, const struct qmi_elem_info *ei, const void *c_struct); void *qmi_encode_message(int type, unsigned int msg_id, size_t *len, - unsigned int txn_id, struct qmi_elem_info *ei, + unsigned int txn_id, const struct qmi_elem_info *ei, const void *c_struct); int qmi_decode_message(const void *buf, size_t len, - struct qmi_elem_info *ei, void *c_struct); + const struct qmi_elem_info *ei, void *c_struct); int qmi_txn_init(struct qmi_handle *qmi, struct qmi_txn *txn, - struct qmi_elem_info *ei, void *c_struct); + const struct qmi_elem_info *ei, void *c_struct); int qmi_txn_wait(struct qmi_txn *txn, unsigned long timeout); void qmi_txn_cancel(struct qmi_txn *txn); diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 82c9d489833a..3ab8c07f71c0 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -41,6 +41,7 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768 #define QCOM_SMD_RPM_PKA_CLK 0x616b70 #define QCOM_SMD_RPM_MCFG_CLK 0x6766636d +#define QCOM_SMD_RPM_MMXI_CLK 0x69786d6d int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, diff --git a/include/linux/soc/sunxi/sunxi_sram.h b/include/linux/soc/sunxi/sunxi_sram.h index c5f663bba9c2..60e274d1b821 100644 --- a/include/linux/soc/sunxi/sunxi_sram.h +++ b/include/linux/soc/sunxi/sunxi_sram.h @@ -14,6 +14,6 @@ #define _SUNXI_SRAM_H_ int sunxi_sram_claim(struct device *dev); -int sunxi_sram_release(struct device *dev); +void sunxi_sram_release(struct device *dev); #endif /* _SUNXI_SRAM_H_ */ diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index d45902fb4cad..bae5e2369b4f 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, return 0; } +static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) +{ + return copy_to_sockptr_offset(dst, 0, src, size); +} + static inline void *memdup_sockptr(sockptr_t src, size_t len) { void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 822599957b35..9e4537f409c2 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -892,6 +892,9 @@ struct sdw_master_ops { * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. + * @dev_num_ida_min: if set, defines the minimum values for the IDA + * used to allocate system-unique device numbers. This value needs to be + * identical across all SoundWire bus in the system. */ struct sdw_bus { struct device *dev; @@ -916,6 +919,7 @@ struct sdw_bus { u32 bank_switch_timeout; bool multi_link; int hw_sync_min_links; + int dev_num_ida_min; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index ec16ae49e6a4..2e9fd91572d4 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -15,32 +15,21 @@ #define SDW_LINK_SIZE 0x10000 /* Intel SHIM Registers Definition */ +/* LCAP */ #define SDW_SHIM_LCAP 0x0 -#define SDW_SHIM_LCTL 0x4 -#define SDW_SHIM_IPPTR 0x8 -#define SDW_SHIM_SYNC 0xC - -#define SDW_SHIM_CTLSCAP(x) (0x010 + 0x60 * (x)) -#define SDW_SHIM_CTLS0CM(x) (0x012 + 0x60 * (x)) -#define SDW_SHIM_CTLS1CM(x) (0x014 + 0x60 * (x)) -#define SDW_SHIM_CTLS2CM(x) (0x016 + 0x60 * (x)) -#define SDW_SHIM_CTLS3CM(x) (0x018 + 0x60 * (x)) -#define SDW_SHIM_PCMSCAP(x) (0x020 + 0x60 * (x)) +#define SDW_SHIM_LCAP_LCOUNT_MASK GENMASK(2, 0) -#define SDW_SHIM_PCMSYCHM(x, y) (0x022 + (0x60 * (x)) + (0x2 * (y))) -#define SDW_SHIM_PCMSYCHC(x, y) (0x042 + (0x60 * (x)) + (0x2 * (y))) -#define SDW_SHIM_PDMSCAP(x) (0x062 + 0x60 * (x)) -#define SDW_SHIM_IOCTL(x) (0x06C + 0x60 * (x)) -#define SDW_SHIM_CTMCTL(x) (0x06E + 0x60 * (x)) - -#define SDW_SHIM_WAKEEN 0x190 -#define SDW_SHIM_WAKESTS 0x192 +/* LCTL */ +#define SDW_SHIM_LCTL 0x4 #define SDW_SHIM_LCTL_SPA BIT(0) #define SDW_SHIM_LCTL_SPA_MASK GENMASK(3, 0) #define SDW_SHIM_LCTL_CPA BIT(8) #define SDW_SHIM_LCTL_CPA_MASK GENMASK(11, 8) +/* SYNC */ +#define SDW_SHIM_SYNC 0xC + #define SDW_SHIM_SYNC_SYNCPRD_VAL_24 (24000 / SDW_CADENCE_GSYNC_KHZ - 1) #define SDW_SHIM_SYNC_SYNCPRD_VAL_38_4 (38400 / SDW_CADENCE_GSYNC_KHZ - 1) #define SDW_SHIM_SYNC_SYNCPRD GENMASK(14, 0) @@ -49,19 +38,33 @@ #define SDW_SHIM_SYNC_CMDSYNC BIT(16) #define SDW_SHIM_SYNC_SYNCGO BIT(24) +/* Control stream capabililities and channel mask */ +#define SDW_SHIM_CTLSCAP(x) (0x010 + 0x60 * (x)) +#define SDW_SHIM_CTLS0CM(x) (0x012 + 0x60 * (x)) +#define SDW_SHIM_CTLS1CM(x) (0x014 + 0x60 * (x)) +#define SDW_SHIM_CTLS2CM(x) (0x016 + 0x60 * (x)) +#define SDW_SHIM_CTLS3CM(x) (0x018 + 0x60 * (x)) + +/* PCM Stream capabilities */ +#define SDW_SHIM_PCMSCAP(x) (0x020 + 0x60 * (x)) + #define SDW_SHIM_PCMSCAP_ISS GENMASK(3, 0) #define SDW_SHIM_PCMSCAP_OSS GENMASK(7, 4) #define SDW_SHIM_PCMSCAP_BSS GENMASK(12, 8) +/* PCM Stream Channel Map */ +#define SDW_SHIM_PCMSYCHM(x, y) (0x022 + (0x60 * (x)) + (0x2 * (y))) + +/* PCM Stream Channel Count */ +#define SDW_SHIM_PCMSYCHC(x, y) (0x042 + (0x60 * (x)) + (0x2 * (y))) + #define SDW_SHIM_PCMSYCM_LCHN GENMASK(3, 0) #define SDW_SHIM_PCMSYCM_HCHN GENMASK(7, 4) #define SDW_SHIM_PCMSYCM_STREAM GENMASK(13, 8) #define SDW_SHIM_PCMSYCM_DIR BIT(15) -#define SDW_SHIM_PDMSCAP_ISS GENMASK(3, 0) -#define SDW_SHIM_PDMSCAP_OSS GENMASK(7, 4) -#define SDW_SHIM_PDMSCAP_BSS GENMASK(12, 8) -#define SDW_SHIM_PDMSCAP_CPSS GENMASK(15, 13) +/* IO control */ +#define SDW_SHIM_IOCTL(x) (0x06C + 0x60 * (x)) #define SDW_SHIM_IOCTL_MIF BIT(0) #define SDW_SHIM_IOCTL_CO BIT(1) @@ -73,13 +76,23 @@ #define SDW_SHIM_IOCTL_CIBD BIT(8) #define SDW_SHIM_IOCTL_DIBD BIT(9) -#define SDW_SHIM_CTMCTL_DACTQE BIT(0) -#define SDW_SHIM_CTMCTL_DODS BIT(1) -#define SDW_SHIM_CTMCTL_DOAIS GENMASK(4, 3) +/* Wake Enable*/ +#define SDW_SHIM_WAKEEN 0x190 #define SDW_SHIM_WAKEEN_ENABLE BIT(0) + +/* Wake Status */ +#define SDW_SHIM_WAKESTS 0x192 + #define SDW_SHIM_WAKESTS_STATUS BIT(0) +/* AC Timing control */ +#define SDW_SHIM_CTMCTL(x) (0x06E + 0x60 * (x)) + +#define SDW_SHIM_CTMCTL_DACTQE BIT(0) +#define SDW_SHIM_CTMCTL_DODS BIT(1) +#define SDW_SHIM_CTMCTL_DOAIS GENMASK(4, 3) + /* Intel ALH Register definitions */ #define SDW_ALH_STRMZCFG(x) (0x000 + (0x4 * (x))) #define SDW_ALH_NUM_STREAMS 64 diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index f089ee1ead58..fbf8c0d95968 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -378,6 +378,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA * @dma_map_dev: device which can be used for DMA mapping + * @cur_rx_dma_dev: device which is currently used for RX DMA mapping + * @cur_tx_dma_dev: device which is currently used for TX DMA mapping * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump @@ -610,6 +612,8 @@ struct spi_controller { struct spi_device *spi, struct spi_transfer *xfer); struct device *dma_map_dev; + struct device *cur_rx_dma_dev; + struct device *cur_tx_dma_dev; /* * These hooks are for drivers that want to use the generic @@ -848,6 +852,7 @@ struct spi_res { * @bits_per_word: select a bits_per_word other than the device default * for this transfer. If 0 the default (from @spi_device) is used. * @dummy_data: indicates transfer is dummy bytes transfer. + * @cs_off: performs the transfer with chipselect off. * @cs_change: affects chipselect after this transfer completes * @cs_change_delay: delay between cs deassert and assert when * @cs_change is set and @spi_transfer is not the last in @spi_message @@ -958,6 +963,7 @@ struct spi_transfer { struct sg_table rx_sg; unsigned dummy_data:1; + unsigned cs_off:1; unsigned cs_change:1; unsigned tx_nbits:3; unsigned rx_nbits:3; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 5c0c5174155d..1341f7d62da4 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SPINLOCK_H #define __LINUX_SPINLOCK_H +#define __LINUX_INSIDE_SPINLOCK_H /* * include/linux/spinlock.h - generic spinlock/rwlock declarations @@ -492,4 +493,5 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +#undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 51fa0dab68c4..89eb6f4c659c 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_API_SMP_H #define __LINUX_SPINLOCK_API_SMP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index b8ba00ccccde..819aeba1c87e 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_API_UP_H #define __LINUX_SPINLOCK_API_UP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 835aedaf68ac..61c49b16f69a 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -2,7 +2,7 @@ #ifndef __LINUX_SPINLOCK_RT_H #define __LINUX_SPINLOCK_RT_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H #error Do not include directly. Use spinlock.h #endif diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 16521074b6f7..c87204247592 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_UP_H #define __LINUX_SPINLOCK_UP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 6cfaa0a9a9b9..5aa5e0faf6a1 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -15,10 +15,10 @@ struct srcu_struct { short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ - unsigned short srcu_idx; /* Current reader array element in bit 0x2. */ - unsigned short srcu_idx_max; /* Furthest future srcu_idx request. */ u8 srcu_gp_running; /* GP workqueue running? */ u8 srcu_gp_waiting; /* GP waiting for readers? */ + unsigned long srcu_idx; /* Current reader array element in bit 0x2. */ + unsigned long srcu_idx_max; /* Furthest future srcu_idx request. */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ @@ -82,10 +82,12 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, int idx; idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1; - pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd) gp: %lu->%lu\n", tt, tf, idx, data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])), - data_race(READ_ONCE(ssp->srcu_lock_nesting[idx]))); + data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])), + data_race(READ_ONCE(ssp->srcu_idx)), + data_race(READ_ONCE(ssp->srcu_idx_max))); } #endif diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index bc2797955de9..9ca7798d7a31 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -14,9 +14,15 @@ #include <linux/gfp.h> typedef u32 depot_stack_handle_t; +/* + * Number of bits in the handle that stack depot doesn't use. Users may store + * information in them. + */ +#define STACK_DEPOT_EXTRA_BITS 5 depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, + unsigned int extra_bits, gfp_t gfp_flags, bool can_alloc); /* @@ -59,6 +65,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); + int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); diff --git a/include/linux/stat.h b/include/linux/stat.h index 7df06931f25d..ff277ced50e9 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -50,6 +50,8 @@ struct kstat { struct timespec64 btime; /* File creation time */ u64 blocks; u64 mnt_id; + u32 dio_mem_align; + u32 dio_offset_align; }; #endif diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 8df475db88c0..fb2e88614f5d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -257,7 +257,6 @@ struct plat_stmmacenet_data { u8 vlan_fail_q; unsigned int eee_usecs_rate; struct pci_dev *pdev; - bool has_crossts; int int_snapshot_num; int ext_snapshot_num; bool int_snapshot_en; diff --git a/include/linux/string.h b/include/linux/string.h index 61ec7e4f6311..cf7607b32102 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -261,6 +261,49 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, int pad); /** + * strtomem_pad - Copy NUL-terminated string to non-NUL-terminated buffer + * + * @dest: Pointer of destination character array (marked as __nonstring) + * @src: Pointer to NUL-terminated string + * @pad: Padding character to fill any remaining bytes of @dest after copy + * + * This is a replacement for strncpy() uses where the destination is not + * a NUL-terminated string, but with bounds checking on the source size, and + * an explicit padding character. If padding is not required, use strtomem(). + * + * Note that the size of @dest is not an argument, as the length of @dest + * must be discoverable by the compiler. + */ +#define strtomem_pad(dest, src, pad) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + _dest_len == (size_t)-1); \ + memcpy_and_pad(dest, _dest_len, src, strnlen(src, _dest_len), pad); \ +} while (0) + +/** + * strtomem - Copy NUL-terminated string to non-NUL-terminated buffer + * + * @dest: Pointer of destination character array (marked as __nonstring) + * @src: Pointer to NUL-terminated string + * + * This is a replacement for strncpy() uses where the destination is not + * a NUL-terminated string, but with bounds checking on the source size, and + * without trailing padding. If padding is required, use strtomem_pad(). + * + * Note that the size of @dest is not an argument, as the length of @dest + * must be discoverable by the compiler. + */ +#define strtomem(dest, src) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + _dest_len == (size_t)-1); \ + memcpy(dest, src, min(_dest_len, strnlen(src, _dest_len))); \ +} while (0) + +/** * memset_after - Set a value after a struct member to the end of a struct * * @obj: Address of target struct instance diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index dc2e726fd820..8530c7328269 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -128,4 +128,9 @@ static inline const char *str_enabled_disabled(bool v) return v ? "enabled" : "disabled"; } +static inline const char *str_read_write(bool v) +{ + return v ? "read" : "write"; +} + #endif diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 75eea5ebb179..770ef2cb5775 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -246,6 +246,7 @@ void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); +void rpc_clnt_disconnect(struct rpc_clnt *clnt); void rpc_cleanup_clids(void); static inline int rpc_reply_expected(struct rpc_task *task) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index acc62647317c..b8ca3ecaf8d7 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -209,11 +209,17 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); +void rpc_task_try_cancel(struct rpc_task *task, int error); void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); @@ -252,7 +258,7 @@ int rpc_malloc(struct rpc_task *); void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); +int rpc_wait_for_completion_task(struct rpc_task *task); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct net; void rpc_show_tasks(struct net *); @@ -264,11 +270,6 @@ extern struct workqueue_struct *xprtiod_workqueue; void rpc_prepare_task(struct rpc_task *task); gfp_t rpc_task_gfp_mask(void); -static inline int rpc_wait_for_completion_task(struct rpc_task *task) -{ - return __rpc_wait_for_completion_task(task, NULL); -} - #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index daecb009c05b..88de45491376 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -472,6 +472,7 @@ struct svc_procedure { /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ + unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ @@ -544,16 +545,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) } /** - * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding + * svcxdr_init_decode - Prepare an xdr_stream for Call decoding * @rqstp: controlling server RPC transaction context * + * This function currently assumes the RPC header in rq_arg has + * already been decoded. Upon return, xdr->p points to the + * location of the upper layer header. */ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; - struct kvec *argv = rqstp->rq_arg.head; + struct xdr_buf *buf = &rqstp->rq_arg; + struct kvec *argv = buf->head; - xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + /* + * svc_getnl() and friends do not keep the xdr_buf's ::len + * field up to date. Refresh that field before initializing + * the argument decoding stream. + */ + buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; + + xdr_init_decode(xdr, buf, argv->iov_base, NULL); xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); } @@ -576,7 +588,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; buf->len = resv->iov_len; xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); + buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); buf->buflen -= rqstp->rq_auth_slack; xdr->rqst = NULL; } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 69175029abbb..f84e2a1358e1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -240,6 +240,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); +extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 70f2921e2e70..cfe19a028918 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -75,7 +75,7 @@ extern struct suspend_stats suspend_stats; static inline void dpm_save_failed_dev(const char *name) { - strlcpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], + strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], name, sizeof(suspend_stats.failed_devs[0])); suspend_stats.last_failed_dev++; @@ -191,6 +191,7 @@ struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); int (*prepare_late)(void); + void (*check)(void); bool (*wake)(void); void (*restore_early)(void); void (*restore)(void); @@ -510,8 +511,8 @@ extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); extern void pm_print_active_wakeup_sources(void); -extern void lock_system_sleep(void); -extern void unlock_system_sleep(void); +extern unsigned int lock_system_sleep(void); +extern void unlock_system_sleep(unsigned int); #else /* !CONFIG_PM_SLEEP */ @@ -534,8 +535,8 @@ static inline void pm_system_wakeup(void) {} static inline void pm_wakeup_clear(bool reset) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} -static inline void lock_system_sleep(void) {} -static inline void unlock_system_sleep(void) {} +static inline unsigned int lock_system_sleep(void) { return 0; } +static inline void unlock_system_sleep(unsigned int flags) {} #endif /* !CONFIG_PM_SLEEP */ diff --git a/include/linux/swab.h b/include/linux/swab.h index bcff5149861a..9b804dbb0d79 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -20,4 +20,29 @@ # define swab64s __swab64s # define swahw32s __swahw32s # define swahb32s __swahb32s + +static inline void swab16_array(u16 *buf, unsigned int words) +{ + while (words--) { + swab16s(buf); + buf++; + } +} + +static inline void swab32_array(u32 *buf, unsigned int words) +{ + while (words--) { + swab32s(buf); + buf++; + } +} + +static inline void swab64_array(u64 *buf, unsigned int words) +{ + while (words--) { + swab64s(buf); + buf++; + } +} + #endif /* _LINUX_SWAB_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 43150b9bbc5c..a18cf4b7c724 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -162,6 +162,10 @@ union swap_header { */ struct reclaim_state { unsigned long reclaimed_slab; +#ifdef CONFIG_LRU_GEN + /* per-thread mm walk data */ + struct lru_gen_mm_walk *mm_walk; +#endif }; #ifdef __KERNEL__ @@ -351,6 +355,11 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) return entry; } +static inline void folio_set_swap_entry(struct folio *folio, swp_entry_t entry) +{ + folio->private = (void *)entry.val; +} + /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); @@ -375,11 +384,11 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ -extern void lru_note_cost(struct lruvec *lruvec, bool file, - unsigned int nr_pages); -extern void lru_note_cost_folio(struct folio *); -extern void folio_add_lru(struct folio *); -extern void lru_cache_add(struct page *); +void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); +void lru_note_cost_folio(struct folio *); +void folio_add_lru(struct folio *); +void folio_add_lru_vma(struct folio *, struct vm_area_struct *); +void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); @@ -481,7 +490,8 @@ static inline long get_nr_swap_pages(void) extern void si_swapinfo(struct sysinfo *); swp_entry_t folio_alloc_swap(struct folio *folio); -extern void put_swap_page(struct page *page, swp_entry_t entry); +bool folio_free_swap(struct folio *folio); +void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); extern int add_swap_count_continuation(swp_entry_t, gfp_t); @@ -500,7 +510,6 @@ extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); -extern int try_to_free_swap(struct page *); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); @@ -566,7 +575,7 @@ static inline void swap_free(swp_entry_t swp) { } -static inline void put_swap_page(struct page *page, swp_entry_t swp) +static inline void put_swap_folio(struct folio *folio, swp_entry_t swp) { } @@ -585,11 +594,6 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -static inline int try_to_free_swap(struct page *page) -{ - return 0; -} - static inline swp_entry_t folio_alloc_swap(struct folio *folio) { swp_entry_t entry; @@ -597,6 +601,11 @@ static inline swp_entry_t folio_alloc_swap(struct folio *folio) return entry; } +static inline bool folio_free_swap(struct folio *folio) +{ + return false; +} + static inline int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) @@ -657,7 +666,7 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) cgroup_throttle_swaprate(&folio->page, gfp); } -#ifdef CONFIG_MEMCG_SWAP +#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, @@ -677,7 +686,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p } extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); -extern bool mem_cgroup_swap_full(struct page *page); +extern bool mem_cgroup_swap_full(struct folio *folio); #else static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { @@ -699,7 +708,7 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) return get_nr_swap_pages(); } -static inline bool mem_cgroup_swap_full(struct page *page) +static inline bool mem_cgroup_swap_full(struct folio *folio) { return vm_swap_full(); } diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index a12dd1c3966c..ae73a87775b3 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -4,7 +4,7 @@ #include <linux/swap.h> -#ifdef CONFIG_MEMCG_SWAP +#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); @@ -40,6 +40,6 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif /* CONFIG_MEMCG_SWAP */ +#endif #endif /* __LINUX_SWAP_CGROUP_H */ diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index 54078542134c..7ed529a77c5b 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -8,6 +8,11 @@ */ extern struct swap_info_struct *swap_info[]; extern unsigned long generic_max_swapfile_size(void); -extern unsigned long max_swapfile_size(void); +unsigned long arch_max_swapfile_size(void); + +/* Maximum swapfile size supported for the arch (not inclusive). */ +extern unsigned long swapfile_maximum_size; +/* Whether swap migration entry supports storing A/D bits for the arch */ +extern bool swap_migration_ad_supported; #endif /* _LINUX_SWAPFILE_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index a3d435bf9f97..86b95ccb81bb 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -8,6 +8,10 @@ #ifdef CONFIG_MMU +#ifdef CONFIG_SWAP +#include <linux/swapfile.h> +#endif /* CONFIG_SWAP */ + /* * swapcache pages are stored in the swapper_space radix tree. We want to * get good packing density in that tree, so the index should be dense in @@ -23,6 +27,45 @@ #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) +/* + * Definitions only for PFN swap entries (see is_pfn_swap_entry()). To + * store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries + * can use the extra bits to store other information besides PFN. + */ +#ifdef MAX_PHYSMEM_BITS +#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#else /* MAX_PHYSMEM_BITS */ +#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) +#endif /* MAX_PHYSMEM_BITS */ +#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) + +/** + * Migration swap entry specific bitfield definitions. Layout: + * + * |----------+--------------------| + * | swp_type | swp_offset | + * |----------+--------+-+-+-------| + * | | resv |D|A| PFN | + * |----------+--------+-+-+-------| + * + * @SWP_MIG_YOUNG_BIT: Whether the page used to have young bit set (bit A) + * @SWP_MIG_DIRTY_BIT: Whether the page used to have dirty bit set (bit D) + * + * Note: A/D bits will be stored in migration entries iff there're enough + * free bits in arch specific swp offset. By default we'll ignore A/D bits + * when migrating a page. Please refer to migration_entry_supports_ad() + * for more information. If there're more bits besides PFN and A/D bits, + * they should be reserved and always be zeros. + */ +#define SWP_MIG_YOUNG_BIT (SWP_PFN_BITS) +#define SWP_MIG_DIRTY_BIT (SWP_PFN_BITS + 1) +#define SWP_MIG_TOTAL_BITS (SWP_PFN_BITS + 2) + +#define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT) +#define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT) + +static inline bool is_pfn_swap_entry(swp_entry_t entry); + /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { @@ -64,6 +107,17 @@ static inline pgoff_t swp_offset(swp_entry_t entry) return entry.val & SWP_OFFSET_MASK; } +/* + * This should only be called upon a pfn swap entry to get the PFN stored + * in the swap entry. Please refers to is_pfn_swap_entry() for definition + * of pfn swap entry. + */ +static inline unsigned long swp_offset_pfn(swp_entry_t entry) +{ + VM_BUG_ON(!is_pfn_swap_entry(entry)); + return swp_offset(entry) & SWP_PFN_MASK; +} + /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { @@ -240,6 +294,52 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) return swp_entry(SWP_MIGRATION_WRITE, offset); } +/* + * Returns whether the host has large enough swap offset field to support + * carrying over pgtable A/D bits for page migrations. The result is + * pretty much arch specific. + */ +static inline bool migration_entry_supports_ad(void) +{ +#ifdef CONFIG_SWAP + return swap_migration_ad_supported; +#else /* CONFIG_SWAP */ + return false; +#endif /* CONFIG_SWAP */ +} + +static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_entry(swp_type(entry), + swp_offset(entry) | SWP_MIG_YOUNG); + return entry; +} + +static inline bool is_migration_entry_young(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_YOUNG; + /* Keep the old behavior of aging page after migration */ + return false; +} + +static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_entry(swp_type(entry), + swp_offset(entry) | SWP_MIG_DIRTY); + return entry; +} + +static inline bool is_migration_entry_dirty(swp_entry_t entry) +{ + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_DIRTY; + /* Keep the old behavior of clean page after migration */ + return false; +} + extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, @@ -247,8 +347,8 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, #ifdef CONFIG_HUGETLB_PAGE extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); -#endif -#else +#endif /* CONFIG_HUGETLB_PAGE */ +#else /* CONFIG_MIGRATION */ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { return swp_entry(0, 0); @@ -276,7 +376,7 @@ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, #ifdef CONFIG_HUGETLB_PAGE static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { } -#endif +#endif /* CONFIG_HUGETLB_PAGE */ static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; @@ -286,7 +386,26 @@ static inline int is_readable_migration_entry(swp_entry_t entry) return 0; } -#endif +static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) +{ + return entry; +} + +static inline bool is_migration_entry_young(swp_entry_t entry) +{ + return false; +} + +static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) +{ + return entry; +} + +static inline bool is_migration_entry_dirty(swp_entry_t entry) +{ + return false; +} +#endif /* CONFIG_MIGRATION */ typedef unsigned long pte_marker; @@ -369,7 +488,7 @@ static inline int pte_none_mostly(pte_t pte) static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { - struct page *p = pfn_to_page(swp_offset(entry)); + struct page *p = pfn_to_page(swp_offset_pfn(entry)); /* * Any use of migration entries may only occur while the @@ -387,6 +506,9 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { + /* Make sure the swp offset can always store the needed fields */ + BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); + return is_migration_entry(entry) || is_device_private_entry(entry) || is_device_exclusive_entry(entry); } @@ -426,7 +548,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd) { return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); } -#else +#else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, struct page *page) { @@ -455,7 +577,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd) { return 0; } -#endif +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE @@ -475,27 +597,17 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } -static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry) -{ - return swp_offset(entry); -} - static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); } -static inline void num_poisoned_pages_dec(void) -{ - atomic_long_dec(&num_poisoned_pages); -} - static inline void num_poisoned_pages_sub(long i) { atomic_long_sub(i, &num_poisoned_pages); } -#else +#else /* CONFIG_MEMORY_FAILURE */ static inline swp_entry_t make_hwpoison_entry(struct page *page) { @@ -514,7 +626,7 @@ static inline void num_poisoned_pages_inc(void) static inline void num_poisoned_pages_sub(long i) { } -#endif +#endif /* CONFIG_MEMORY_FAILURE */ static inline int non_swap_entry(swp_entry_t entry) { diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 86af908e2663..955f80e34d4f 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -8,6 +8,8 @@ #ifndef _LINUX_SYSLOG_H #define _LINUX_SYSLOG_H +#include <linux/wait.h> + /* Close the log. Currently a NOP. */ #define SYSLOG_ACTION_CLOSE 0 /* Open the log. Currently a NOP. */ @@ -35,5 +37,6 @@ #define SYSLOG_FROM_PROC 1 int do_syslog(int type, char __user *buf, int count, int source); +extern wait_queue_head_t log_wait; #endif /* _LINUX_SYSLOG_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a9fbe22732c3..41b1da621a45 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -295,7 +295,7 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ u32 max_packets_out; /* max packets_out in last window */ - u32 max_packets_seq; /* right edge of max_packets_out flight */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ u16 urg_data; /* Saved octet of OOB data and control flags */ u8 ecn_flags; /* ECN status bits. */ @@ -388,6 +388,12 @@ struct tcp_sock { u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs * values defined in uapi/linux/tcp.h */ + u8 bpf_chg_cc_inprogress:1; /* In the middle of + * bpf_setsockopt(TCP_CONGESTION), + * it is to avoid the bpf_tcp_cc->init() + * to recur itself by calling + * bpf_setsockopt(TCP_CONGESTION, "itself"). + */ #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG) #else #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 diff --git a/include/linux/termios_internal.h b/include/linux/termios_internal.h new file mode 100644 index 000000000000..d77f29e5e2b7 --- /dev/null +++ b/include/linux/termios_internal.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TERMIOS_CONV_H +#define _LINUX_TERMIOS_CONV_H + +#include <linux/uaccess.h> +#include <asm/termios.h> + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^O werase=^W lnext=^V + eol2=\0 +*/ + +#ifdef VDSUSP +#define INIT_C_CC_VDSUSP_EXTRA [VDSUSP] = 'Y'-0x40, +#else +#define INIT_C_CC_VDSUSP_EXTRA +#endif + +#define INIT_C_CC { \ + [VINTR] = 'C'-0x40, \ + [VQUIT] = '\\'-0x40, \ + [VERASE] = '\177', \ + [VKILL] = 'U'-0x40, \ + [VEOF] = 'D'-0x40, \ + [VSTART] = 'Q'-0x40, \ + [VSTOP] = 'S'-0x40, \ + [VSUSP] = 'Z'-0x40, \ + [VREPRINT] = 'R'-0x40, \ + [VDISCARD] = 'O'-0x40, \ + [VWERASE] = 'W'-0x40, \ + [VLNEXT] = 'V'-0x40, \ + INIT_C_CC_VDSUSP_EXTRA \ + [VMIN] = 1 } + +int user_termio_to_kernel_termios(struct ktermios *, struct termio __user *); +int kernel_termios_to_user_termio(struct termio __user *, struct ktermios *); +#ifdef TCGETS2 +int user_termios_to_kernel_termios(struct ktermios *, struct termios2 __user *); +int kernel_termios_to_user_termios(struct termios2 __user *, struct ktermios *); +int user_termios_to_kernel_termios_1(struct ktermios *, struct termios __user *); +int kernel_termios_to_user_termios_1(struct termios __user *, struct ktermios *); +#else /* TCGETS2 */ +int user_termios_to_kernel_termios(struct ktermios *, struct termios __user *); +int kernel_termios_to_user_termios(struct termios __user *, struct ktermios *); +#endif /* TCGETS2 */ + +#endif /* _LINUX_TERMIOS_CONV_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1386c713885d..9ecc128944a1 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -17,8 +17,6 @@ #include <linux/workqueue.h> #include <uapi/linux/thermal.h> -#define THERMAL_MAX_TRIPS 12 - /* invalid cooling state */ #define THERMAL_CSTATE_INVALID -1UL @@ -296,82 +294,43 @@ struct thermal_zone_params { int offset; }; -/** - * struct thermal_zone_of_device_ops - callbacks for handling DT based zones - * - * Mandatory: - * @get_temp: a pointer to a function that reads the sensor temperature. - * - * Optional: - * @get_trend: a pointer to a function that reads the sensor temperature trend. - * @set_trips: a pointer to a function that sets a temperature window. When - * this window is left the driver must inform the thermal core via - * thermal_zone_device_update. - * @set_emul_temp: a pointer to a function that sets sensor emulated - * temperature. - * @set_trip_temp: a pointer to a function that sets the trip temperature on - * hardware. - * @change_mode: a pointer to a function that notifies the thermal zone - * mode change. - */ -struct thermal_zone_of_device_ops { - int (*get_temp)(void *, int *); - int (*get_trend)(void *, int, enum thermal_trend *); - int (*set_trips)(void *, int, int); - int (*set_emul_temp)(void *, int); - int (*set_trip_temp)(void *, int, int); - int (*change_mode) (void *, enum thermal_device_mode); -}; - /* Function declarations */ #ifdef CONFIG_THERMAL_OF -int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id); -struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); -struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops); -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz); -#else +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops); -static inline int thermal_zone_of_get_sensor_id(struct device_node *tz_np, - struct device_node *sensor_np, - u32 *id) -{ - return -ENOENT; -} -static inline struct thermal_zone_device * -thermal_zone_of_sensor_register(struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops); + +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + +void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz); + +void thermal_of_zone_unregister(struct thermal_zone_device *tz); + +#else +static inline +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data, + const struct thermal_zone_device_ops *ops) { - return ERR_PTR(-ENODEV); + return ERR_PTR(-ENOTSUPP); } static inline -void thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data, + const struct thermal_zone_device_ops *ops) { + return ERR_PTR(-ENOTSUPP); } -static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register( - struct device *dev, int id, void *data, - const struct thermal_zone_of_device_ops *ops) +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz) { - return ERR_PTR(-ENODEV); } -static inline -void devm_thermal_zone_of_sensor_unregister(struct device *dev, - struct thermal_zone_device *tz) +static inline void devm_thermal_of_zone_unregister(struct device *dev, + struct thermal_zone_device *tz) { } - #endif #ifdef CONFIG_THERMAL diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 9f442d73f3df..90cd08ab2f5d 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -187,6 +187,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * @device_name: Name of the device (or %NULL if not known) * @link_speed: Speed of the link in Gb/s * @link_width: Width of the link (1 or 2) + * @link_usb4: Downstream link is USB4 * @is_unplugged: The XDomain is unplugged * @needs_uuid: If the XDomain does not have @remote_uuid it will be * queried first @@ -234,6 +235,7 @@ struct tb_xdomain { const char *device_name; unsigned int link_speed; unsigned int link_width; + bool link_usb4; bool is_unplugged; bool needs_uuid; struct ida service_ids; diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 498dbcedb451..1c3948a1d6ad 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -21,7 +21,12 @@ struct tnum { struct tnum tnum_const(u64 value); /* A completely unknown value */ extern const struct tnum tnum_unknown; -/* A value that's unknown except that @min <= value <= @max */ +/* An unknown value that is a superset of @min <= value <= @max. + * + * Could include values outside the range of [@min, @max]. + * For example tnum_range(0, 2) is represented by {0, 1, 2, *3*}, + * rather than the intended set of {0, 1, 2}. + */ struct tnum tnum_range(u64 min, u64 max); /* Arithmetic and logical ops */ @@ -73,7 +78,18 @@ static inline bool tnum_is_unknown(struct tnum a) */ bool tnum_is_aligned(struct tnum a, u64 size); -/* Returns true if @b represents a subset of @a. */ +/* Returns true if @b represents a subset of @a. + * + * Note that using tnum_range() as @a requires extra cautions as tnum_in() may + * return true unexpectedly due to tnum limited ability to represent tight + * range, e.g. + * + * tnum_in(tnum_range(0, 2), tnum_const(3)) == true + * + * As a rule of thumb, if @a is explicitly coded rather than coming from + * reg->var_off, it should be in form of tnum_const(), tnum_range(0, 2**n - 1), + * or tnum_range(2**n, 2**(n+1) - 1). + */ bool tnum_in(struct tnum a, struct tnum b); /* Formatting functions. These have snprintf-like semantics: they will write diff --git a/include/linux/trace.h b/include/linux/trace.h index bf169612ffe1..b5e16e438448 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -2,8 +2,6 @@ #ifndef _LINUX_TRACE_H #define _LINUX_TRACE_H -#ifdef CONFIG_TRACING - #define TRACE_EXPORT_FUNCTION BIT(0) #define TRACE_EXPORT_EVENT BIT(1) #define TRACE_EXPORT_MARKER BIT(2) @@ -28,6 +26,8 @@ struct trace_export { int flags; }; +#ifdef CONFIG_TRACING + int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); @@ -48,6 +48,38 @@ void osnoise_arch_unregister(void); void osnoise_trace_irq_entry(int id); void osnoise_trace_irq_exit(int id, const char *desc); +#else /* CONFIG_TRACING */ +static inline int register_ftrace_export(struct trace_export *export) +{ + return -EINVAL; +} +static inline int unregister_ftrace_export(struct trace_export *export) +{ + return 0; +} +static inline void trace_printk_init_buffers(void) +{ +} +static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, + const char *fmt, ...) +{ + return 0; +} +static inline int trace_array_init_printk(struct trace_array *tr) +{ + return -EINVAL; +} +static inline void trace_array_put(struct trace_array *tr) +{ +} +static inline struct trace_array *trace_array_get_by_name(const char *name) +{ + return NULL; +} +static inline int trace_array_destroy(struct trace_array *tr) +{ + return 0; +} #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8401dec93c15..20749bd9db71 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -92,6 +92,7 @@ struct trace_iterator { unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; + long wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; diff --git a/include/linux/tty.h b/include/linux/tty.h index 7b0a5d478ef6..730c3301d710 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -122,8 +122,6 @@ struct tty_operations; /** * struct tty_struct - state associated with a tty while open * - * @magic: magic value set early in @alloc_tty_struct to %TTY_MAGIC, for - * debugging purposes * @kref: reference counting by tty_kref_get() and tty_kref_put(), reaching zero * frees the structure * @dev: class device or %NULL (e.g. ptys, serdev) @@ -193,7 +191,6 @@ struct tty_operations; * &struct tty_port. */ struct tty_struct { - int magic; struct kref kref; struct device *dev; struct tty_driver *driver; @@ -260,9 +257,6 @@ struct tty_file_private { struct list_head list; }; -/* tty magic number */ -#define TTY_MAGIC 0x5401 - /** * DOC: TTY Struct Flags * @@ -434,7 +428,7 @@ int tty_hung_up_p(struct file *filp); void do_SAK(struct tty_struct *tty); void __do_SAK(struct tty_struct *tty); void no_tty(void); -speed_t tty_termios_baud_rate(struct ktermios *termios); +speed_t tty_termios_baud_rate(const struct ktermios *termios); void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud, speed_t obaud); void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, @@ -458,7 +452,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); -void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); +void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 4841d8069c07..e00034118c7b 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -7,6 +7,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/cdev.h> +#include <linux/uaccess.h> #include <linux/termios.h> #include <linux/seq_file.h> @@ -141,7 +142,7 @@ struct serial_struct; * * Optional. * - * @set_termios: ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * @set_termios: ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This routine allows the @tty driver to be notified when device's * termios settings have changed. New settings are in @tty->termios. @@ -365,7 +366,7 @@ struct tty_operations { unsigned int cmd, unsigned long arg); long (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); - void (*set_termios)(struct tty_struct *tty, struct ktermios * old); + void (*set_termios)(struct tty_struct *tty, const struct ktermios *old); void (*throttle)(struct tty_struct * tty); void (*unthrottle)(struct tty_struct * tty); void (*stop)(struct tty_struct *tty); @@ -396,7 +397,6 @@ struct tty_operations { /** * struct tty_driver -- driver for TTY devices * - * @magic: set to %TTY_DRIVER_MAGIC in __tty_alloc_driver() * @kref: reference counting. Reaching zero frees all the internals and the * driver. * @cdevs: allocated/registered character /dev devices @@ -432,7 +432,6 @@ struct tty_operations { * @driver_name, @name, @type, @subtype, @init_termios, and @ops. */ struct tty_driver { - int magic; struct kref kref; struct cdev **cdevs; struct module *owner; @@ -489,9 +488,6 @@ static inline void tty_set_operations(struct tty_driver *driver, driver->ops = op; } -/* tty driver magic number */ -#define TTY_DRIVER_MAGIC 0x5402 - /** * DOC: TTY Driver Flags * diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index ede6f2157f32..dcb61ec11424 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -130,7 +130,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * a pointer to wordsize-sensitive structure belongs here, but most of * ldiscs will happily leave it %NULL. * - * @set_termios: [TTY] ``void ()(struct tty_struct *tty, struct ktermios *old)`` + * @set_termios: [TTY] ``void ()(struct tty_struct *tty, const struct ktermios *old)`` * * This function notifies the line discpline that a change has been made * to the termios structure. @@ -227,7 +227,7 @@ struct tty_ldisc_ops { unsigned long arg); int (*compat_ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); - void (*set_termios)(struct tty_struct *tty, struct ktermios *old); + void (*set_termios)(struct tty_struct *tty, const struct ktermios *old); __poll_t (*poll)(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); void (*hangup)(struct tty_struct *tty); diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 6ad4e9032d53..46040d66334a 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -8,7 +8,7 @@ * * Key points : * - * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP. + * - Use a seqcount on 32-bit * - The whole thing is a no-op on 64-bit architectures. * * Usage constraints: @@ -20,7 +20,8 @@ * writer and also spin forever. * * 3) Write side must use the _irqsave() variant if other writers, or a reader, - * can be invoked from an IRQ context. + * can be invoked from an IRQ context. On 64bit systems this variant does not + * disable interrupts. * * 4) If reader fetches several counters, there is no guarantee the whole values * are consistent w.r.t. each other (remember point #2: seqcounts are not @@ -29,11 +30,6 @@ * 5) Readers are allowed to sleep or be preempted/interrupted: they perform * pure reads. * - * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats - * might be updated from a hardirq or softirq context (remember point #1: - * seqcounts are not used for UP kernels). 32-bit UP stat readers could read - * corrupted 64-bit values otherwise. - * * Usage : * * Stats producer (writer) should use following template granted it already got @@ -66,7 +62,7 @@ #include <linux/seqlock.h> struct u64_stats_sync { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +#if BITS_PER_LONG == 32 seqcount_t seq; #endif }; @@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p) local64_inc(&p->v); } -#else +static inline void u64_stats_init(struct u64_stats_sync *syncp) { } +static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { } +static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { } +static inline unsigned long __u64_stats_irqsave(void) { return 0; } +static inline void __u64_stats_irqrestore(unsigned long flags) { } +static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +{ + return 0; +} +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) +{ + return false; +} + +#else /* 64 bit */ typedef struct { u64 v; @@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p) { p->v++; } -#endif -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) -#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) -#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { + seqcount_init(&syncp->seq); } -#endif -static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) +static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); write_seqcount_begin(&syncp->seq); -#endif } -static inline void u64_stats_update_end(struct u64_stats_sync *syncp) +static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); -#endif + preempt_enable_nested(); } -static inline unsigned long -u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +static inline unsigned long __u64_stats_irqsave(void) { - unsigned long flags = 0; + unsigned long flags; -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); - else - local_irq_save(flags); - write_seqcount_begin(&syncp->seq); -#endif + local_irq_save(flags); return flags; } -static inline void -u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, - unsigned long flags) +static inline void __u64_stats_irqrestore(unsigned long flags) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - write_seqcount_end(&syncp->seq); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); - else - local_irq_restore(flags); -#endif + local_irq_restore(flags); } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); -#else - return 0; -#endif } -static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) { -#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) - preempt_disable(); -#endif - return __u64_stats_fetch_begin(syncp); + return read_seqcount_retry(&syncp->seq, start); } +#endif /* !64 bit */ -static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, - unsigned int start) +static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - return read_seqcount_retry(&syncp->seq, start); -#else - return false; -#endif + __u64_stats_update_begin(syncp); +} + +static inline void u64_stats_update_end(struct u64_stats_sync *syncp) +{ + __u64_stats_update_end(syncp); +} + +static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags = __u64_stats_irqsave(); + + __u64_stats_update_begin(syncp); + return flags; +} + +static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ + __u64_stats_update_end(syncp); + __u64_stats_irqrestore(flags); +} + +static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +{ + return __u64_stats_fetch_begin(syncp); } static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) - preempt_enable(); -#endif return __u64_stats_fetch_retry(syncp, start); } -/* - * In case irq handlers can update u64 counters, readers can use following helpers - * - SMP 32bit arches use seqcount protection, irq safe. - * - UP 32bit must disable irqs. - * - 64bit have no problem atomically reading u64 values, irq safe. - */ +/* Obsolete interfaces */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) - preempt_disable(); -#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) - local_irq_disable(); -#endif - return __u64_stats_fetch_begin(syncp); + return u64_stats_fetch_begin(syncp); } static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) - preempt_enable(); -#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) - local_irq_enable(); -#endif - return __u64_stats_fetch_retry(syncp, start); + return u64_stats_fetch_retry(syncp, start); } #endif /* _LINUX_U64_STATS_SYNC_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 47e5d374c7eb..afb18f198843 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -58,20 +58,28 @@ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { - instrument_copy_from_user(to, from, n); + unsigned long res; + + instrument_copy_from_user_before(to, from, n); check_object_size(to, n, false); - return raw_copy_from_user(to, from, n); + res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); + return res; } static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res; + might_fault(); + instrument_copy_from_user_before(to, from, n); if (should_fail_usercopy()) return n; - instrument_copy_from_user(to, from, n); check_object_size(to, n, false); - return raw_copy_from_user(to, from, n); + res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); + return res; } /** @@ -115,8 +123,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { - instrument_copy_from_user(to, from, n); + instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); } if (unlikely(res)) memset(to + (n - res), 0, res); diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index 22345391350b..2516082cd3a9 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -23,7 +23,7 @@ #include <sound/ac97_codec.h> #include <linux/mutex.h> #include <linux/platform_device.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> /* * UCB1400 AC-link registers diff --git a/include/linux/uio.h b/include/linux/uio.h index 5896af36199c..2e3134b14ffd 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -298,7 +298,7 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) shorted = iov_iter_count(i) - max_bytes; iov_iter_truncate(i, max_bytes); } - npages = iov_iter_npages(i, INT_MAX); + npages = iov_iter_npages(i, maxpages); if (shorted) iov_iter_reexpand(i, iov_iter_count(i) + shorted); diff --git a/include/linux/umh.h b/include/linux/umh.h index 244aff638220..5d1f6129b847 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -11,10 +11,11 @@ struct cred; struct file; -#define UMH_NO_WAIT 0 /* don't wait at all */ -#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ -#define UMH_WAIT_PROC 2 /* wait for the process to complete */ -#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ +#define UMH_NO_WAIT 0x00 /* don't wait at all */ +#define UMH_WAIT_EXEC 0x01 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 0x02 /* wait for the process to complete */ +#define UMH_KILLABLE 0x04 /* wait for EXEC/PROC killable */ +#define UMH_FREEZABLE 0x08 /* wait for EXEC/PROC freezable */ struct subprocess_info { struct work_struct work; diff --git a/include/linux/units.h b/include/linux/units.h index 681fc652e3d7..2793a41e73a2 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -20,6 +20,9 @@ #define PICO 1000000000000ULL #define FEMTO 1000000000000000ULL +#define NANOHZ_PER_HZ 1000000000UL +#define MICROHZ_PER_HZ 1000000UL +#define MILLIHZ_PER_HZ 1000UL #define HZ_PER_KHZ 1000UL #define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index edf3342507f1..ee38835ed77c 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -62,6 +62,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_REQUIRES_ALIGNED_DMA BIT(13) #define CI_HDRC_IMX_IS_HSIC BIT(14) #define CI_HDRC_PMQOS BIT(15) +#define CI_HDRC_PHY_VBUS_CONTROL BIT(16) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 67f8713d3fa3..78cd566ee238 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -479,7 +479,6 @@ static inline int ehset_single_step_set_feature(struct usb_hcd *hcd, int port) struct pci_dev; struct pci_device_id; extern int usb_hcd_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id, const struct hc_driver *driver); extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 8ea319f89e1f..f7bfedb740f5 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -276,8 +276,8 @@ struct usb_serial_driver { unsigned int cmd, unsigned long arg); void (*get_serial)(struct tty_struct *tty, struct serial_struct *ss); int (*set_serial)(struct tty_struct *tty, struct serial_struct *ss); - void (*set_termios)(struct tty_struct *tty, - struct usb_serial_port *port, struct ktermios *old); + void (*set_termios)(struct tty_struct *tty, struct usb_serial_port *port, + const struct ktermios *old); void (*break_ctl)(struct tty_struct *tty, int break_state); unsigned int (*chars_in_buffer)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, long timeout); diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 20c0bedb8ec8..17657451c762 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -167,6 +167,11 @@ /* I2C_WRITE_BYTE_COUNT + 1 when TX_BUF_BYTE_x is only accessible I2C_WRITE_BYTE_COUNT */ #define TCPC_TRANSMIT_BUFFER_MAX_LEN 31 +#define tcpc_presenting_rd(reg, cc) \ + (!(TCPC_ROLE_CTRL_DRP & (reg)) && \ + (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \ + (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT))) + struct tcpci; /* @@ -207,4 +212,21 @@ irqreturn_t tcpci_irq(struct tcpci *tcpci); struct tcpm_port; struct tcpm_port *tcpci_get_tcpm_port(struct tcpci *tcpci); + +static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink) +{ + switch (cc) { + case 0x1: + return sink ? TYPEC_CC_RP_DEF : TYPEC_CC_RA; + case 0x2: + return sink ? TYPEC_CC_RP_1_5 : TYPEC_CC_RD; + case 0x3: + if (sink) + return TYPEC_CC_RP_3_0; + fallthrough; + case 0x0: + default: + return TYPEC_CC_OPEN; + } +} #endif /* __LINUX_USB_TCPCI_H */ diff --git a/include/linux/user_events.h b/include/linux/user_events.h index 736e05603463..592a3fbed98e 100644 --- a/include/linux/user_events.h +++ b/include/linux/user_events.h @@ -20,15 +20,6 @@ #define USER_EVENTS_SYSTEM "user_events" #define USER_EVENTS_PREFIX "u:" -/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ -#define EVENT_BIT_FTRACE 0 -#define EVENT_BIT_PERF 1 -#define EVENT_BIT_OTHER 7 - -#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) -#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) -#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) - /* Create dynamic location entry within a 32-bit value */ #define DYN_LOC(offset, size) ((size) << 16 | (offset)) @@ -45,12 +36,12 @@ struct user_reg { /* Input: Pointer to string with event name, description and flags */ __u64 name_args; - /* Output: Byte index of the event within the status page */ - __u32 status_index; + /* Output: Bitwise index of the event within the status page */ + __u32 status_bit; /* Output: Index of the event to use when writing data */ __u32 write_index; -}; +} __attribute__((__packed__)); #define DIAG_IOC_MAGIC '*' diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 33a4240e6a6f..45f09bec02c4 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -54,15 +54,17 @@ enum ucount_type { UCOUNT_FANOTIFY_GROUPS, UCOUNT_FANOTIFY_MARKS, #endif + UCOUNT_COUNTS, +}; + +enum rlimit_type { UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_SIGPENDING, UCOUNT_RLIMIT_MEMLOCK, - UCOUNT_COUNTS, + UCOUNT_RLIMIT_COUNTS, }; -#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC - struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -99,6 +101,7 @@ struct user_namespace { #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; + long rlimit_max[UCOUNT_RLIMIT_COUNTS]; } __randomize_layout; struct ucounts { @@ -107,6 +110,7 @@ struct ucounts { kuid_t uid; atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; + atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; extern struct user_namespace init_user_ns; @@ -120,21 +124,26 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); -static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type) +static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { - return atomic_long_read(&ucounts->ucount[type]); + return atomic_long_read(&ucounts->rlimit[type]); } -long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); -bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); -void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); -bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); +long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); +bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); +void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); +bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); + +static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type) +{ + return READ_ONCE(ns->rlimit_max[type]); +} -static inline void set_rlimit_ucount_max(struct user_namespace *ns, - enum ucount_type type, unsigned long max) +static inline void set_userns_rlimit_max(struct user_namespace *ns, + enum rlimit_type type, unsigned long max) { - ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX; + ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX; } #ifdef CONFIG_USER_NS diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e1b8a915e9e9..f07e6998bb68 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -175,9 +175,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); -extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct list_head *uf); +extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, + unsigned long end, struct list_head *uf); extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); @@ -258,7 +257,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma, return true; } -static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, +static inline int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start, unsigned long end, struct list_head *uf) { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index d282f464d2f1..6d0f5e4e82c2 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -104,6 +104,7 @@ struct vdpa_iova_range { }; struct vdpa_dev_set_config { + u64 device_features; struct { u8 mac[ETH_ALEN]; u16 mtu; diff --git a/include/linux/verification.h b/include/linux/verification.h index a655923335ae..f34e50ebcf60 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -17,6 +17,14 @@ #define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL) #define VERIFY_USE_PLATFORM_KEYRING ((struct key *)2UL) +static inline int system_keyring_id_check(u64 id) +{ + if (id > (unsigned long)VERIFY_USE_PLATFORM_KEYRING) + return -EINVAL; + + return 0; +} + /* * The use to which an asymmetric key is being put. */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e05ddc6fe6a5..e7cebeb875dd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -14,6 +14,7 @@ #include <linux/workqueue.h> #include <linux/poll.h> #include <uapi/linux/vfio.h> +#include <linux/iova_bitmap.h> struct kvm; @@ -33,10 +34,11 @@ struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; /* - * mig_ops is a static property of the vfio_device which must be set - * prior to registering the vfio_device. + * mig_ops/log_ops is a static property of the vfio_device which must + * be set prior to registering the vfio_device. */ const struct vfio_migration_ops *mig_ops; + const struct vfio_log_ops *log_ops; struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; @@ -45,7 +47,9 @@ struct vfio_device { struct kvm *kvm; /* Members below here are private, not for driver use */ - refcount_t refcount; + unsigned int index; + struct device device; /* device.kref covers object life circle */ + refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; struct list_head group_next; @@ -55,6 +59,8 @@ struct vfio_device { /** * struct vfio_device_ops - VFIO bus driver device callbacks * + * @init: initialize private fields in device structure + * @release: Reclaim private fields in device structure * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -72,6 +78,8 @@ struct vfio_device { */ struct vfio_device_ops { char *name; + int (*init)(struct vfio_device *vdev); + void (*release)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -109,6 +117,28 @@ struct vfio_migration_ops { }; /** + * @log_start: Optional callback to ask the device start DMA logging. + * @log_stop: Optional callback to ask the device stop DMA logging. + * @log_read_and_clear: Optional callback to ask the device read + * and clear the dirty DMAs in some given range. + * + * The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set + * of features does not track logging state relative to the device, + * therefore the device implementation of vfio_log_ops must handle + * arbitrary user requests. This includes rejecting subsequent calls + * to log_start without an intervening log_stop, as well as graceful + * handling of log_stop and log_read_and_clear from invalid states. + */ +struct vfio_log_ops { + int (*log_start)(struct vfio_device *device, + struct rb_root_cached *ranges, u32 nnodes, u64 *page_size); + int (*log_stop)(struct vfio_device *device); + int (*log_read_and_clear)(struct vfio_device *device, + unsigned long iova, unsigned long length, + struct iova_bitmap *dirty); +}; + +/** * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl * @flags: Arg from the device_feature op * @argsz: Arg from the device_feature op @@ -137,9 +167,23 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, return 1; } -void vfio_init_group_dev(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops); -void vfio_uninit_group_dev(struct vfio_device *device); +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, + const struct vfio_device_ops *ops); +#define vfio_alloc_device(dev_struct, member, dev, ops) \ + container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct dev_struct, member)), \ + dev, ops), \ + struct dev_struct, member) + +int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops); +void vfio_free_device(struct vfio_device *device); +static inline void vfio_put_device(struct vfio_device *device) +{ + put_device(&device->device); +} + int vfio_register_group_dev(struct vfio_device *device); int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); @@ -155,6 +199,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, * External user API */ struct iommu_group *vfio_file_iommu_group(struct file *file); +bool vfio_file_is_group(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 5579ece4347b..367fd79226a3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -20,39 +20,10 @@ #define VFIO_PCI_CORE_H #define VFIO_PCI_OFFSET_SHIFT 40 - #define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) -/* Special capability IDs predefined access */ -#define PCI_CAP_ID_INVALID 0xFF /* default raw access */ -#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ - -/* Cap maximum number of ioeventfds per device (arbitrary) */ -#define VFIO_PCI_IOEVENTFD_MAX 1000 - -struct vfio_pci_ioeventfd { - struct list_head next; - struct vfio_pci_core_device *vdev; - struct virqfd *virqfd; - void __iomem *addr; - uint64_t data; - loff_t pos; - int bar; - int count; - bool test_mem; -}; - -struct vfio_pci_irq_ctx { - struct eventfd_ctx *trigger; - struct virqfd *unmask; - struct virqfd *mask; - char *name; - bool masked; - struct irq_bypass_producer producer; -}; - struct vfio_pci_core_device; struct vfio_pci_region; @@ -78,23 +49,6 @@ struct vfio_pci_region { u32 flags; }; -struct vfio_pci_dummy_resource { - struct resource resource; - int index; - struct list_head res_next; -}; - -struct vfio_pci_vf_token { - struct mutex lock; - uuid_t uuid; - int users; -}; - -struct vfio_pci_mmap_vma { - struct vm_area_struct *vma; - struct list_head vma_next; -}; - struct vfio_pci_core_device { struct vfio_device vdev; struct pci_dev *pdev; @@ -124,11 +78,14 @@ struct vfio_pci_core_device { bool needs_reset; bool nointx; bool needs_pm_restore; + bool pm_intx_masked; + bool pm_runtime_engaged; struct pci_saved_state *pci_saved_state; struct pci_saved_state *pm_save; int ioeventfds_nr; struct eventfd_ctx *err_trigger; struct eventfd_ctx *req_trigger; + struct eventfd_ctx *pm_wake_eventfd_ctx; struct list_head dummy_resources_list; struct mutex ioeventfds_lock; struct list_head ioeventfds_list; @@ -141,100 +98,17 @@ struct vfio_pci_core_device { struct rw_semaphore memory_lock; }; -#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) -#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX) -#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX) -#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) -#define irq_is(vdev, type) (vdev->irq_type == type) - -void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); - -int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, - uint32_t flags, unsigned index, - unsigned start, unsigned count, void *data); - -ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); - -ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); - -#ifdef CONFIG_VFIO_PCI_VGA -ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); -#else -static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite) -{ - return -EINVAL; -} -#endif - -long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, - uint64_t data, int count, int fd); - -int vfio_pci_init_perm_bits(void); -void vfio_pci_uninit_perm_bits(void); - -int vfio_config_init(struct vfio_pci_core_device *vdev); -void vfio_config_free(struct vfio_pci_core_device *vdev); - -int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, - unsigned int type, unsigned int subtype, - const struct vfio_pci_regops *ops, - size_t size, u32 flags, void *data); - -int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, - pci_power_t state); - -bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev); -u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); -void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, - u16 cmd); - -#ifdef CONFIG_VFIO_PCI_IGD -int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) -{ - return -ENODEV; -} -#endif - -#ifdef CONFIG_VFIO_PCI_ZDEV_KVM -int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps); -int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev); -void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps) -{ - return -ENODEV; -} - -static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) -{ - return 0; -} - -static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) -{} -#endif - /* Will be exported for vfio pci drivers usage */ +int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data); void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, bool is_disable_idle_d3); void vfio_pci_core_close_device(struct vfio_device *core_vdev); -void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, - struct pci_dev *pdev, - const struct vfio_device_ops *vfio_pci_ops); +int vfio_pci_core_init_dev(struct vfio_device *core_vdev); +void vfio_pci_core_release_dev(struct vfio_device *core_vdev); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); -void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); extern const struct pci_error_handlers vfio_pci_core_err_handlers; int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev, @@ -256,9 +130,4 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); -static inline bool vfio_pci_is_vga(struct pci_dev *pdev) -{ - return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; -} - #endif /* VFIO_PCI_CORE_H */ diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h index e5d665faf00e..a8dc757d0367 100644 --- a/include/linux/virtio_pci_legacy.h +++ b/include/linux/virtio_pci_legacy.h @@ -32,8 +32,6 @@ void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, u16 index, u32 queue_pfn); bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, u16 idx); -void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, - u16 idx, u16 size); u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, u16 idx); int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f3fc36cd2276..3518dba1e02f 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -129,10 +129,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ -#ifdef CONFIG_DEBUG_VM_VMACACHE - VMACACHE_FIND_CALLS, - VMACACHE_FIND_HITS, -#endif #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h deleted file mode 100644 index 6fce268a4588..000000000000 --- a/include/linux/vmacache.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_VMACACHE_H -#define __LINUX_VMACACHE_H - -#include <linux/sched.h> -#include <linux/mm.h> - -static inline void vmacache_flush(struct task_struct *tsk) -{ - memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); -} - -extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); -extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, - unsigned long addr); - -#ifndef CONFIG_MMU -extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, - unsigned long start, - unsigned long end); -#endif - -static inline void vmacache_invalidate(struct mm_struct *mm) -{ - mm->vmacache_seqnum++; -} - -#endif /* __LINUX_VMACACHE_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index bfe38869498d..19cf5b6892ce 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -125,12 +125,6 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif -#ifdef CONFIG_DEBUG_VM_VMACACHE -#define count_vm_vmacache_event(x) count_vm_event(x) -#else -#define count_vm_vmacache_event(x) do {} while (0) -#endif - #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index b5ab452fca5b..c1f5aebef170 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -30,7 +30,6 @@ struct vc_data *vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); void do_unblank_screen(int leaving_gfx); -void unblank_screen(void); void poke_blanked_console(void); int con_font_op(struct vc_data *vc, struct console_font_op *op); int con_set_cmap(unsigned char __user *cmap); diff --git a/include/linux/wait.h b/include/linux/wait.h index 58cfbf81447c..7f5a51aae0a7 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -281,7 +281,7 @@ static inline void wake_up_pollfree(struct wait_queue_head *wq_head) #define ___wait_is_interruptible(state) \ (!__builtin_constant_p(state) || \ - state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ + (state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); @@ -361,8 +361,8 @@ do { \ } while (0) #define __wait_event_freezable(wq_head, condition) \ - ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ - freezable_schedule()) + ___wait_event(wq_head, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), \ + 0, 0, schedule()) /** * wait_event_freezable - sleep (or freeze) until a condition gets true @@ -420,8 +420,8 @@ do { \ #define __wait_event_freezable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ - __ret = freezable_schedule_timeout(__ret)) + (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 0, timeout, \ + __ret = schedule_timeout(__ret)) /* * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid @@ -642,8 +642,8 @@ do { \ #define __wait_event_freezable_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ - freezable_schedule()) + ___wait_event(wq, condition, (TASK_INTERRUPTIBLE|TASK_FREEZABLE), 1, 0,\ + schedule()) #define wait_event_freezable_exclusive(wq, condition) \ ({ \ @@ -932,6 +932,34 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_state(wq, condition, state) \ + ___wait_event(wq, condition, state, 0, 0, schedule()) + +/** + * wait_event_state - sleep until a condition gets true + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @state: state to sleep in + * + * The process is put to sleep (@state) until the @condition evaluates to true + * or a signal is received (when allowed by @state). The @condition is checked + * each time the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function will return -ERESTARTSYS if it was interrupted by a signal + * (when allowed by @state) and 0 if @condition evaluated to true. + */ +#define wait_event_state(wq_head, condition, state) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_state(wq_head, condition, state); \ + __ret; \ +}) + #define __wait_event_killable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_KILLABLE, 0, timeout, \ diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 2d1b54556eff..e6e34d74dda0 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -26,7 +26,15 @@ struct compat_iw_point { struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ - compat_caddr_t pointer; + + union { + compat_caddr_t pointer; + + /* we need ptr_bytes to make memcpy() run-time destination + * buffer bounds checking happy, nothing special + */ + DECLARE_FLEX_ARRAY(__u8, ptr_bytes); + }; }; #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3f045f6d6c4f..06f9291b6fd5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -17,20 +17,12 @@ struct bio; DECLARE_PER_CPU(int, dirty_throttle_leaks); /* - * The 1/4 region under the global dirty thresh is for smooth dirty throttling: - * - * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) - * - * Further beyond, all dirtier tasks will enter a loop waiting (possibly long - * time) for the dirty pages to drop, unless written enough pages. - * * The global dirty threshold is normally equal to the global dirty limit, * except when the system suddenly allocates a lot of anonymous memory and * knocks down the global dirty threshold quickly, in which case the global * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. */ #define DIRTY_SCOPE 8 -#define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) struct backing_dev_info; diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 979a9d3e5bfb..4c379d23ec6e 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -61,7 +61,7 @@ int __vfs_setxattr_locked(struct user_namespace *, struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct user_namespace *, struct dentry *, const char *, - void *, size_t, int); + const void *, size_t, int); int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *); int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, const char *, struct inode **); |