summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/acpi/acpi_io.h8
-rw-r--r--include/acpi/cppc_acpi.h5
-rw-r--r--include/acpi/platform/acgcc.h9
-rw-r--r--include/asm-generic/cacheflush.h6
-rw-r--r--include/asm-generic/io.h28
-rw-r--r--include/asm-generic/iomap.h10
-rw-r--r--include/asm-generic/mshyperv.h21
-rw-r--r--include/asm-generic/pci_iomap.h3
-rw-r--r--include/asm-generic/vmlinux.lds.h4
-rw-r--r--include/drm/ttm/ttm_tt.h3
-rw-r--r--include/kunit/test.h6
-rw-r--r--include/kvm/arm_pmu.h3
-rw-r--r--include/linux/arm-smccc.h10
-rw-r--r--include/linux/backing-dev.h6
-rw-r--r--include/linux/bootconfig.h4
-rw-r--r--include/linux/bpf.h10
-rw-r--r--include/linux/bpf_types.h8
-rw-r--r--include/linux/buffer_head.h4
-rw-r--r--include/linux/cacheinfo.h18
-rw-r--r--include/linux/cgroup-defs.h107
-rw-r--r--include/linux/cgroup.h22
-rw-r--r--include/linux/compiler-clang.h13
-rw-r--r--include/linux/compiler-gcc.h11
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/compiler_attributes.h43
-rw-r--r--include/linux/compiler_types.h6
-rw-r--r--include/linux/cpu.h6
-rw-r--r--include/linux/cpuhotplug.h136
-rw-r--r--include/linux/cpumask.h7
-rw-r--r--include/linux/dsa/mv88e6xxx.h13
-rw-r--r--include/linux/dsa/ocelot.h51
-rw-r--r--include/linux/dsa/sja1105.h40
-rw-r--r--include/linux/elfcore.h2
-rw-r--r--include/linux/energy_model.h8
-rw-r--r--include/linux/etherdevice.h2
-rw-r--r--include/linux/file.h7
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/flex_proportions.h9
-rw-r--r--include/linux/fwnode.h11
-rw-r--r--include/linux/genhd.h1
-rw-r--r--include/linux/gfp.h22
-rw-r--r--include/linux/highmem-internal.h11
-rw-r--r--include/linux/highmem.h37
-rw-r--r--include/linux/huge_mm.h15
-rw-r--r--include/linux/irqdomain.h2
-rw-r--r--include/linux/ksm.h4
-rw-r--r--include/linux/kvm_host.h6
-rw-r--r--include/linux/mdio.h3
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memcontrol.h264
-rw-r--r--include/linux/memory.h5
-rw-r--r--include/linux/migrate.h10
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h10
-rw-r--r--include/linux/mm.h239
-rw-r--r--include/linux/mm_inline.h103
-rw-r--r--include/linux/mm_types.h77
-rw-r--r--include/linux/mmap_lock.h9
-rw-r--r--include/linux/mmdebug.h20
-rw-r--r--include/linux/netfs.h77
-rw-r--r--include/linux/nvmem-consumer.h14
-rw-r--r--include/linux/overflow.h138
-rw-r--r--include/linux/packing.h2
-rw-r--r--include/linux/page-flags.h290
-rw-r--r--include/linux/page_idle.h99
-rw-r--r--include/linux/page_owner.h8
-rw-r--r--include/linux/page_ref.h158
-rw-r--r--include/linux/pagemap.h585
-rw-r--r--include/linux/perf/arm_pmu.h6
-rw-r--r--include/linux/perf_event.h4
-rw-r--r--include/linux/pkeys.h2
-rw-r--r--include/linux/platform_data/usb-omap1.h2
-rw-r--r--include/linux/pwm.h2
-rw-r--r--include/linux/qcom_scm.h69
-rw-r--r--include/linux/rmap.h10
-rw-r--r--include/linux/rwsem.h12
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/secretmem.h2
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/linux/spi/spi.h3
-rw-r--r--include/linux/swap.h17
-rw-r--r--include/linux/thermal.h7
-rw-r--r--include/linux/time64.h9
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/trace_recursion.h49
-rw-r--r--include/linux/tracehook.h2
-rw-r--r--include/linux/uio.h21
-rw-r--r--include/linux/usb/hcd.h2
-rw-r--r--include/linux/user_namespace.h2
-rw-r--r--include/linux/vdpa.h62
-rw-r--r--include/linux/vhost_iotlb.h3
-rw-r--r--include/linux/vmstat.h113
-rw-r--r--include/linux/workqueue.h5
-rw-r--r--include/linux/writeback.h9
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/dsa.h14
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/mac80211.h8
-rw-r--r--include/net/mctp.h2
-rw-r--r--include/net/mptcp.h4
-rw-r--r--include/net/netfilter/ipv6/nf_defrag_ipv6.h1
-rw-r--r--include/net/netfilter/nf_tables.h2
-rw-r--r--include/net/netns/netfilter.h6
-rw-r--r--include/net/nexthop.h2
-rw-r--r--include/net/pkt_sched.h1
-rw-r--r--include/net/sctp/sm.h6
-rw-r--r--include/net/sock.h43
-rw-r--r--include/net/tcp.h5
-rw-r--r--include/net/tls.h11
-rw-r--r--include/net/udp.h5
-rw-r--r--include/scsi/scsi_device.h1
-rw-r--r--include/soc/mscc/ocelot.h55
-rw-r--r--include/soc/mscc/ocelot_ptp.h3
-rw-r--r--include/soc/mscc/ocelot_vcap.h4
-rw-r--r--include/sound/hda_codec.h1
-rw-r--r--include/sound/rawmidi.h1
-rw-r--r--include/trace/events/afs.h8
-rw-r--r--include/trace/events/cachefiles.h6
-rw-r--r--include/trace/events/erofs.h6
-rw-r--r--include/trace/events/kyber.h19
-rw-r--r--include/trace/events/pagemap.h46
-rw-r--r--include/trace/events/writeback.h28
-rw-r--r--include/uapi/linux/android/binder.h7
-rw-r--r--include/uapi/linux/cifs/cifs_mount.h1
-rw-r--r--include/uapi/linux/hyperv.h2
-rw-r--r--include/uapi/linux/io_uring.h8
-rw-r--r--include/uapi/linux/mctp.h7
-rw-r--r--include/uapi/linux/vduse.h306
-rw-r--r--include/uapi/linux/virtio_ids.h9
-rw-r--r--include/uapi/linux/virtio_vsock.h3
-rw-r--r--include/uapi/linux/xfrm.h15
-rw-r--r--include/uapi/misc/habanalabs.h192
-rw-r--r--include/uapi/sound/asound.h1
-rw-r--r--include/xen/xen-ops.h27
135 files changed, 2733 insertions, 1431 deletions
diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h
index a0212e67d6f4..027faa8883aa 100644
--- a/include/acpi/acpi_io.h
+++ b/include/acpi/acpi_io.h
@@ -14,14 +14,6 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
}
#endif
-#ifndef acpi_os_memmap
-static inline void __iomem *acpi_os_memmap(acpi_physical_address phys,
- acpi_size size)
-{
- return ioremap_cache(phys, size);
-}
-#endif
-
extern bool acpi_permanent_mmap;
void __iomem __ref
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 9f4985b4d64d..bc159a9b4a73 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -135,6 +135,7 @@ struct cppc_cpudata {
#ifdef CONFIG_ACPI_CPPC_LIB
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
+extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
@@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
return -ENOTSUPP;
}
+static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+ return -ENOTSUPP;
+}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
return -ENOTSUPP;
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index fb172a03a753..20ecb004f5a4 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -22,9 +22,14 @@ typedef __builtin_va_list va_list;
#define va_arg(v, l) __builtin_va_arg(v, l)
#define va_copy(d, s) __builtin_va_copy(d, s)
#else
+#ifdef __KERNEL__
#include <linux/stdarg.h>
-#endif
-#endif
+#else
+/* Used to build acpi tools */
+#include <stdarg.h>
+#endif /* __KERNEL__ */
+#endif /* ACPI_USE_BUILTIN_STDARG */
+#endif /* ! va_arg */
#define ACPI_INLINE __inline__
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index 4a674db4e1fa..fedc0dfa4877 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -49,9 +49,15 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
static inline void flush_dcache_page(struct page *page)
{
}
+
+static inline void flush_dcache_folio(struct folio *folio) { }
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
#endif
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
#ifndef flush_dcache_mmap_lock
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index e93375c710b9..7ce93aaf69f8 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -957,7 +957,7 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
#ifndef iounmap
#define iounmap iounmap
-static inline void iounmap(void __iomem *addr)
+static inline void iounmap(volatile void __iomem *addr)
{
}
#endif
@@ -1023,16 +1023,7 @@ static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
port &= IO_SPACE_LIMIT;
return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port;
}
-#define __pci_ioport_unmap __pci_ioport_unmap
-static inline void __pci_ioport_unmap(void __iomem *p)
-{
- uintptr_t start = (uintptr_t) PCI_IOBASE;
- uintptr_t addr = (uintptr_t) p;
-
- if (addr >= start && addr < start + IO_SPACE_LIMIT)
- return;
- iounmap(p);
-}
+#define ARCH_HAS_GENERIC_IOPORT_MAP
#endif
#ifndef ioport_unmap
@@ -1048,21 +1039,10 @@ extern void ioport_unmap(void __iomem *p);
#endif /* CONFIG_HAS_IOPORT_MAP */
#ifndef CONFIG_GENERIC_IOMAP
-struct pci_dev;
-extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
-
-#ifndef __pci_ioport_unmap
-static inline void __pci_ioport_unmap(void __iomem *p) {}
-#endif
-
#ifndef pci_iounmap
-#define pci_iounmap pci_iounmap
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
-{
- __pci_ioport_unmap(p);
-}
+#define ARCH_WANTS_GENERIC_PCI_IOUNMAP
+#endif
#endif
-#endif /* CONFIG_GENERIC_IOMAP */
#ifndef xlate_dev_mem_ptr
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 9b3eb6d86200..08237ae8b840 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -110,16 +110,6 @@ static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size)
}
#endif
-#ifdef CONFIG_PCI
-/* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */
-struct pci_dev;
-extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
-#elif defined(CONFIG_GENERIC_IOMAP)
-struct pci_dev;
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{ }
-#endif
-
#include <asm-generic/pci_iomap.h>
#endif
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index c1ab6a6e72b5..d3eae6cdbacb 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -197,10 +197,12 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number)
return hv_vp_index[cpu_number];
}
-static inline int cpumask_to_vpset(struct hv_vpset *vpset,
- const struct cpumask *cpus)
+static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
+ const struct cpumask *cpus,
+ bool exclude_self)
{
int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+ int this_cpu = smp_processor_id();
/* valid_bank_mask can represent up to 64 banks */
if (hv_max_vp_index / 64 >= 64)
@@ -218,6 +220,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
* Some banks may end up being empty but this is acceptable.
*/
for_each_cpu(cpu, cpus) {
+ if (exclude_self && cpu == this_cpu)
+ continue;
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu == VP_INVAL)
return -1;
@@ -232,6 +236,19 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
return nr_bank;
}
+static inline int cpumask_to_vpset(struct hv_vpset *vpset,
+ const struct cpumask *cpus)
+{
+ return __cpumask_to_vpset(vpset, cpus, false);
+}
+
+static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset,
+ const struct cpumask *cpus)
+{
+ WARN_ON_ONCE(preemptible());
+ return __cpumask_to_vpset(vpset, cpus, true);
+}
+
void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h
index df636c6d8e6c..5a2f9bf53384 100644
--- a/include/asm-generic/pci_iomap.h
+++ b/include/asm-generic/pci_iomap.h
@@ -18,6 +18,7 @@ extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
extern void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar,
unsigned long offset,
unsigned long maxlen);
+extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
/* Create a virtual mapping cookie for a port on a given PCI device.
* Do not call this directly, it exists to make it easier for architectures
* to override */
@@ -50,6 +51,8 @@ static inline void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar,
{
return NULL;
}
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{ }
#endif
#endif /* __ASM_GENERIC_PCI_IOMAP_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index aa50bf2959fe..f2984af2b85b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -116,11 +116,7 @@
* GCC 4.5 and later have a 32 bytes section alignment for structures.
* Except GCC 4.9, that feels the need to align on 64 bytes.
*/
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 9
-#define STRUCT_ALIGNMENT 64
-#else
#define STRUCT_ALIGNMENT 32
-#endif
#define STRUCT_ALIGN() . = ALIGN(STRUCT_ALIGNMENT)
/*
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 818680c6a8ed..b20e89d321b0 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -27,11 +27,12 @@
#ifndef _TTM_TT_H_
#define _TTM_TT_H_
+#include <linux/pagemap.h>
#include <linux/types.h>
#include <drm/ttm/ttm_caching.h>
#include <drm/ttm/ttm_kmap_iter.h>
-struct ttm_bo_device;
+struct ttm_device;
struct ttm_tt;
struct ttm_resource;
struct ttm_buffer_object;
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 24b40e5c160b..018e776a34b9 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -613,7 +613,7 @@ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res);
* and is automatically cleaned up after the test case concludes. See &struct
* kunit_resource for more information.
*/
-void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t flags);
+void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp);
/**
* kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*.
@@ -657,9 +657,9 @@ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp)
*
* See kcalloc() and kunit_kmalloc_array() for more information.
*/
-static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t flags)
+static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t gfp)
{
- return kunit_kmalloc_array(test, n, size, flags | __GFP_ZERO);
+ return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO);
}
void kunit_cleanup(struct kunit *test);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 864b9997efb2..90f21898aad8 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
-int kvm_pmu_probe_pmuver(void);
#else
struct kvm_pmu {
};
@@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
return 0;
}
-static inline int kvm_pmu_probe_pmuver(void) { return 0xf; }
-
#endif
#endif
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 7d1cabe15262..63ccb5252190 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -321,10 +321,20 @@ asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
* from register 0 to 3 on return from the SMC instruction. An optional
* quirk structure provides vendor specific behavior.
*/
+#ifdef CONFIG_HAVE_ARM_SMCCC
asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3, unsigned long a4,
unsigned long a5, unsigned long a6, unsigned long a7,
struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
+#else
+static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3, unsigned long a4,
+ unsigned long a5, unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *res, struct arm_smccc_quirk *quirk)
+{
+ *res = (struct arm_smccc_res){};
+}
+#endif
/**
* __arm_smccc_hvc() - make HVC calls
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ac7f231b8825..a62e72dd829f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
return atomic_long_read(&bdi->tot_write_bandwidth);
}
-static inline void __add_wb_stat(struct bdi_writeback *wb,
+static inline void wb_stat_mod(struct bdi_writeback *wb,
enum wb_stat_item item, s64 amount)
{
percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
@@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- __add_wb_stat(wb, item, 1);
+ wb_stat_mod(wb, item, 1);
}
static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- __add_wb_stat(wb, item, -1);
+ wb_stat_mod(wb, item, -1);
}
static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index abe089c27529..537e1b991f11 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -110,7 +110,7 @@ static inline __init bool xbc_node_is_leaf(struct xbc_node *node)
}
/* Tree-based key-value access APIs */
-struct xbc_node * __init xbc_node_find_child(struct xbc_node *parent,
+struct xbc_node * __init xbc_node_find_subkey(struct xbc_node *parent,
const char *key);
const char * __init xbc_node_find_value(struct xbc_node *parent,
@@ -148,7 +148,7 @@ xbc_find_value(const char *key, struct xbc_node **vnode)
*/
static inline struct xbc_node * __init xbc_find_node(const char *key)
{
- return xbc_node_find_child(NULL, key);
+ return xbc_node_find_subkey(NULL, key);
}
/**
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f4c16f19f83e..3db6f6c95489 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -578,11 +578,12 @@ struct btf_func_model {
* programs only. Should not be used with normal calls and indirect calls.
*/
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
-
/* Store IP address of the caller on the trampoline stack,
* so it's available for trampoline's programs.
*/
#define BPF_TRAMP_F_IP_ARG BIT(3)
+/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
+#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4)
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
@@ -928,8 +929,11 @@ struct bpf_array_aux {
* stored in the map to make sure that all callers and callees have
* the same prog type and JITed flag.
*/
- enum bpf_prog_type type;
- bool jited;
+ struct {
+ spinlock_t lock;
+ enum bpf_prog_type type;
+ bool jited;
+ } owner;
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
struct bpf_map *map;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..bbe1eefa4c8a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
-#ifdef CONFIG_NET
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6486d3c19463..36f33685c8c0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -194,7 +194,7 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
struct buffer_head *__bread_gfp(struct block_device *,
sector_t block, unsigned size, gfp_t gfp);
void invalidate_bh_lrus(void);
-void invalidate_bh_lrus_cpu(int cpu);
+void invalidate_bh_lrus_cpu(void);
bool has_bh_in_lru(int cpu, void *dummy);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
@@ -408,7 +408,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
-static inline void invalidate_bh_lrus_cpu(int cpu) {}
+static inline void invalidate_bh_lrus_cpu(void) {}
static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
#define buffer_heads_over_limit 0
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 4f72b47973c3..2f909ed084c6 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -79,24 +79,6 @@ struct cpu_cacheinfo {
bool cpu_map_populated;
};
-/*
- * Helpers to make sure "func" is executed on the cpu whose cache
- * attributes are being detected
- */
-#define DEFINE_SMP_CALL_CACHE_FUNCTION(func) \
-static inline void _##func(void *ret) \
-{ \
- int cpu = smp_processor_id(); \
- *(int *)ret = __##func(cpu); \
-} \
- \
-int func(unsigned int cpu) \
-{ \
- int ret; \
- smp_call_function_single(cpu, _##func, &ret, true); \
- return ret; \
-}
-
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
int init_cache_level(unsigned int cpu);
int populate_cache_leaves(unsigned int cpu);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index e1c705fdfa7c..db2e147e069f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
* sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
* per-socket cgroup information except for memcg association.
*
- * On legacy hierarchies, net_prio and net_cls controllers directly set
- * attributes on each sock which can then be tested by the network layer.
- * On the default hierarchy, each sock is associated with the cgroup it was
- * created in and the networking layer can match the cgroup directly.
- *
- * To avoid carrying all three cgroup related fields separately in sock,
- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
- * On boot, sock_cgroup_data records the cgroup that the sock was created
- * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overridden to carry prioidx and/or
- * classid. The two modes are distinguished by whether the lowest bit is
- * set. Clear bit indicates cgroup pointer while set bit prioidx and
- * classid.
- *
- * While userland may start using net_prio or net_cls at any time, once
- * either is used, cgroup2 matching no longer works. There is no reason to
- * mix the two and this is in line with how legacy and v2 compatibility is
- * handled. On mode switch, cgroup references which are already being
- * pointed to by socks may be leaked. While this can be remedied by adding
- * synchronization around sock_cgroup_data, given that the number of leaked
- * cgroups is bound and highly unlikely to be high, this seems to be the
- * better trade-off.
+ * On legacy hierarchies, net_prio and net_cls controllers directly
+ * set attributes on each sock which can then be tested by the network
+ * layer. On the default hierarchy, each sock is associated with the
+ * cgroup it was created in and the networking layer can match the
+ * cgroup directly.
*/
struct sock_cgroup_data {
- union {
-#ifdef __LITTLE_ENDIAN
- struct {
- u8 is_data : 1;
- u8 no_refcnt : 1;
- u8 unused : 6;
- u8 padding;
- u16 prioidx;
- u32 classid;
- } __packed;
-#else
- struct {
- u32 classid;
- u16 prioidx;
- u8 padding;
- u8 unused : 6;
- u8 no_refcnt : 1;
- u8 is_data : 1;
- } __packed;
+ struct cgroup *cgroup; /* v2 */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ u32 classid; /* v1 */
+#endif
+#ifdef CONFIG_CGROUP_NET_PRIO
+ u16 prioidx; /* v1 */
#endif
- u64 val;
- };
};
-/*
- * There's a theoretical window where the following accessors race with
- * updaters and return part of the previous pointer as the prioidx or
- * classid. Such races are short-lived and the result isn't critical.
- */
static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
{
- /* fallback to 1 which is always the ID of the root cgroup */
- return (skcd->is_data & 1) ? skcd->prioidx : 1;
+#ifdef CONFIG_CGROUP_NET_PRIO
+ return READ_ONCE(skcd->prioidx);
+#else
+ return 1;
+#endif
}
static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{
- /* fallback to 0 which is the unconfigured default classid */
- return (skcd->is_data & 1) ? skcd->classid : 0;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ return READ_ONCE(skcd->classid);
+#else
+ return 0;
+#endif
}
-/*
- * If invoked concurrently, the updaters may clobber each other. The
- * caller is responsible for synchronization.
- */
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx)
{
- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
- if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
- return;
-
- if (!(skcd_buf.is_data & 1)) {
- skcd_buf.val = 0;
- skcd_buf.is_data = 1;
- }
-
- skcd_buf.prioidx = prioidx;
- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_PRIO
+ WRITE_ONCE(skcd->prioidx, prioidx);
+#endif
}
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid)
{
- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
- if (sock_cgroup_classid(&skcd_buf) == classid)
- return;
-
- if (!(skcd_buf.is_data & 1)) {
- skcd_buf.val = 0;
- skcd_buf.is_data = 1;
- }
-
- skcd_buf.classid = classid;
- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ WRITE_ONCE(skcd->classid, classid);
+#endif
}
#else /* CONFIG_SOCK_CGROUP_DATA */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7bf60454a313..75c151413fda 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
*/
#ifdef CONFIG_SOCK_CGROUP_DATA
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-extern spinlock_t cgroup_sk_update_lock;
-#endif
-
-void cgroup_sk_alloc_disable(void);
void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
void cgroup_sk_clone(struct sock_cgroup_data *skcd);
void cgroup_sk_free(struct sock_cgroup_data *skcd);
static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
{
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
- unsigned long v;
-
- /*
- * @skcd->val is 64bit but the following is safe on 32bit too as we
- * just need the lower ulong to be written and read atomically.
- */
- v = READ_ONCE(skcd->val);
-
- if (v & 3)
- return &cgrp_dfl_root.cgrp;
-
- return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
-#else
- return (struct cgroup *)(unsigned long)skcd->val;
-#endif
+ return skcd->cgroup;
}
#else /* CONFIG_CGROUP_DATA */
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 49b0ac8b6fd3..3c4de9b6c6e3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -62,19 +62,6 @@
#define __no_sanitize_coverage
#endif
-/*
- * Not all versions of clang implement the type-generic versions
- * of the builtin overflow checkers. Fortunately, clang implements
- * __has_builtin allowing us to avoid awkward version
- * checks. Unfortunately, we don't know which version of gcc clang
- * pretends to be, so the macro may or may not be defined.
- */
-#if __has_builtin(__builtin_mul_overflow) && \
- __has_builtin(__builtin_add_overflow) && \
- __has_builtin(__builtin_sub_overflow)
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
#if __has_feature(shadow_call_stack)
# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb9217fc60af..bd2b881c6b63 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -43,9 +43,6 @@
#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#define __compiletime_warning(message) __attribute__((__warning__(message)))
-#define __compiletime_error(message) __attribute__((__error__(message)))
-
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
#define __latent_entropy __attribute__((latent_entropy))
#endif
@@ -98,10 +95,8 @@
#if GCC_VERSION >= 70000
#define KASAN_ABI_VERSION 5
-#elif GCC_VERSION >= 50000
+#else
#define KASAN_ABI_VERSION 4
-#elif GCC_VERSION >= 40902
-#define KASAN_ABI_VERSION 3
#endif
#if __has_attribute(__no_sanitize_address__)
@@ -128,10 +123,6 @@
#define __no_sanitize_coverage
#endif
-#if GCC_VERSION >= 50100
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
/*
* Turn individual warnings and errors on and off locally, depending
* on version.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b67261a1e3e9..3d5af56337bd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -188,6 +188,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
(typeof(ptr)) (__ptr + (off)); })
#endif
+#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0)
+
#ifndef OPTIMIZER_HIDE_VAR
/* Make the optimizer believe the variable can be manipulated arbitrarily. */
#define OPTIMIZER_HIDE_VAR(var) \
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 2487be0e7199..e6ec63403965 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -21,26 +21,6 @@
*/
/*
- * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
- * In the meantime, to support gcc < 5, we implement __has_attribute
- * by hand.
- */
-#ifndef __has_attribute
-# define __has_attribute(x) __GCC4_has_attribute_##x
-# define __GCC4_has_attribute___assume_aligned__ 1
-# define __GCC4_has_attribute___copy__ 0
-# define __GCC4_has_attribute___designated_init__ 0
-# define __GCC4_has_attribute___externally_visible__ 1
-# define __GCC4_has_attribute___no_caller_saved_registers__ 0
-# define __GCC4_has_attribute___noclone__ 1
-# define __GCC4_has_attribute___no_profile_instrument_function__ 0
-# define __GCC4_has_attribute___nonstring__ 0
-# define __GCC4_has_attribute___no_sanitize_address__ 1
-# define __GCC4_has_attribute___no_sanitize_undefined__ 1
-# define __GCC4_has_attribute___fallthrough__ 0
-#endif
-
-/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
*/
#define __alias(symbol) __attribute__((__alias__(#symbol)))
@@ -74,7 +54,6 @@
* compiler should see some alignment anyway, when the return value is
* massaged by 'flags = ptr & 3; ptr &= ~3;').
*
- * Optional: only supported since gcc >= 4.9
* Optional: not supported by icc
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
@@ -138,6 +117,17 @@
#endif
/*
+ * Optional: only supported since clang >= 14.0
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute
+ */
+#if __has_attribute(__error__)
+# define __compiletime_error(msg) __attribute__((__error__(msg)))
+#else
+# define __compiletime_error(msg)
+#endif
+
+/*
* Optional: not supported by clang
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
@@ -299,6 +289,17 @@
#define __must_check __attribute__((__warn_unused_result__))
/*
+ * Optional: only supported since clang >= 14.0
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warning-function-attribute
+ */
+#if __has_attribute(__warning__)
+# define __compiletime_warning(msg) __attribute__((__warning__(msg)))
+#else
+# define __compiletime_warning(msg)
+#endif
+
+/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
*/
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e4ea86fc584d..b6ff83a714ca 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -294,12 +294,6 @@ struct ftrace_likely_data {
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
#endif
-#ifndef __compiletime_warning
-# define __compiletime_warning(message)
-#endif
-#ifndef __compiletime_error
-# define __compiletime_error(message)
-#endif
#ifdef __OPTIMIZE__
# define __compiletime_assert(condition, msg, prefix, suffix) \
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 94a578a96202..9cf51e41e697 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -143,12 +143,6 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
#endif /* !CONFIG_HOTPLUG_CPU */
-/* Wrappers which go away once all code is converted */
-static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
-static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
-static inline void get_online_cpus(void) { cpus_read_lock(); }
-static inline void put_online_cpus(void) { cpus_read_unlock(); }
-
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
extern void thaw_secondary_cpus(void);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 39cf84a30b9f..991911048857 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -22,8 +22,42 @@
* AP_ACTIVE AP_ACTIVE
*/
+/*
+ * CPU hotplug states. The state machine invokes the installed state
+ * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE
+ * during a CPU online operation. During a CPU offline operation the
+ * installed teardown callbacks are invoked in the reverse order from
+ * CPU_ONLINE - 1 down to CPUHP_OFFLINE.
+ *
+ * The state space has three sections: PREPARE, STARTING and ONLINE.
+ *
+ * PREPARE: The callbacks are invoked on a control CPU before the
+ * hotplugged CPU is started up or after the hotplugged CPU has died.
+ *
+ * STARTING: The callbacks are invoked on the hotplugged CPU from the low level
+ * hotplug startup/teardown code with interrupts disabled.
+ *
+ * ONLINE: The callbacks are invoked on the hotplugged CPU from the per CPU
+ * hotplug thread with interrupts and preemption enabled.
+ *
+ * Adding explicit states to this enum is only necessary when:
+ *
+ * 1) The state is within the STARTING section
+ *
+ * 2) The state has ordering constraints vs. other states in the
+ * same section.
+ *
+ * If neither #1 nor #2 apply, please use the dynamic state space when
+ * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE
+ * for the @state argument of the setup function.
+ *
+ * See Documentation/core-api/cpu_hotplug.rst for further information and
+ * examples.
+ */
enum cpuhp_state {
CPUHP_INVALID = -1,
+
+ /* PREPARE section invoked on a control CPU */
CPUHP_OFFLINE = 0,
CPUHP_CREATE_THREADS,
CPUHP_PERF_PREPARE,
@@ -38,6 +72,8 @@ enum cpuhp_state {
CPUHP_SLUB_DEAD,
CPUHP_DEBUG_OBJ_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
+ /* Must be after CPUHP_MM_VMSTAT_DEAD */
+ CPUHP_MM_DEMOTION_DEAD,
CPUHP_MM_VMSTAT_DEAD,
CPUHP_SOFTIRQ_DEAD,
CPUHP_NET_MVNETA_DEAD,
@@ -95,6 +131,11 @@ enum cpuhp_state {
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
CPUHP_BRINGUP_CPU,
+
+ /*
+ * STARTING section invoked on the hotplugged CPU in low level
+ * bringup and teardown code.
+ */
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
CPUHP_AP_SCHED_STARTING,
@@ -155,6 +196,8 @@ enum cpuhp_state {
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU,
+
+ /* Online section invoked on the hotplugged CPU from the hotplug thread */
CPUHP_AP_ONLINE_IDLE,
CPUHP_AP_SCHED_WAIT_EMPTY,
CPUHP_AP_SMPBOOT_THREADS,
@@ -199,6 +242,8 @@ enum cpuhp_state {
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
+ /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
+ CPUHP_AP_MM_DEMOTION_ONLINE,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_DTPM_CPU_ONLINE,
@@ -216,14 +261,15 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
int (*teardown)(unsigned int cpu),
bool multi_instance);
/**
- * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the @startup
+ * callback
* @state: The state for which the calls are installed
* @name: Name of the callback (will be used in debug output)
- * @startup: startup callback function
- * @teardown: teardown callback function
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
*
- * Installs the callback functions and invokes the startup callback on
- * the present cpus which have already reached the @state.
+ * Installs the callback functions and invokes the @startup callback on
+ * the online cpus which have already reached the @state.
*/
static inline int cpuhp_setup_state(enum cpuhp_state state,
const char *name,
@@ -233,6 +279,18 @@ static inline int cpuhp_setup_state(enum cpuhp_state state,
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
+/**
+ * cpuhp_setup_state_cpuslocked - Setup hotplug state callbacks with calling
+ * @startup callback from a cpus_read_lock()
+ * held region
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback (will be used in debug output)
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
+ *
+ * Same as cpuhp_setup_state() except that it must be invoked from within a
+ * cpus_read_lock() held region.
+ */
static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
@@ -244,14 +302,14 @@ static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
- * callbacks
+ * @startup callback
* @state: The state for which the calls are installed
* @name: Name of the callback.
- * @startup: startup callback function
- * @teardown: teardown callback function
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
*
- * Same as @cpuhp_setup_state except that no calls are executed are invoked
- * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ * Same as cpuhp_setup_state() except that the @startup callback is not
+ * invoked during installation. NOP if SMP=n or HOTPLUG_CPU=n.
*/
static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
const char *name,
@@ -262,6 +320,19 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
false);
}
+/**
+ * cpuhp_setup_state_nocalls_cpuslocked - Setup hotplug state callbacks without
+ * invoking the @startup callback from
+ * a cpus_read_lock() held region
+ * callbacks
+ * @state: The state for which the calls are installed
+ * @name: Name of the callback.
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
+ *
+ * Same as cpuhp_setup_state_nocalls() except that it must be invoked from
+ * within a cpus_read_lock() held region.
+ */
static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
@@ -275,13 +346,13 @@ static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
* @name: Name of the callback.
- * @startup: startup callback function
- * @teardown: teardown callback function
+ * @startup: startup callback function or NULL if not required
+ * @teardown: teardown callback function or NULL if not required
*
* Sets the internal multi_instance flag and prepares a state to work as a multi
* instance callback. No callbacks are invoked at this point. The callbacks are
* invoked once an instance for this state are registered via
- * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+ * cpuhp_state_add_instance() or cpuhp_state_add_instance_nocalls()
*/
static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
const char *name,
@@ -306,9 +377,10 @@ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
* @state: The state for which the instance is installed
* @node: The node for this individual state.
*
- * Installs the instance for the @state and invokes the startup callback on
- * the present cpus which have already reached the @state. The @state must have
- * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+ * Installs the instance for the @state and invokes the registered startup
+ * callback on the online cpus which have already reached the @state. The
+ * @state must have been earlier marked as multi-instance by
+ * cpuhp_setup_state_multi().
*/
static inline int cpuhp_state_add_instance(enum cpuhp_state state,
struct hlist_node *node)
@@ -322,8 +394,9 @@ static inline int cpuhp_state_add_instance(enum cpuhp_state state,
* @state: The state for which the instance is installed
* @node: The node for this individual state.
*
- * Installs the instance for the @state The @state must have been earlier
- * marked as multi-instance by @cpuhp_setup_state_multi.
+ * Installs the instance for the @state. The @state must have been earlier
+ * marked as multi-instance by cpuhp_setup_state_multi. NOP if SMP=n or
+ * HOTPLUG_CPU=n.
*/
static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
struct hlist_node *node)
@@ -331,6 +404,17 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
return __cpuhp_state_add_instance(state, node, false);
}
+/**
+ * cpuhp_state_add_instance_nocalls_cpuslocked - Add an instance for a state
+ * without invoking the startup
+ * callback from a cpus_read_lock()
+ * held region.
+ * @state: The state for which the instance is installed
+ * @node: The node for this individual state.
+ *
+ * Same as cpuhp_state_add_instance_nocalls() except that it must be
+ * invoked from within a cpus_read_lock() held region.
+ */
static inline int
cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
struct hlist_node *node)
@@ -346,7 +430,7 @@ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
* @state: The state for which the calls are removed
*
* Removes the callback functions and invokes the teardown callback on
- * the present cpus which have already reached the @state.
+ * the online cpus which have already reached the @state.
*/
static inline void cpuhp_remove_state(enum cpuhp_state state)
{
@@ -355,7 +439,7 @@ static inline void cpuhp_remove_state(enum cpuhp_state state)
/**
* cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
- * teardown
+ * the teardown callback
* @state: The state for which the calls are removed
*/
static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
@@ -363,6 +447,14 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
__cpuhp_remove_state(state, false);
}
+/**
+ * cpuhp_remove_state_nocalls_cpuslocked - Remove hotplug state callbacks without invoking
+ * teardown from a cpus_read_lock() held region.
+ * @state: The state for which the calls are removed
+ *
+ * Same as cpuhp_remove_state nocalls() except that it must be invoked
+ * from within a cpus_read_lock() held region.
+ */
static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
{
__cpuhp_remove_state_cpuslocked(state, false);
@@ -390,8 +482,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
* @state: The state from which the instance is removed
* @node: The node for this individual state.
*
- * Removes the instance and invokes the teardown callback on the present cpus
- * which have already reached the @state.
+ * Removes the instance and invokes the teardown callback on the online cpus
+ * which have already reached @state.
*/
static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
struct hlist_node *node)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 5d4d07a9e1ed..1e7399fc69c0 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -996,14 +996,15 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
* cpumask; Typically used by bin_attribute to export cpumask bitmask
* ABI.
*
- * Returns the length of how many bytes have been copied.
+ * Returns the length of how many bytes have been copied, excluding
+ * terminating '\0'.
*/
static inline ssize_t
cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
loff_t off, size_t count)
{
return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask),
- nr_cpu_ids, off, count);
+ nr_cpu_ids, off, count) - 1;
}
/**
@@ -1018,7 +1019,7 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
loff_t off, size_t count)
{
return bitmap_print_list_to_buf(buf, cpumask_bits(mask),
- nr_cpu_ids, off, count);
+ nr_cpu_ids, off, count) - 1;
}
#if NR_CPUS <= BITS_PER_LONG
diff --git a/include/linux/dsa/mv88e6xxx.h b/include/linux/dsa/mv88e6xxx.h
new file mode 100644
index 000000000000..8c3d45eca46b
--- /dev/null
+++ b/include/linux/dsa/mv88e6xxx.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright 2021 NXP
+ */
+
+#ifndef _NET_DSA_TAG_MV88E6XXX_H
+#define _NET_DSA_TAG_MV88E6XXX_H
+
+#include <linux/if_vlan.h>
+
+#define MV88E6XXX_VID_STANDALONE 0
+#define MV88E6XXX_VID_BRIDGED (VLAN_N_VID - 1)
+
+#endif
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index c6bc45ae5e03..8ae999f587c4 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -1,11 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0
- * Copyright 2019-2021 NXP Semiconductors
+ * Copyright 2019-2021 NXP
*/
#ifndef _NET_DSA_TAG_OCELOT_H
#define _NET_DSA_TAG_OCELOT_H
+#include <linux/kthread.h>
#include <linux/packing.h>
+#include <linux/skbuff.h>
+
+struct ocelot_skb_cb {
+ struct sk_buff *clone;
+ unsigned int ptp_class; /* valid only for clones */
+ u8 ptp_cmd;
+ u8 ts_id;
+};
+
+#define OCELOT_SKB_CB(skb) \
+ ((struct ocelot_skb_cb *)((skb)->cb))
+
+#define IFH_TAG_TYPE_C 0
+#define IFH_TAG_TYPE_S 1
+
+#define IFH_REW_OP_NOOP 0x0
+#define IFH_REW_OP_DSCP 0x1
+#define IFH_REW_OP_ONE_STEP_PTP 0x2
+#define IFH_REW_OP_TWO_STEP_PTP 0x3
+#define IFH_REW_OP_ORIGIN_PTP 0x5
#define OCELOT_TAG_LEN 16
#define OCELOT_SHORT_PREFIX_LEN 4
@@ -140,6 +161,17 @@
* +------+------+------+------+------+------+------+------+
*/
+struct felix_deferred_xmit_work {
+ struct dsa_port *dp;
+ struct sk_buff *skb;
+ struct kthread_work work;
+};
+
+struct felix_port {
+ void (*xmit_work_fn)(struct kthread_work *work);
+ struct kthread_worker *xmit_worker;
+};
+
static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val)
{
packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0);
@@ -215,4 +247,21 @@ static inline void ocelot_ifh_set_vid(void *injection, u64 vid)
packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0);
}
+/* Determine the PTP REW_OP to use for injecting the given skb */
+static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
+{
+ struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
+ u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd;
+ u32 rew_op = 0;
+
+ if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) {
+ rew_op = ptp_cmd;
+ rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
+ } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
+ rew_op = ptp_cmd;
+ }
+
+ return rew_op;
+}
+
#endif
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 171106202fe5..9e07079528a5 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -48,6 +48,10 @@ struct sja1105_tagger_data {
spinlock_t meta_lock;
unsigned long state;
u8 ts_id;
+ /* Used on SJA1110 where meta frames are generated only for
+ * 2-step TX timestamps
+ */
+ struct sk_buff_head skb_txtstamp_queue;
};
struct sja1105_skb_cb {
@@ -69,42 +73,24 @@ struct sja1105_port {
bool hwts_tx_en;
};
-enum sja1110_meta_tstamp {
- SJA1110_META_TSTAMP_TX = 0,
- SJA1110_META_TSTAMP_RX = 1,
-};
-
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
-
-void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
- enum sja1110_meta_tstamp dir, u64 tstamp);
-
-#else
+/* Timestamps are in units of 8 ns clock ticks (equivalent to
+ * a fixed 125 MHz clock).
+ */
+#define SJA1105_TICK_NS 8
-static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
- u8 ts_id, enum sja1110_meta_tstamp dir,
- u64 tstamp)
+static inline s64 ns_to_sja1105_ticks(s64 ns)
{
+ return ns / SJA1105_TICK_NS;
}
-#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
-
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
-
-extern const struct dsa_switch_ops sja1105_switch_ops;
-
-static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+static inline s64 sja1105_ticks_to_ns(s64 ticks)
{
- return dp->ds->ops == &sja1105_switch_ops;
+ return ticks * SJA1105_TICK_NS;
}
-#else
-
static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
{
- return false;
+ return true;
}
-#endif
-
#endif /* _NET_DSA_SJA1105_H */
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 2aaa15779d50..957ebec35aad 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
#endif
}
-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
+#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
/*
* These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
* extra segments containing the gate DSO contents. Dumping its
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 1834752c5617..39dcadd492b5 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -11,7 +11,7 @@
#include <linux/types.h>
/**
- * em_perf_state - Performance state of a performance domain
+ * struct em_perf_state - Performance state of a performance domain
* @frequency: The frequency in KHz, for consistency with CPUFreq
* @power: The power consumed at this level (by 1 CPU or by a registered
* device). It can be a total power: static and dynamic.
@@ -25,7 +25,7 @@ struct em_perf_state {
};
/**
- * em_perf_domain - Performance domain
+ * struct em_perf_domain - Performance domain
* @table: List of performance states, in ascending order
* @nr_perf_states: Number of performance states
* @milliwatts: Flag indicating the power values are in milli-Watts
@@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev);
/**
* em_cpu_energy() - Estimates the energy consumed by the CPUs of a
- performance domain
+ * performance domain
* @pd : performance domain for which energy has to be estimated
* @max_util : highest utilization among CPUs of the domain
* @sum_util : sum of the utilization of all CPUs in the domain
* @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which
- might reflect reduced frequency (due to thermal)
+ * might reflect reduced frequency (due to thermal)
*
* This function must be used only for CPU devices. There is no validation,
* i.e. if the EM is a CPU type and has cpumask allocated. It is called from
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 928c411bd509..c58d50451485 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -308,7 +308,7 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src)
*/
static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr)
{
- ether_addr_copy(dev->dev_addr, addr);
+ __dev_addr_set(dev, addr, ETH_ALEN);
}
/**
diff --git a/include/linux/file.h b/include/linux/file.h
index 2de2e4613d7b..51e830b4fe3a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -94,6 +94,9 @@ extern void fd_install(unsigned int fd, struct file *file);
extern int __receive_fd(struct file *file, int __user *ufd,
unsigned int o_flags);
+
+extern int receive_fd(struct file *file, unsigned int o_flags);
+
static inline int receive_fd_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
@@ -101,10 +104,6 @@ static inline int receive_fd_user(struct file *file, int __user *ufd,
return -EFAULT;
return __receive_fd(file, ufd, o_flags);
}
-static inline int receive_fd(struct file *file, unsigned int o_flags)
-{
- return __receive_fd(file, NULL, o_flags);
-}
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
extern void flush_delayed_fput(void);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4a93c12543ee..ef03ff34234d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1051,6 +1051,7 @@ extern int bpf_jit_enable;
extern int bpf_jit_harden;
extern int bpf_jit_kallsyms;
extern long bpf_jit_limit;
+extern long bpf_jit_limit_max;
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index c12df59d3f5f..3e378b1fb0bc 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -83,9 +83,10 @@ struct fprop_local_percpu {
int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp);
void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
-void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
-void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
- int max_frac);
+void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl,
+ long nr);
+void __fprop_add_percpu_max(struct fprop_global *p,
+ struct fprop_local_percpu *pl, int max_frac, long nr);
void fprop_fraction_percpu(struct fprop_global *p,
struct fprop_local_percpu *pl, unsigned long *numerator,
unsigned long *denominator);
@@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
unsigned long flags;
local_irq_save(flags);
- __fprop_inc_percpu(p, pl);
+ __fprop_add_percpu(p, pl, 1);
local_irq_restore(flags);
}
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 59828516ebaf..9f4ad719bfe3 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -22,10 +22,15 @@ struct device;
* LINKS_ADDED: The fwnode has already be parsed to add fwnode links.
* NOT_DEVICE: The fwnode will never be populated as a struct device.
* INITIALIZED: The hardware corresponding to fwnode has been initialized.
+ * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its
+ * driver needs its child devices to be bound with
+ * their respective drivers as soon as they are
+ * added.
*/
-#define FWNODE_FLAG_LINKS_ADDED BIT(0)
-#define FWNODE_FLAG_NOT_DEVICE BIT(1)
-#define FWNODE_FLAG_INITIALIZED BIT(2)
+#define FWNODE_FLAG_LINKS_ADDED BIT(0)
+#define FWNODE_FLAG_NOT_DEVICE BIT(1)
+#define FWNODE_FLAG_INITIALIZED BIT(2)
+#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3)
struct fwnode_handle {
struct fwnode_handle *secondary;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c68d83c87f83..0f5315c2b5a3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -149,6 +149,7 @@ struct gendisk {
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
+#define GD_DEAD 2
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 55b2ec1f965a..3745efd21cf6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -520,15 +520,11 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
-#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-static inline int arch_make_page_accessible(struct page *page)
-{
- return 0;
-}
-#endif
struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask);
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+ nodemask_t *nodemask);
unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
nodemask_t *nodemask, int nr_pages,
@@ -570,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
return __alloc_pages(gfp_mask, order, nid, NULL);
}
+static inline
+struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+{
+ VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+ VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+
+ return __folio_alloc(gfp, order, nid, NULL);
+}
+
/*
* Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
* prefer the current CPU's closest node. Otherwise node must be valid and
@@ -586,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc(gfp_t gfp, unsigned order);
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
int node, bool hugepage);
@@ -596,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
+static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
+{
+ return __folio_alloc_node(gfp, order, numa_node_id());
+}
#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
alloc_pages(gfp_mask, order)
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 4aa1031d3e4c..0a0b2b09b1b8 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page)
return __kmap_local_page_prot(page, kmap_prot);
}
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+ struct page *page = folio_page(folio, offset / PAGE_SIZE);
+ return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE;
+}
+
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
{
return __kmap_local_page_prot(page, prot);
@@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page)
return page_address(page);
}
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+ return page_address(&folio->page) + offset;
+}
+
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
{
return kmap_local_page(page);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index b4c49f9cc379..27cdd715c5f9 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -97,6 +97,43 @@ static inline void kmap_flush_unused(void);
static inline void *kmap_local_page(struct page *page);
/**
+ * kmap_local_folio - Map a page in this folio for temporary usage
+ * @folio: The folio containing the page.
+ * @offset: The byte offset within the folio which identifies the page.
+ *
+ * Requires careful handling when nesting multiple mappings because the map
+ * management is stack based. The unmap has to be in the reverse order of
+ * the map operation::
+ *
+ * addr1 = kmap_local_folio(folio1, offset1);
+ * addr2 = kmap_local_folio(folio2, offset2);
+ * ...
+ * kunmap_local(addr2);
+ * kunmap_local(addr1);
+ *
+ * Unmapping addr1 before addr2 is invalid and causes malfunction.
+ *
+ * Contrary to kmap() mappings the mapping is only valid in the context of
+ * the caller and cannot be handed to other contexts.
+ *
+ * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the
+ * virtual address of the direct mapping. Only real highmem pages are
+ * temporarily mapped.
+ *
+ * While it is significantly faster than kmap() for the higmem case it
+ * comes with restrictions about the pointer validity. Only use when really
+ * necessary.
+ *
+ * On HIGHMEM enabled systems mapping a highmem page has the side effect of
+ * disabling migration in order to keep the virtual address stable across
+ * preemption. No caller of kmap_local_folio() can rely on this side effect.
+ *
+ * Context: Can be invoked from any context.
+ * Return: The virtual address of @offset.
+ */
+static inline void *kmap_local_folio(struct folio *folio, size_t offset);
+
+/**
* kmap_atomic - Atomically map a page for temporary usage - Deprecated!
* @page: Pointer to the page to be mapped
*
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f123e15d966e..f280f33ff223 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -251,15 +251,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
}
/**
- * thp_head - Head page of a transparent huge page.
- * @page: Any page (tail, head or regular) found in the page cache.
- */
-static inline struct page *thp_head(struct page *page)
-{
- return compound_head(page);
-}
-
-/**
* thp_order - Order of a transparent huge page.
* @page: Head page of a transparent huge page.
*/
@@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page)
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
-static inline struct page *thp_head(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- return page;
-}
-
static inline unsigned int thp_order(struct page *page)
{
VM_BUG_ON_PGFLAGS(PageTail(page), page);
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 23e4ee523576..9ee238ad29ce 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -251,7 +251,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
}
void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
irq_hw_number_t hwirq_max, int direct_max,
const struct irq_domain_ops *ops,
void *host_data);
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 161e8164abcf..a38a5bca1ba5 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address);
void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
-void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
#else /* !CONFIG_KSM */
@@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page,
{
}
-static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old)
{
}
#endif /* CONFIG_MMU */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 041ca7f15ea4..0f18df7fe874 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -608,7 +608,6 @@ struct kvm {
unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end;
#endif
- long tlbs_dirty;
struct list_head devices;
u64 manual_dirty_log_protect;
struct dentry *debugfs_dentry;
@@ -721,11 +720,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
return NULL;
}
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
- return vcpu->vcpu_idx;
-}
-
#define kvm_for_each_memslot(memslot, slots) \
for (memslot = &slots->memslots[0]; \
memslot < slots->memslots + slots->used_slots; memslot++) \
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index ffb787d5ebde..5e6dc38f418e 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -80,6 +80,9 @@ struct mdio_driver {
/* Clears up any memory if needed */
void (*remove)(struct mdio_device *mdiodev);
+
+ /* Quiesces the device on system shutdown, turns off interrupts etc */
+ void (*shutdown)(struct mdio_device *mdiodev);
};
static inline struct mdio_driver *
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b066024c62e3..34de69b3b8ba 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -118,6 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
void memblock_free_all(void);
+void memblock_free_ptr(void *ptr, size_t size);
void reset_node_managed_pages(pg_data_t *pgdat);
void reset_all_zones_managed_pages(void);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3096c9a0ee01..e34bf0cbdf55 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -369,7 +369,7 @@ enum page_memcg_data_flags {
#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
-static inline bool PageMemcgKmem(struct page *page);
+static inline bool folio_memcg_kmem(struct folio *folio);
/*
* After the initialization objcg->memcg is always pointing at
@@ -384,89 +384,95 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
}
/*
- * __page_memcg - get the memory cgroup associated with a non-kmem page
- * @page: a pointer to the page struct
+ * __folio_memcg - Get the memory cgroup associated with a non-kmem folio
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * kmem pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * kmem folios.
*/
-static inline struct mem_cgroup *__page_memcg(struct page *page)
+static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
{
- unsigned long memcg_data = page->memcg_data;
+ unsigned long memcg_data = folio->memcg_data;
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
- * __page_objcg - get the object cgroup associated with a kmem page
- * @page: a pointer to the page struct
+ * __folio_objcg - get the object cgroup associated with a kmem folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the object cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the object cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper object cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * LRU pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * LRU folios.
*/
-static inline struct obj_cgroup *__page_objcg(struct page *page)
+static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
{
- unsigned long memcg_data = page->memcg_data;
+ unsigned long memcg_data = folio->memcg_data;
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);
return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
- * page_memcg - get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+ * folio_memcg - Get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
*
- * For a non-kmem page any of the following ensures page and memcg binding
+ * For a non-kmem folio any of the following ensures folio and memcg binding
* stability:
*
- * - the page lock
+ * - the folio lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
*
- * For a kmem page a caller should hold an rcu read lock to protect memcg
- * associated with a kmem page from being released.
+ * For a kmem folio a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem folio from being released.
*/
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+ if (folio_memcg_kmem(folio))
+ return obj_cgroup_memcg(__folio_objcg(folio));
+ return __folio_memcg(folio);
+}
+
static inline struct mem_cgroup *page_memcg(struct page *page)
{
- if (PageMemcgKmem(page))
- return obj_cgroup_memcg(__page_objcg(page));
- else
- return __page_memcg(page);
+ return folio_memcg(page_folio(page));
}
-/*
- * page_memcg_rcu - locklessly get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+/**
+ * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
+ *
+ * Return: A pointer to the memory cgroup associated with the folio,
+ * or NULL.
*/
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
- unsigned long memcg_data = READ_ONCE(page->memcg_data);
+ unsigned long memcg_data = READ_ONCE(folio->memcg_data);
- VM_BUG_ON_PAGE(PageSlab(page), page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
WARN_ON_ONCE(!rcu_read_lock_held());
if (memcg_data & MEMCG_DATA_KMEM) {
@@ -523,17 +529,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
#ifdef CONFIG_MEMCG_KMEM
/*
- * PageMemcgKmem - check if the page has MemcgKmem flag set
- * @page: a pointer to the page struct
+ * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
+ * @folio: Pointer to the folio.
*
- * Checks if the page has MemcgKmem flag set. The caller must ensure that
- * the page has an associated memory cgroup. It's not safe to call this function
- * against some types of pages, e.g. slab pages.
+ * Checks if the folio has MemcgKmem flag set. The caller must ensure
+ * that the folio has an associated memory cgroup. It's not safe to call
+ * this function against some types of folios, e.g. slab folios.
*/
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
{
- VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
- return page->memcg_data & MEMCG_DATA_KMEM;
+ VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
+ VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
+ return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
@@ -577,7 +584,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#else
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
{
return false;
}
@@ -593,6 +600,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#endif
+static inline bool PageMemcgKmem(struct page *page)
+{
+ return folio_memcg_kmem(page_folio(page));
+}
+
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return (memcg == root_mem_cgroup);
@@ -684,26 +696,47 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
page_counter_read(&memcg->memory);
}
-int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
+
+/**
+ * mem_cgroup_charge - Charge a newly allocated folio to a cgroup.
+ * @folio: Folio to charge.
+ * @mm: mm context of the allocating task.
+ * @gfp: Reclaim mode.
+ *
+ * Try to charge @folio to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp if necessary. If @mm is NULL, try to
+ * charge to the active memcg.
+ *
+ * Do not use this for folios allocated for swapin.
+ *
+ * Return: 0 on success. Otherwise, an error code is returned.
+ */
+static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
+ gfp_t gfp)
{
if (mem_cgroup_disabled())
return 0;
- return __mem_cgroup_charge(page, mm, gfp_mask);
+ return __mem_cgroup_charge(folio, mm, gfp);
}
int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
-void __mem_cgroup_uncharge(struct page *page);
-static inline void mem_cgroup_uncharge(struct page *page)
+void __mem_cgroup_uncharge(struct folio *folio);
+
+/**
+ * mem_cgroup_uncharge - Uncharge a folio.
+ * @folio: Folio to uncharge.
+ *
+ * Uncharge a folio previously charged with mem_cgroup_charge().
+ */
+static inline void mem_cgroup_uncharge(struct folio *folio)
{
if (mem_cgroup_disabled())
return;
- __mem_cgroup_uncharge(page);
+ __mem_cgroup_uncharge(folio);
}
void __mem_cgroup_uncharge_list(struct list_head *page_list);
@@ -714,7 +747,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
__mem_cgroup_uncharge_list(page_list);
}
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
+void mem_cgroup_migrate(struct folio *old, struct folio *new);
/**
* mem_cgroup_lruvec - get the lru list vector for a memcg & node
@@ -753,33 +786,33 @@ out:
}
/**
- * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
- * @page: the page
+ * folio_lruvec - return lruvec for isolating/putting an LRU folio
+ * @folio: Pointer to the folio.
*
- * This function relies on page->mem_cgroup being stable.
+ * This function relies on folio->mem_cgroup being stable.
*/
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
{
- pg_data_t *pgdat = page_pgdat(page);
- struct mem_cgroup *memcg = page_memcg(page);
+ struct mem_cgroup *memcg = folio_memcg(folio);
- VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
- return mem_cgroup_lruvec(memcg, pgdat);
+ VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio);
+ return mem_cgroup_lruvec(memcg, folio_pgdat(folio));
}
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
-struct lruvec *lock_page_lruvec(struct page *page);
-struct lruvec *lock_page_lruvec_irq(struct page *page);
-struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+struct lruvec *folio_lruvec_lock(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flags);
#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
#else
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
#endif
@@ -947,6 +980,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
extern bool cgroup_memory_noswap;
#endif
+void folio_memcg_lock(struct folio *folio);
+void folio_memcg_unlock(struct folio *folio);
void lock_page_memcg(struct page *page);
void unlock_page_memcg(struct page *page);
@@ -1115,12 +1150,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
#define MEM_CGROUP_ID_SHIFT 0
#define MEM_CGROUP_ID_MAX 0
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+ return NULL;
+}
+
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
@@ -1131,6 +1171,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
return NULL;
}
+static inline bool folio_memcg_kmem(struct folio *folio)
+{
+ return false;
+}
+
static inline bool PageMemcgKmem(struct page *page)
{
return false;
@@ -1179,8 +1224,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
return false;
}
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+static inline int mem_cgroup_charge(struct folio *folio,
+ struct mm_struct *mm, gfp_t gfp)
{
return 0;
}
@@ -1195,7 +1240,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry)
{
}
-static inline void mem_cgroup_uncharge(struct page *page)
+static inline void mem_cgroup_uncharge(struct folio *folio)
{
}
@@ -1203,7 +1248,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
-static inline void mem_cgroup_migrate(struct page *old, struct page *new)
+static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
{
}
@@ -1213,14 +1258,14 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
return &pgdat->__lruvec;
}
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
{
- pg_data_t *pgdat = page_pgdat(page);
-
+ struct pglist_data *pgdat = folio_pgdat(folio);
return &pgdat->__lruvec;
}
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
@@ -1250,26 +1295,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
-static inline struct lruvec *lock_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
-static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock_irq(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
-static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flagsp)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
return &pgdat->__lruvec;
@@ -1356,6 +1401,14 @@ static inline void unlock_page_memcg(struct page *page)
{
}
+static inline void folio_memcg_lock(struct folio *folio)
+{
+}
+
+static inline void folio_memcg_unlock(struct folio *folio)
+{
+}
+
static inline void mem_cgroup_handle_over_high(void)
{
}
@@ -1517,38 +1570,39 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
}
/* Test requires a stable page->memcg binding, see page_memcg() */
-static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+static inline bool folio_matches_lruvec(struct folio *folio,
+ struct lruvec *lruvec)
{
- return lruvec_pgdat(lruvec) == page_pgdat(page) &&
- lruvec_memcg(lruvec) == page_memcg(page);
+ return lruvec_pgdat(lruvec) == folio_pgdat(folio) &&
+ lruvec_memcg(lruvec) == folio_memcg(folio);
}
/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
struct lruvec *locked_lruvec)
{
if (locked_lruvec) {
- if (page_matches_lruvec(page, locked_lruvec))
+ if (folio_matches_lruvec(folio, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irq(locked_lruvec);
}
- return lock_page_lruvec_irq(page);
+ return folio_lruvec_lock_irq(folio);
}
/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
struct lruvec *locked_lruvec, unsigned long *flags)
{
if (locked_lruvec) {
- if (page_matches_lruvec(page, locked_lruvec))
+ if (folio_matches_lruvec(folio, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
}
- return lock_page_lruvec_irqsave(page, flags);
+ return folio_lruvec_lock_irqsave(folio, flags);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -1558,17 +1612,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
unsigned long *pheadroom, unsigned long *pdirty,
unsigned long *pwriteback);
-void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
struct bdi_writeback *wb);
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
if (mem_cgroup_disabled())
return;
- if (unlikely(&page_memcg(page)->css != wb->memcg_css))
- mem_cgroup_track_foreign_dirty_slowpath(page, wb);
+ if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
@@ -1588,7 +1642,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
{
}
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 7efc0a7c14c9..182c606adb06 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
#else
-#define hotplug_memory_notifier(fn, pri) ({ 0; })
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+ return 0;
+}
/* These aren't inline functions due to a GCC bug. */
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 326250996b4e..0d2aeb9b0f66 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -19,6 +19,11 @@ struct migration_target_control;
*/
#define MIGRATEPAGE_SUCCESS 0
+/*
+ * Keep sync with:
+ * - macro MIGRATE_REASON in include/trace/events/migrate.h
+ * - migrate_reason_names[MR_TYPES] in mm/debug.c
+ */
enum migrate_reason {
MR_COMPACTION,
MR_MEMORY_FAILURE,
@@ -32,7 +37,6 @@ enum migrate_reason {
MR_TYPES
};
-/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
extern const char *migrate_reason_names[MR_TYPES];
#ifdef CONFIG_MIGRATION
@@ -53,6 +57,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
+void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
+int folio_migrate_mapping(struct address_space *mapping,
+ struct folio *newfolio, struct folio *folio, int extra_count);
#else
static inline void putback_movable_pages(struct list_head *l) {}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..f17d2101af7a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1138,7 +1138,6 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3638d09ba77..993204a6c1a1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9475,16 +9475,22 @@ struct mlx5_ifc_pcmr_reg_bits {
u8 reserved_at_0[0x8];
u8 local_port[0x8];
u8 reserved_at_10[0x10];
+
u8 entropy_force_cap[0x1];
u8 entropy_calc_cap[0x1];
u8 entropy_gre_calc_cap[0x1];
- u8 reserved_at_23[0x1b];
+ u8 reserved_at_23[0xf];
+ u8 rx_ts_over_crc_cap[0x1];
+ u8 reserved_at_33[0xb];
u8 fcs_cap[0x1];
u8 reserved_at_3f[0x1];
+
u8 entropy_force[0x1];
u8 entropy_calc[0x1];
u8 entropy_gre_calc[0x1];
- u8 reserved_at_43[0x1b];
+ u8 reserved_at_43[0xf];
+ u8 rx_ts_over_crc[0x1];
+ u8 reserved_at_53[0xb];
u8 fcs_chk[0x1];
u8 reserved_at_5f[0x1];
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..40ff114aaf9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,10 +36,7 @@
struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
-struct file_ra_state;
struct user_struct;
-struct writeback_control;
-struct bdi_writeback;
struct pt_regs;
extern int sysctl_page_lock_unfairness;
@@ -216,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
-/*
- * Any attempt to mark this function as static leads to build failure
- * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked()
- * is referred to by BPF code. This must be visible for error injection.
- */
-int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp, void **shadowp);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
@@ -748,13 +738,18 @@ static inline int put_page_testzero(struct page *page)
return page_ref_dec_and_test(page);
}
+static inline int folio_put_testzero(struct folio *folio)
+{
+ return put_page_testzero(&folio->page);
+}
+
/*
* Try to grab a ref unless the page has a refcount of zero, return false if
* that is the case.
* This can be called when MMU is off so it must not access
* any of the virtual mappings.
*/
-static inline int get_page_unless_zero(struct page *page)
+static inline bool get_page_unless_zero(struct page *page)
{
return page_ref_add_unless(page, 1, 0);
}
@@ -907,7 +902,7 @@ void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
-void copy_huge_page(struct page *dst, struct page *src);
+void folio_copy(struct folio *dst, struct folio *src);
/*
* Compound pages have a destructor function. Provide a
@@ -950,6 +945,20 @@ static inline unsigned int compound_order(struct page *page)
return page[1].compound_order;
}
+/**
+ * folio_order - The allocation order of a folio.
+ * @folio: The folio.
+ *
+ * A folio is composed of 2^order pages. See get_order() for the definition
+ * of order.
+ *
+ * Return: The order of the folio.
+ */
+static inline unsigned int folio_order(struct folio *folio)
+{
+ return compound_order(&folio->page);
+}
+
static inline bool hpage_pincount_available(struct page *page)
{
/*
@@ -1131,6 +1140,11 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
+static inline enum zone_type folio_zonenum(const struct folio *folio)
+{
+ return page_zonenum(&folio->page);
+}
+
#ifdef CONFIG_ZONE_DEVICE
static inline bool is_zone_device_page(const struct page *page)
{
@@ -1200,18 +1214,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
}
/* 127: arbitrary random number, small enough to assemble well */
-#define page_ref_zero_or_close_to_overflow(page) \
- ((unsigned int) page_ref_count(page) + 127u <= 127u)
+#define folio_ref_zero_or_close_to_overflow(folio) \
+ ((unsigned int) folio_ref_count(folio) + 127u <= 127u)
+
+/**
+ * folio_get - Increment the reference count on a folio.
+ * @folio: The folio.
+ *
+ * Context: May be called in any context, as long as you know that
+ * you have a refcount on the folio. If you do not already have one,
+ * folio_try_get() may be the right interface for you to use.
+ */
+static inline void folio_get(struct folio *folio)
+{
+ VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio);
+ folio_ref_inc(folio);
+}
static inline void get_page(struct page *page)
{
- page = compound_head(page);
- /*
- * Getting a normal page or the head of a compound page
- * requires to already have an elevated page->_refcount.
- */
- VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
- page_ref_inc(page);
+ folio_get(page_folio(page));
}
bool __must_check try_grab_page(struct page *page, unsigned int flags);
@@ -1228,9 +1250,28 @@ static inline __must_check bool try_get_page(struct page *page)
return true;
}
+/**
+ * folio_put - Decrement the reference count on a folio.
+ * @folio: The folio.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately. Do not access the memory or the struct folio
+ * after calling folio_put() unless you can be sure that it wasn't the
+ * last reference.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context. May be called while holding a spinlock.
+ */
+static inline void folio_put(struct folio *folio)
+{
+ if (folio_put_testzero(folio))
+ __put_page(&folio->page);
+}
+
static inline void put_page(struct page *page)
{
- page = compound_head(page);
+ struct folio *folio = page_folio(page);
/*
* For devmap managed pages we need to catch refcount transition from
@@ -1238,13 +1279,12 @@ static inline void put_page(struct page *page)
* need to inform the device driver through callback. See
* include/linux/memremap.h and HMM for details.
*/
- if (page_is_devmap_managed(page)) {
- put_devmap_managed_page(page);
+ if (page_is_devmap_managed(&folio->page)) {
+ put_devmap_managed_page(&folio->page);
return;
}
- if (put_page_testzero(page))
- __put_page(page);
+ folio_put(folio);
}
/*
@@ -1379,6 +1419,11 @@ static inline int page_to_nid(const struct page *page)
}
#endif
+static inline int folio_nid(const struct folio *folio)
+{
+ return page_to_nid(&folio->page);
+}
+
#ifdef CONFIG_NUMA_BALANCING
static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
@@ -1546,6 +1591,16 @@ static inline pg_data_t *page_pgdat(const struct page *page)
return NODE_DATA(page_to_nid(page));
}
+static inline struct zone *folio_zone(const struct folio *folio)
+{
+ return page_zone(&folio->page);
+}
+
+static inline pg_data_t *folio_pgdat(const struct folio *folio)
+{
+ return page_pgdat(&folio->page);
+}
+
#ifdef SECTION_IN_PAGE_FLAGS
static inline void set_page_section(struct page *page, unsigned long section)
{
@@ -1559,6 +1614,20 @@ static inline unsigned long page_to_section(const struct page *page)
}
#endif
+/**
+ * folio_pfn - Return the Page Frame Number of a folio.
+ * @folio: The folio.
+ *
+ * A folio may contain multiple pages. The pages have consecutive
+ * Page Frame Numbers.
+ *
+ * Return: The Page Frame Number of the first page in the folio.
+ */
+static inline unsigned long folio_pfn(struct folio *folio)
+{
+ return page_to_pfn(&folio->page);
+}
+
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
#ifdef CONFIG_MIGRATION
static inline bool is_pinnable_page(struct page *page)
@@ -1595,6 +1664,89 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
+/**
+ * folio_nr_pages - The number of pages in the folio.
+ * @folio: The folio.
+ *
+ * Return: A positive power of two.
+ */
+static inline long folio_nr_pages(struct folio *folio)
+{
+ return compound_nr(&folio->page);
+}
+
+/**
+ * folio_next - Move to the next physical folio.
+ * @folio: The folio we're currently operating on.
+ *
+ * If you have physically contiguous memory which may span more than
+ * one folio (eg a &struct bio_vec), use this function to move from one
+ * folio to the next. Do not use it if the memory is only virtually
+ * contiguous as the folios are almost certainly not adjacent to each
+ * other. This is the folio equivalent to writing ``page++``.
+ *
+ * Context: We assume that the folios are refcounted and/or locked at a
+ * higher level and do not adjust the reference counts.
+ * Return: The next struct folio.
+ */
+static inline struct folio *folio_next(struct folio *folio)
+{
+ return (struct folio *)folio_page(folio, folio_nr_pages(folio));
+}
+
+/**
+ * folio_shift - The size of the memory described by this folio.
+ * @folio: The folio.
+ *
+ * A folio represents a number of bytes which is a power-of-two in size.
+ * This function tells you which power-of-two the folio is. See also
+ * folio_size() and folio_order().
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split. It is not necessary for the folio to be locked.
+ * Return: The base-2 logarithm of the size of this folio.
+ */
+static inline unsigned int folio_shift(struct folio *folio)
+{
+ return PAGE_SHIFT + folio_order(folio);
+}
+
+/**
+ * folio_size - The number of bytes in a folio.
+ * @folio: The folio.
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split. It is not necessary for the folio to be locked.
+ * Return: The number of bytes in this folio.
+ */
+static inline size_t folio_size(struct folio *folio)
+{
+ return PAGE_SIZE << folio_order(folio);
+}
+
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+static inline int arch_make_page_accessible(struct page *page)
+{
+ return 0;
+}
+#endif
+
+#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
+static inline int arch_make_folio_accessible(struct folio *folio)
+{
+ int ret;
+ long i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ ret = arch_make_page_accessible(folio_page(folio, i));
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+#endif
+
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
@@ -1635,19 +1787,6 @@ void page_address_init(void);
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
-extern struct address_space *page_mapping(struct page *page);
-
-extern struct address_space *__page_file_mapping(struct page *);
-
-static inline
-struct address_space *page_file_mapping(struct page *page)
-{
- if (unlikely(PageSwapCache(page)))
- return __page_file_mapping(page);
-
- return page->mapping;
-}
-
extern pgoff_t __page_file_index(struct page *page);
/*
@@ -1662,7 +1801,7 @@ static inline pgoff_t page_index(struct page *page)
}
bool page_mapped(struct page *page);
-struct address_space *page_mapping(struct page *page);
+bool folio_mapped(struct folio *folio);
/*
* Return true only if the page has been allocated with
@@ -1700,6 +1839,7 @@ extern void pagefault_out_of_memory(void);
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1))
+#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
/*
* Flags passed to show_mem() and show_free_areas() to suppress output in
@@ -1854,20 +1994,9 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
-int redirty_page_for_writepage(struct writeback_control *wbc,
- struct page *page);
-void account_page_cleaned(struct page *page, struct address_space *mapping,
- struct bdi_writeback *wb);
-int set_page_dirty(struct page *page);
+bool folio_mark_dirty(struct folio *folio);
+bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
-void __cancel_dirty_page(struct page *page);
-static inline void cancel_dirty_page(struct page *page)
-{
- /* Avoid atomic ops, locking, etc. when not actually needed. */
- if (PageDirty(page))
- __cancel_dirty_page(page);
-}
-int clear_page_dirty_for_io(struct page *page);
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
@@ -2659,10 +2788,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-/* mm/page-writeback.c */
-int __must_check write_one_page(struct page *page);
-void task_dirty_inc(struct task_struct *tsk);
-
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355ea1ee32bd..e2ec68b0515c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -6,27 +6,33 @@
#include <linux/swap.h>
/**
- * page_is_file_lru - should the page be on a file LRU or anon LRU?
- * @page: the page to test
- *
- * Returns 1 if @page is a regular filesystem backed page cache page or a lazily
- * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal
- * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by
- * functions that manipulate the LRU lists, to sort a page onto the right LRU
- * list.
+ * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
+ * @folio: The folio to test.
*
* We would like to get this info without a page flag, but the state
- * needs to survive until the page is last deleted from the LRU, which
+ * needs to survive until the folio is last deleted from the LRU, which
* could be as far down as __page_cache_release.
+ *
+ * Return: An integer (not a boolean!) used to sort a folio onto the
+ * right LRU list and to account folios correctly.
+ * 1 if @folio is a regular filesystem backed page cache folio
+ * or a lazily freed anonymous folio (e.g. via MADV_FREE).
+ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise
+ * ram or swap backed folio.
*/
+static inline int folio_is_file_lru(struct folio *folio)
+{
+ return !folio_test_swapbacked(folio);
+}
+
static inline int page_is_file_lru(struct page *page)
{
- return !PageSwapBacked(page);
+ return folio_is_file_lru(page_folio(page));
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
- int nr_pages)
+ long nr_pages)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
}
/**
- * __clear_page_lru_flags - clear page lru flags before releasing a page
- * @page: the page that was on lru and now has a zero reference
+ * __folio_clear_lru_flags - Clear page lru flags before releasing a page.
+ * @folio: The folio that was on lru and now has a zero reference.
*/
-static __always_inline void __clear_page_lru_flags(struct page *page)
+static __always_inline void __folio_clear_lru_flags(struct folio *folio)
{
- VM_BUG_ON_PAGE(!PageLRU(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio);
- __ClearPageLRU(page);
+ __folio_clear_lru(folio);
/* this shouldn't happen, so leave the flags to bad_page() */
- if (PageActive(page) && PageUnevictable(page))
+ if (folio_test_active(folio) && folio_test_unevictable(folio))
return;
- __ClearPageActive(page);
- __ClearPageUnevictable(page);
+ __folio_clear_active(folio);
+ __folio_clear_unevictable(folio);
+}
+
+static __always_inline void __clear_page_lru_flags(struct page *page)
+{
+ __folio_clear_lru_flags(page_folio(page));
}
/**
- * page_lru - which LRU list should a page be on?
- * @page: the page to test
+ * folio_lru_list - Which LRU list should a folio be on?
+ * @folio: The folio to test.
*
- * Returns the LRU list a page should be on, as an index
+ * Return: The LRU list a folio should be on, as an index
* into the array of LRU lists.
*/
-static __always_inline enum lru_list page_lru(struct page *page)
+static __always_inline enum lru_list folio_lru_list(struct folio *folio)
{
enum lru_list lru;
- VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+ VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
- if (PageUnevictable(page))
+ if (folio_test_unevictable(folio))
return LRU_UNEVICTABLE;
- lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
- if (PageActive(page))
+ lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+ if (folio_test_active(folio))
lru += LRU_ACTIVE;
return lru;
}
+static __always_inline
+void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
+{
+ enum lru_list lru = folio_lru_list(folio);
+
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
+ folio_nr_pages(folio));
+ list_add(&folio->lru, &lruvec->lists[lru]);
+}
+
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec)
{
- enum lru_list lru = page_lru(page);
+ lruvec_add_folio(lruvec, page_folio(page));
+}
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
+{
+ enum lru_list lru = folio_lru_list(folio);
+
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
+ folio_nr_pages(folio));
+ list_add_tail(&folio->lru, &lruvec->lists[lru]);
}
static __always_inline void add_page_to_lru_list_tail(struct page *page,
struct lruvec *lruvec)
{
- enum lru_list lru = page_lru(page);
+ lruvec_add_folio_tail(lruvec, page_folio(page));
+}
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add_tail(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
+{
+ list_del(&folio->lru);
+ update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
+ -folio_nr_pages(folio));
}
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec)
{
- list_del(&page->lru);
- update_lru_size(lruvec, page_lru(page), page_zonenum(page),
- -thp_nr_pages(page));
+ lruvec_del_folio(lruvec, page_folio(page));
}
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..82dab23205c3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -239,6 +239,72 @@ struct page {
#endif
} _struct_page_alignment;
+/**
+ * struct folio - Represents a contiguous set of bytes.
+ * @flags: Identical to the page flags.
+ * @lru: Least Recently Used list; tracks how recently this folio was used.
+ * @mapping: The file this page belongs to, or refers to the anon_vma for
+ * anonymous memory.
+ * @index: Offset within the file, in units of pages. For anonymous memory,
+ * this is the index from the beginning of the mmap.
+ * @private: Filesystem per-folio data (see folio_attach_private()).
+ * Used for swp_entry_t if folio_test_swapcache().
+ * @_mapcount: Do not access this member directly. Use folio_mapcount() to
+ * find out how many times this folio is mapped by userspace.
+ * @_refcount: Do not access this member directly. Use folio_ref_count()
+ * to find how many references there are to this folio.
+ * @memcg_data: Memory Control Group data.
+ *
+ * A folio is a physically, virtually and logically contiguous set
+ * of bytes. It is a power-of-two in size, and it is aligned to that
+ * same power-of-two. It is at least as large as %PAGE_SIZE. If it is
+ * in the page cache, it is at a file offset which is a multiple of that
+ * power-of-two. It may be mapped into userspace at an address which is
+ * at an arbitrary page offset, but its kernel virtual address is aligned
+ * to its size.
+ */
+struct folio {
+ /* private: don't document the anon union */
+ union {
+ struct {
+ /* public: */
+ unsigned long flags;
+ struct list_head lru;
+ struct address_space *mapping;
+ pgoff_t index;
+ void *private;
+ atomic_t _mapcount;
+ atomic_t _refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+ /* private: the union with struct page is transitional */
+ };
+ struct page page;
+ };
+};
+
+static_assert(sizeof(struct page) == sizeof(struct folio));
+#define FOLIO_MATCH(pg, fl) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
+FOLIO_MATCH(flags, flags);
+FOLIO_MATCH(lru, lru);
+FOLIO_MATCH(compound_head, lru);
+FOLIO_MATCH(index, index);
+FOLIO_MATCH(private, private);
+FOLIO_MATCH(_mapcount, _mapcount);
+FOLIO_MATCH(_refcount, _refcount);
+#ifdef CONFIG_MEMCG
+FOLIO_MATCH(memcg_data, memcg_data);
+#endif
+#undef FOLIO_MATCH
+
+static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
+{
+ struct page *tail = &folio->page + 1;
+ return &tail->compound_mapcount;
+}
+
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
@@ -257,6 +323,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page)
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+/*
+ * page_private can be used on tail pages. However, PagePrivate is only
+ * checked by the VM on the head page. So page_private on the tail pages
+ * should be used for data that's ancillary to the head page (eg attaching
+ * buffer heads to tail pages after attaching buffer heads to the head page)
+ */
#define page_private(page) ((page)->private)
static inline void set_page_private(struct page *page, unsigned long private)
@@ -264,6 +336,11 @@ static inline void set_page_private(struct page *page, unsigned long private)
page->private = private;
}
+static inline void *folio_get_private(struct folio *folio)
+{
+ return folio->private;
+}
+
struct page_frag_cache {
void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index b179f1e3541a..96e113e23d04 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
up_read(&mm->mmap_lock);
}
-static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
-{
- if (mmap_read_trylock(mm)) {
- rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
- return true;
- }
- return false;
-}
-
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
__mmap_lock_trace_released(mm, false);
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 1935d4c72d10..d7285f8148a3 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
+#define VM_BUG_ON_FOLIO(cond, folio) \
+ do { \
+ if (unlikely(cond)) { \
+ dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\
+ BUG(); \
+ } \
+ } while (0)
#define VM_BUG_ON_VMA(cond, vma) \
do { \
if (unlikely(cond)) { \
@@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm);
} \
unlikely(__ret_warn_once); \
})
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
@@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm);
#else
#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond)
#define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d6a4158a9a6..12c4177f7703 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -22,6 +22,7 @@
* Overload PG_private_2 to give us PG_fscache - this is used to indicate that
* a page is currently backed by a local disk cache
*/
+#define folio_test_fscache(folio) folio_test_private_2(folio)
#define PageFsCache(page) PagePrivate2((page))
#define SetPageFsCache(page) SetPagePrivate2((page))
#define ClearPageFsCache(page) ClearPagePrivate2((page))
@@ -29,60 +30,80 @@
#define TestClearPageFsCache(page) TestClearPagePrivate2((page))
/**
- * set_page_fscache - Set PG_fscache on a page and take a ref
- * @page: The page.
+ * folio_start_fscache - Start an fscache write on a folio.
+ * @folio: The folio.
*
- * Set the PG_fscache (PG_private_2) flag on a page and take the reference
- * needed for the VM to handle its lifetime correctly. This sets the flag and
- * takes the reference unconditionally, so care must be taken not to set the
- * flag again if it's already set.
+ * Call this function before writing a folio to a local cache. Starting a
+ * second write before the first one finishes is not allowed.
*/
-static inline void set_page_fscache(struct page *page)
+static inline void folio_start_fscache(struct folio *folio)
{
- set_page_private_2(page);
+ VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
+ folio_get(folio);
+ folio_set_private_2(folio);
}
/**
- * end_page_fscache - Clear PG_fscache and release any waiters
- * @page: The page
- *
- * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers
- * waiting for this. The page ref held for PG_private_2 being set is released.
+ * folio_end_fscache - End an fscache write on a folio.
+ * @folio: The folio.
*
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
- * serialised.
+ * Call this function after the folio has been written to the local cache.
+ * This will wake any sleepers waiting on this folio.
*/
-static inline void end_page_fscache(struct page *page)
+static inline void folio_end_fscache(struct folio *folio)
{
- end_page_private_2(page);
+ folio_end_private_2(folio);
}
/**
- * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
*
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page.
+ * If this folio is currently being written to a local cache, wait for
+ * the write to finish. Another write may start after this one finishes,
+ * unless the caller holds the folio lock.
*/
-static inline void wait_on_page_fscache(struct page *page)
+static inline void folio_wait_fscache(struct folio *folio)
{
- wait_on_page_private_2(page);
+ folio_wait_private_2(folio);
}
/**
- * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
*
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a
- * fatal signal is received by the calling task.
+ * If this folio is currently being written to a local cache, wait
+ * for the write to finish or for a fatal signal to be received.
+ * Another write may start after this one finishes, unless the caller
+ * holds the folio lock.
*
* Return:
* - 0 if successful.
* - -EINTR if a fatal signal was encountered.
*/
+static inline int folio_wait_fscache_killable(struct folio *folio)
+{
+ return folio_wait_private_2_killable(folio);
+}
+
+static inline void set_page_fscache(struct page *page)
+{
+ folio_start_fscache(page_folio(page));
+}
+
+static inline void end_page_fscache(struct page *page)
+{
+ folio_end_private_2(page_folio(page));
+}
+
+static inline void wait_on_page_fscache(struct page *page)
+{
+ folio_wait_private_2(page_folio(page));
+}
+
static inline int wait_on_page_fscache_killable(struct page *page)
{
- return wait_on_page_private_2_killable(page);
+ return folio_wait_private_2_killable(page_folio(page));
}
enum netfs_read_source {
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 923dada24eb4..c0c0cefc3b92 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -150,6 +150,20 @@ static inline int nvmem_cell_read_u64(struct device *dev,
return -EOPNOTSUPP;
}
+static inline int nvmem_cell_read_variable_le_u32(struct device *dev,
+ const char *cell_id,
+ u32 *val)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int nvmem_cell_read_variable_le_u64(struct device *dev,
+ const char *cell_id,
+ u64 *val)
+{
+ return -EOPNOTSUPP;
+}
+
static inline struct nvmem_device *nvmem_device_get(struct device *dev,
const char *name)
{
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 0f12345c21fb..4669632bd72b 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -6,12 +6,9 @@
#include <linux/limits.h>
/*
- * In the fallback code below, we need to compute the minimum and
- * maximum values representable in a given type. These macros may also
- * be useful elsewhere, so we provide them outside the
- * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
- *
- * It would seem more obvious to do something like
+ * We need to compute the minimum and maximum values representable in a given
+ * type. These macros may also be useful elsewhere. It would seem more obvious
+ * to do something like:
*
* #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
* #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
@@ -54,7 +51,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
return unlikely(overflow);
}
-#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
/*
* For simplicity and code hygiene, the fallback code below insists on
* a, b and *d having the same type (similar to the min() and max()
@@ -90,134 +86,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
__builtin_mul_overflow(__a, __b, __d); \
}))
-#else
-
-
-/* Checking for unsigned overflow is relatively easy without causing UB. */
-#define __unsigned_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a + __b; \
- *__d < __a; \
-})
-#define __unsigned_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a - __b; \
- __a < __b; \
-})
-/*
- * If one of a or b is a compile-time constant, this avoids a division.
- */
-#define __unsigned_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = __a * __b; \
- __builtin_constant_p(__b) ? \
- __b > 0 && __a > type_max(typeof(__a)) / __b : \
- __a > 0 && __b > type_max(typeof(__b)) / __a; \
-})
-
-/*
- * For signed types, detecting overflow is much harder, especially if
- * we want to avoid UB. But the interface of these macros is such that
- * we must provide a result in *d, and in fact we must produce the
- * result promised by gcc's builtins, which is simply the possibly
- * wrapped-around value. Fortunately, we can just formally do the
- * operations in the widest relevant unsigned type (u64) and then
- * truncate the result - gcc is smart enough to generate the same code
- * with and without the (u64) casts.
- */
-
-/*
- * Adding two signed integers can overflow only if they have the same
- * sign, and overflow has happened iff the result has the opposite
- * sign.
- */
-#define __signed_add_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a + (u64)__b; \
- (((~(__a ^ __b)) & (*__d ^ __a)) \
- & type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Subtraction is similar, except that overflow can now happen only
- * when the signs are opposite. In this case, overflow has happened if
- * the result has the opposite sign of a.
- */
-#define __signed_sub_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a - (u64)__b; \
- ((((__a ^ __b)) & (*__d ^ __a)) \
- & type_min(typeof(__a))) != 0; \
-})
-
-/*
- * Signed multiplication is rather hard. gcc always follows C99, so
- * division is truncated towards 0. This means that we can write the
- * overflow check like this:
- *
- * (a > 0 && (b > MAX/a || b < MIN/a)) ||
- * (a < -1 && (b > MIN/a || b < MAX/a) ||
- * (a == -1 && b == MIN)
- *
- * The redundant casts of -1 are to silence an annoying -Wtype-limits
- * (included in -Wextra) warning: When the type is u8 or u16, the
- * __b_c_e in check_mul_overflow obviously selects
- * __unsigned_mul_overflow, but unfortunately gcc still parses this
- * code and warns about the limited range of __b.
- */
-
-#define __signed_mul_overflow(a, b, d) ({ \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- typeof(d) __d = (d); \
- typeof(a) __tmax = type_max(typeof(a)); \
- typeof(a) __tmin = type_min(typeof(a)); \
- (void) (&__a == &__b); \
- (void) (&__a == __d); \
- *__d = (u64)__a * (u64)__b; \
- (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
- (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
- (__b == (typeof(__b))-1 && __a == __tmin); \
-})
-
-
-#define check_add_overflow(a, b, d) __must_check_overflow( \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_add_overflow(a, b, d), \
- __unsigned_add_overflow(a, b, d)))
-
-#define check_sub_overflow(a, b, d) __must_check_overflow( \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_sub_overflow(a, b, d), \
- __unsigned_sub_overflow(a, b, d)))
-
-#define check_mul_overflow(a, b, d) __must_check_overflow( \
- __builtin_choose_expr(is_signed_type(typeof(a)), \
- __signed_mul_overflow(a, b, d), \
- __unsigned_mul_overflow(a, b, d)))
-
-#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
-
/** check_shl_overflow() - Calculate a left-shifted value and check overflow
*
* @a: Value to be shifted
diff --git a/include/linux/packing.h b/include/linux/packing.h
index 54667735cc67..8d6571feb95d 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2018, NXP Semiconductors
+ * Copyright 2016-2018 NXP
* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _LINUX_PACKING_H
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a558d67ee86f..d8623d6e1141 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -143,6 +143,8 @@ enum pageflags {
#endif
__NR_PAGEFLAGS,
+ PG_readahead = PG_reclaim,
+
/* Filesystems */
PG_checked = PG_owner_priv_1,
@@ -171,6 +173,15 @@ enum pageflags {
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset,
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * Compound pages. Stored in first tail page's flags.
+ * Indicates that at least one subpage is hwpoisoned in the
+ * THP.
+ */
+ PG_has_hwpoisoned = PG_mappedtodisk,
+#endif
+
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
@@ -193,6 +204,34 @@ static inline unsigned long _compound_head(const struct page *page)
#define compound_head(page) ((typeof(page))_compound_head(page))
+/**
+ * page_folio - Converts from page to folio.
+ * @p: The page.
+ *
+ * Every page is part of a folio. This function cannot be called on a
+ * NULL pointer.
+ *
+ * Context: No reference, nor lock is required on @page. If the caller
+ * does not hold a reference, this call may race with a folio split, so
+ * it should re-check the folio still contains this page after gaining
+ * a reference on the folio.
+ * Return: The folio which contains this page.
+ */
+#define page_folio(p) (_Generic((p), \
+ const struct page *: (const struct folio *)_compound_head(p), \
+ struct page *: (struct folio *)_compound_head(p)))
+
+/**
+ * folio_page - Return a page from a folio.
+ * @folio: The folio.
+ * @n: The page number to return.
+ *
+ * @n is relative to the start of the folio. This function does not
+ * check that the page number lies within @folio; the caller is presumed
+ * to have a reference to the page.
+ */
+#define folio_page(folio, n) nth_page(&(folio)->page, n)
+
static __always_inline int PageTail(struct page *page)
{
return READ_ONCE(page->compound_head) & 1;
@@ -217,6 +256,15 @@ static inline void page_init_poison(struct page *page, size_t size)
}
#endif
+static unsigned long *folio_flags(struct folio *folio, unsigned n)
+{
+ struct page *page = &folio->page;
+
+ VM_BUG_ON_PGFLAGS(PageTail(page), page);
+ VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+ return &page[n].flags;
+}
+
/*
* Page flags policies wrt compound pages
*
@@ -261,36 +309,64 @@ static inline void page_init_poison(struct page *page, size_t size)
VM_BUG_ON_PGFLAGS(!PageHead(page), page); \
PF_POISONED_CHECK(&page[1]); })
+/* Which page is the flag stored in */
+#define FOLIO_PF_ANY 0
+#define FOLIO_PF_HEAD 0
+#define FOLIO_PF_ONLY_HEAD 0
+#define FOLIO_PF_NO_TAIL 0
+#define FOLIO_PF_NO_COMPOUND 0
+#define FOLIO_PF_SECOND 1
+
/*
* Macros to create function definitions for page flags
*/
#define TESTPAGEFLAG(uname, lname, policy) \
+static __always_inline bool folio_test_##lname(struct folio *folio) \
+{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int Page##uname(struct page *page) \
- { return test_bit(PG_##lname, &policy(page, 0)->flags); }
+{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
#define SETPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void folio_set_##lname(struct folio *folio) \
+{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void SetPage##uname(struct page *page) \
- { set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ set_bit(PG_##lname, &policy(page, 1)->flags); }
#define CLEARPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void folio_clear_##lname(struct folio *folio) \
+{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void ClearPage##uname(struct page *page) \
- { clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define __SETPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void __folio_set_##lname(struct folio *folio) \
+{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void __SetPage##uname(struct page *page) \
- { __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
#define __CLEARPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void __folio_clear_##lname(struct folio *folio) \
+{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void __ClearPage##uname(struct page *page) \
- { __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTSETFLAG(uname, lname, policy) \
+static __always_inline \
+bool folio_test_set_##lname(struct folio *folio) \
+{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int TestSetPage##uname(struct page *page) \
- { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTCLEARFLAG(uname, lname, policy) \
+static __always_inline \
+bool folio_test_clear_##lname(struct folio *folio) \
+{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int TestClearPage##uname(struct page *page) \
- { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define PAGEFLAG(uname, lname, policy) \
TESTPAGEFLAG(uname, lname, policy) \
@@ -306,29 +382,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
-#define TESTPAGEFLAG_FALSE(uname) \
+#define TESTPAGEFLAG_FALSE(uname, lname) \
+static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
static inline int Page##uname(const struct page *page) { return 0; }
-#define SETPAGEFLAG_NOOP(uname) \
+#define SETPAGEFLAG_NOOP(uname, lname) \
+static inline void folio_set_##lname(struct folio *folio) { } \
static inline void SetPage##uname(struct page *page) { }
-#define CLEARPAGEFLAG_NOOP(uname) \
+#define CLEARPAGEFLAG_NOOP(uname, lname) \
+static inline void folio_clear_##lname(struct folio *folio) { } \
static inline void ClearPage##uname(struct page *page) { }
-#define __CLEARPAGEFLAG_NOOP(uname) \
+#define __CLEARPAGEFLAG_NOOP(uname, lname) \
+static inline void __folio_clear_##lname(struct folio *folio) { } \
static inline void __ClearPage##uname(struct page *page) { }
-#define TESTSETFLAG_FALSE(uname) \
+#define TESTSETFLAG_FALSE(uname, lname) \
+static inline bool folio_test_set_##lname(struct folio *folio) \
+{ return 0; } \
static inline int TestSetPage##uname(struct page *page) { return 0; }
-#define TESTCLEARFLAG_FALSE(uname) \
+#define TESTCLEARFLAG_FALSE(uname, lname) \
+static inline bool folio_test_clear_##lname(struct folio *folio) \
+{ return 0; } \
static inline int TestClearPage##uname(struct page *page) { return 0; }
-#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
- SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
+ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname)
-#define TESTSCFLAG_FALSE(uname) \
- TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+#define TESTSCFLAG_FALSE(uname, lname) \
+ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
@@ -384,8 +468,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
-PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
- TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND)
+PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND)
+ TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND)
#ifdef CONFIG_HIGHMEM
/*
@@ -394,22 +478,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
*/
#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
#else
-PAGEFLAG_FALSE(HighMem)
+PAGEFLAG_FALSE(HighMem, highmem)
#endif
#ifdef CONFIG_SWAP
-static __always_inline int PageSwapCache(struct page *page)
+static __always_inline bool folio_test_swapcache(struct folio *folio)
{
-#ifdef CONFIG_THP_SWAP
- page = compound_head(page);
-#endif
- return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
+ return folio_test_swapbacked(folio) &&
+ test_bit(PG_swapcache, folio_flags(folio, 0));
+}
+static __always_inline bool PageSwapCache(struct page *page)
+{
+ return folio_test_swapcache(page_folio(page));
}
+
SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
#else
-PAGEFLAG_FALSE(SwapCache)
+PAGEFLAG_FALSE(SwapCache, swapcache)
#endif
PAGEFLAG(Unevictable, unevictable, PF_HEAD)
@@ -421,14 +508,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
__CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL)
#else
-PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
- TESTSCFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked)
+ TESTSCFLAG_FALSE(Mlocked, mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND)
#else
-PAGEFLAG_FALSE(Uncached)
+PAGEFLAG_FALSE(Uncached, uncached)
#endif
#ifdef CONFIG_MEMORY_FAILURE
@@ -437,7 +524,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
extern bool take_page_off_buddy(struct page *page);
#else
-PAGEFLAG_FALSE(HWPoison)
+PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
#endif
@@ -451,7 +538,7 @@ PAGEFLAG(Idle, idle, PF_ANY)
#ifdef CONFIG_KASAN_HW_TAGS
PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
#else
-PAGEFLAG_FALSE(SkipKASanPoison)
+PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison)
#endif
/*
@@ -489,10 +576,14 @@ static __always_inline int PageMappingFlags(struct page *page)
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
}
-static __always_inline int PageAnon(struct page *page)
+static __always_inline bool folio_test_anon(struct folio *folio)
+{
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
+}
+
+static __always_inline bool PageAnon(struct page *page)
{
- page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+ return folio_test_anon(page_folio(page));
}
static __always_inline int __PageMovable(struct page *page)
@@ -508,30 +599,32 @@ static __always_inline int __PageMovable(struct page *page)
* is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
* anon_vma, but to that page's node of the stable tree.
*/
-static __always_inline int PageKsm(struct page *page)
+static __always_inline bool folio_test_ksm(struct folio *folio)
{
- page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
PAGE_MAPPING_KSM;
}
+
+static __always_inline bool PageKsm(struct page *page)
+{
+ return folio_test_ksm(page_folio(page));
+}
#else
-TESTPAGEFLAG_FALSE(Ksm)
+TESTPAGEFLAG_FALSE(Ksm, ksm)
#endif
u64 stable_page_flags(struct page *page);
-static inline int PageUptodate(struct page *page)
+static inline bool folio_test_uptodate(struct folio *folio)
{
- int ret;
- page = compound_head(page);
- ret = test_bit(PG_uptodate, &(page)->flags);
+ bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
/*
- * Must ensure that the data we read out of the page is loaded
- * _after_ we've loaded page->flags to check for PageUptodate.
- * We can skip the barrier if the page is not uptodate, because
+ * Must ensure that the data we read out of the folio is loaded
+ * _after_ we've loaded folio->flags to check the uptodate bit.
+ * We can skip the barrier if the folio is not uptodate, because
* we wouldn't be reading anything from it.
*
- * See SetPageUptodate() for the other side of the story.
+ * See folio_mark_uptodate() for the other side of the story.
*/
if (ret)
smp_rmb();
@@ -539,47 +632,71 @@ static inline int PageUptodate(struct page *page)
return ret;
}
-static __always_inline void __SetPageUptodate(struct page *page)
+static inline int PageUptodate(struct page *page)
+{
+ return folio_test_uptodate(page_folio(page));
+}
+
+static __always_inline void __folio_mark_uptodate(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
smp_wmb();
- __set_bit(PG_uptodate, &page->flags);
+ __set_bit(PG_uptodate, folio_flags(folio, 0));
}
-static __always_inline void SetPageUptodate(struct page *page)
+static __always_inline void folio_mark_uptodate(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
/*
* Memory barrier must be issued before setting the PG_uptodate bit,
- * so that all previous stores issued in order to bring the page
- * uptodate are actually visible before PageUptodate becomes true.
+ * so that all previous stores issued in order to bring the folio
+ * uptodate are actually visible before folio_test_uptodate becomes true.
*/
smp_wmb();
- set_bit(PG_uptodate, &page->flags);
+ set_bit(PG_uptodate, folio_flags(folio, 0));
+}
+
+static __always_inline void __SetPageUptodate(struct page *page)
+{
+ __folio_mark_uptodate((struct folio *)page);
+}
+
+static __always_inline void SetPageUptodate(struct page *page)
+{
+ folio_mark_uptodate((struct folio *)page);
}
CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
-int test_clear_page_writeback(struct page *page);
-int __test_set_page_writeback(struct page *page, bool keep_write);
+bool __folio_start_writeback(struct folio *folio, bool keep_write);
+bool set_page_writeback(struct page *page);
-#define test_set_page_writeback(page) \
- __test_set_page_writeback(page, false)
-#define test_set_page_writeback_keepwrite(page) \
- __test_set_page_writeback(page, true)
+#define folio_start_writeback(folio) \
+ __folio_start_writeback(folio, false)
+#define folio_start_writeback_keepwrite(folio) \
+ __folio_start_writeback(folio, true)
-static inline void set_page_writeback(struct page *page)
+static inline void set_page_writeback_keepwrite(struct page *page)
{
- test_set_page_writeback(page);
+ folio_start_writeback_keepwrite(page_folio(page));
}
-static inline void set_page_writeback_keepwrite(struct page *page)
+static inline bool test_set_page_writeback(struct page *page)
{
- test_set_page_writeback_keepwrite(page);
+ return set_page_writeback(page);
}
__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
+/* Whether there are one or multiple pages in a folio */
+static inline bool folio_test_single(struct folio *folio)
+{
+ return !folio_test_head(folio);
+}
+
+static inline bool folio_test_multi(struct folio *folio)
+{
+ return folio_test_head(folio);
+}
+
static __always_inline void set_compound_head(struct page *page, struct page *head)
{
WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
@@ -603,12 +720,15 @@ static inline void ClearPageCompound(struct page *page)
#ifdef CONFIG_HUGETLB_PAGE
int PageHuge(struct page *page);
int PageHeadHuge(struct page *page);
+static inline bool folio_test_hugetlb(struct folio *folio)
+{
+ return PageHeadHuge(&folio->page);
+}
#else
-TESTPAGEFLAG_FALSE(Huge)
-TESTPAGEFLAG_FALSE(HeadHuge)
+TESTPAGEFLAG_FALSE(Huge, hugetlb)
+TESTPAGEFLAG_FALSE(HeadHuge, headhuge)
#endif
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
@@ -624,6 +744,11 @@ static inline int PageTransHuge(struct page *page)
return PageHead(page);
}
+static inline bool folio_test_transhuge(struct folio *folio)
+{
+ return folio_test_head(folio);
+}
+
/*
* PageTransCompound returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
@@ -660,12 +785,26 @@ static inline int PageTransTail(struct page *page)
PAGEFLAG(DoubleMap, double_map, PF_SECOND)
TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
#else
-TESTPAGEFLAG_FALSE(TransHuge)
-TESTPAGEFLAG_FALSE(TransCompound)
-TESTPAGEFLAG_FALSE(TransCompoundMap)
-TESTPAGEFLAG_FALSE(TransTail)
-PAGEFLAG_FALSE(DoubleMap)
- TESTSCFLAG_FALSE(DoubleMap)
+TESTPAGEFLAG_FALSE(TransHuge, transhuge)
+TESTPAGEFLAG_FALSE(TransCompound, transcompound)
+TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
+TESTPAGEFLAG_FALSE(TransTail, transtail)
+PAGEFLAG_FALSE(DoubleMap, double_map)
+ TESTSCFLAG_FALSE(DoubleMap, double_map)
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
+ * compound page.
+ *
+ * This flag is set by hwpoison handler. Cleared by THP split or free page.
+ */
+PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+ TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+#else
+PAGEFLAG_FALSE(HasHWPoisoned)
+ TESTSCFLAG_FALSE(HasHWPoisoned)
#endif
/*
@@ -849,6 +988,11 @@ static inline int page_has_private(struct page *page)
return !!(page->flags & PAGE_FLAGS_PRIVATE);
}
+static inline bool folio_has_private(struct folio *folio)
+{
+ return page_has_private(&folio->page);
+}
+
#undef PF_ANY
#undef PF_HEAD
#undef PF_ONLY_HEAD
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index d8a6aecf99cb..83abf95e9fa7 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -8,46 +8,16 @@
#ifdef CONFIG_PAGE_IDLE_FLAG
-#ifdef CONFIG_64BIT
-static inline bool page_is_young(struct page *page)
-{
- return PageYoung(page);
-}
-
-static inline void set_page_young(struct page *page)
-{
- SetPageYoung(page);
-}
-
-static inline bool test_and_clear_page_young(struct page *page)
-{
- return TestClearPageYoung(page);
-}
-
-static inline bool page_is_idle(struct page *page)
-{
- return PageIdle(page);
-}
-
-static inline void set_page_idle(struct page *page)
-{
- SetPageIdle(page);
-}
-
-static inline void clear_page_idle(struct page *page)
-{
- ClearPageIdle(page);
-}
-#else /* !CONFIG_64BIT */
+#ifndef CONFIG_64BIT
/*
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
extern struct page_ext_operations page_idle_ops;
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page)
return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
@@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page)
set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page)
return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page)
return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
@@ -95,46 +65,75 @@ static inline void set_page_idle(struct page *page)
set_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-#endif /* CONFIG_64BIT */
+#endif /* !CONFIG_64BIT */
#else /* !CONFIG_PAGE_IDLE_FLAG */
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
{
return false;
}
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
{
}
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
{
return false;
}
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
{
return false;
}
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
{
}
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
{
}
#endif /* CONFIG_PAGE_IDLE_FLAG */
+static inline bool page_is_young(struct page *page)
+{
+ return folio_test_young(page_folio(page));
+}
+
+static inline void set_page_young(struct page *page)
+{
+ folio_set_young(page_folio(page));
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+ return folio_test_clear_young(page_folio(page));
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+ return folio_test_idle(page_folio(page));
+}
+
+static inline void set_page_idle(struct page *page)
+{
+ folio_set_idle(page_folio(page));
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+ folio_clear_idle(page_folio(page));
+}
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 719bfe5108c5..43c638c51c1f 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order);
extern void __set_page_owner(struct page *page,
unsigned int order, gfp_t gfp_mask);
extern void __split_page_owner(struct page *page, unsigned int nr);
-extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
+extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
extern void __dump_page_owner(const struct page *page);
extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr)
if (static_branch_unlikely(&page_owner_inited))
__split_page_owner(page, nr);
}
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
{
if (static_branch_unlikely(&page_owner_inited))
- __copy_page_owner(oldpage, newpage);
+ __folio_copy_owner(newfolio, old);
}
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
{
@@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page,
unsigned int order)
{
}
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
{
}
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 7ad46f45df39..2e677e6ad09f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page)
return atomic_read(&page->_refcount);
}
+/**
+ * folio_ref_count - The reference count on this folio.
+ * @folio: The folio.
+ *
+ * The refcount is usually incremented by calls to folio_get() and
+ * decremented by calls to folio_put(). Some typical users of the
+ * folio refcount:
+ *
+ * - Each reference from a page table
+ * - The page cache
+ * - Filesystem private data
+ * - The LRU list
+ * - Pipes
+ * - Direct IO which references this page in the process address space
+ *
+ * Return: The number of references to this folio.
+ */
+static inline int folio_ref_count(const struct folio *folio)
+{
+ return page_ref_count(&folio->page);
+}
+
static inline int page_count(const struct page *page)
{
- return atomic_read(&compound_head(page)->_refcount);
+ return folio_ref_count(page_folio(page));
}
static inline void set_page_count(struct page *page, int v)
@@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v)
__page_ref_set(page, v);
}
+static inline void folio_set_count(struct folio *folio, int v)
+{
+ set_page_count(&folio->page, v);
+}
+
/*
* Setup the page count before being freed into the page allocator for
* the first time (boot or memory hotplug)
@@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr)
__page_ref_mod(page, nr);
}
+static inline void folio_ref_add(struct folio *folio, int nr)
+{
+ page_ref_add(&folio->page, nr);
+}
+
static inline void page_ref_sub(struct page *page, int nr)
{
atomic_sub(nr, &page->_refcount);
@@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr)
__page_ref_mod(page, -nr);
}
+static inline void folio_ref_sub(struct folio *folio, int nr)
+{
+ page_ref_sub(&folio->page, nr);
+}
+
static inline int page_ref_sub_return(struct page *page, int nr)
{
int ret = atomic_sub_return(nr, &page->_refcount);
@@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr)
return ret;
}
+static inline int folio_ref_sub_return(struct folio *folio, int nr)
+{
+ return page_ref_sub_return(&folio->page, nr);
+}
+
static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
@@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page)
__page_ref_mod(page, 1);
}
+static inline void folio_ref_inc(struct folio *folio)
+{
+ page_ref_inc(&folio->page);
+}
+
static inline void page_ref_dec(struct page *page)
{
atomic_dec(&page->_refcount);
@@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page)
__page_ref_mod(page, -1);
}
+static inline void folio_ref_dec(struct folio *folio)
+{
+ page_ref_dec(&folio->page);
+}
+
static inline int page_ref_sub_and_test(struct page *page, int nr)
{
int ret = atomic_sub_and_test(nr, &page->_refcount);
@@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
return ret;
}
+static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
+{
+ return page_ref_sub_and_test(&folio->page, nr);
+}
+
static inline int page_ref_inc_return(struct page *page)
{
int ret = atomic_inc_return(&page->_refcount);
@@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page)
return ret;
}
+static inline int folio_ref_inc_return(struct folio *folio)
+{
+ return page_ref_inc_return(&folio->page);
+}
+
static inline int page_ref_dec_and_test(struct page *page)
{
int ret = atomic_dec_and_test(&page->_refcount);
@@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page)
return ret;
}
+static inline int folio_ref_dec_and_test(struct folio *folio)
+{
+ return page_ref_dec_and_test(&folio->page);
+}
+
static inline int page_ref_dec_return(struct page *page)
{
int ret = atomic_dec_return(&page->_refcount);
@@ -161,15 +228,91 @@ static inline int page_ref_dec_return(struct page *page)
return ret;
}
-static inline int page_ref_add_unless(struct page *page, int nr, int u)
+static inline int folio_ref_dec_return(struct folio *folio)
+{
+ return page_ref_dec_return(&folio->page);
+}
+
+static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- int ret = atomic_add_unless(&page->_refcount, nr, u);
+ bool ret = atomic_add_unless(&page->_refcount, nr, u);
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
return ret;
}
+static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
+{
+ return page_ref_add_unless(&folio->page, nr, u);
+}
+
+/**
+ * folio_try_get - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * If you do not already have a reference to a folio, you can attempt to
+ * get one using this function. It may fail if, for example, the folio
+ * has been freed since you found a pointer to it, or it is frozen for
+ * the purposes of splitting or migration.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get(struct folio *folio)
+{
+ return folio_ref_add_unless(folio, 1, 0);
+}
+
+static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
+{
+#ifdef CONFIG_TINY_RCU
+ /*
+ * The caller guarantees the folio will not be freed from interrupt
+ * context, so (on !SMP) we only need preemption to be disabled
+ * and TINY_RCU does that for us.
+ */
+# ifdef CONFIG_PREEMPT_COUNT
+ VM_BUG_ON(!in_atomic() && !irqs_disabled());
+# endif
+ VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
+ folio_ref_add(folio, count);
+#else
+ if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
+ /* Either the folio has been freed, or will be freed. */
+ return false;
+ }
+#endif
+ return true;
+}
+
+/**
+ * folio_try_get_rcu - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * This is a version of folio_try_get() optimised for non-SMP kernels.
+ * If you are still holding the rcu_read_lock() after looking up the
+ * page and know that the page cannot have its refcount decreased to
+ * zero in interrupt context, you can use this instead of folio_try_get().
+ *
+ * Example users include get_user_pages_fast() (as pages are not unmapped
+ * from interrupt context) and the page cache lookups (as pages are not
+ * truncated from interrupt context). We also know that pages are not
+ * frozen in interrupt context for the purposes of splitting or migration.
+ *
+ * You can also use this function if you're holding a lock that prevents
+ * pages being frozen & removed; eg the i_pages lock for the page cache
+ * or the mmap_sem or page table lock for page tables. In this case,
+ * it will always succeed, and you could have used a plain folio_get(),
+ * but it's sometimes more convenient to have a common function called
+ * from both locked and RCU-protected contexts.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get_rcu(struct folio *folio)
+{
+ return folio_ref_try_add_rcu(folio, 1);
+}
+
static inline int page_ref_freeze(struct page *page, int count)
{
int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
@@ -179,6 +322,11 @@ static inline int page_ref_freeze(struct page *page, int count)
return ret;
}
+static inline int folio_ref_freeze(struct folio *folio, int count)
+{
+ return page_ref_freeze(&folio->page, count);
+}
+
static inline void page_ref_unfreeze(struct page *page, int count)
{
VM_BUG_ON_PAGE(page_count(page) != 0, page);
@@ -189,4 +337,8 @@ static inline void page_ref_unfreeze(struct page *page, int count)
__page_ref_unfreeze(page, count);
}
+static inline void folio_ref_unfreeze(struct folio *folio, int count)
+{
+ page_ref_unfreeze(&folio->page, count);
+}
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 62db6b0176b9..013cdc90f5fd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -162,149 +162,119 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
void release_pages(struct page **pages, int nr);
-/*
- * For file cache pages, return the address_space, otherwise return NULL
+struct address_space *page_mapping(struct page *);
+struct address_space *folio_mapping(struct folio *);
+struct address_space *swapcache_mapping(struct folio *);
+
+/**
+ * folio_file_mapping - Find the mapping this folio belongs to.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to. Folios in the swap cache return the mapping of the
+ * swap file or swap device where the data is stored. This is different
+ * from the mapping returned by folio_mapping(). The only reason to
+ * use it is if, like NFS, you return 0 from ->activate_swapfile.
+ *
+ * Do not call this for folios which aren't in the page cache or swap cache.
*/
-static inline struct address_space *page_mapping_file(struct page *page)
+static inline struct address_space *folio_file_mapping(struct folio *folio)
{
- if (unlikely(PageSwapCache(page)))
- return NULL;
- return page_mapping(page);
+ if (unlikely(folio_test_swapcache(folio)))
+ return swapcache_mapping(folio);
+
+ return folio->mapping;
+}
+
+static inline struct address_space *page_file_mapping(struct page *page)
+{
+ return folio_file_mapping(page_folio(page));
}
/*
- * speculatively take a reference to a page.
- * If the page is free (_refcount == 0), then _refcount is untouched, and 0
- * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
- *
- * This function must be called inside the same rcu_read_lock() section as has
- * been used to lookup the page in the pagecache radix-tree (or page table):
- * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
- *
- * Unless an RCU grace period has passed, the count of all pages coming out
- * of the allocator must be considered unstable. page_count may return higher
- * than expected, and put_page must be able to do the right thing when the
- * page has been finished with, no matter what it is subsequently allocated
- * for (because put_page is what is used here to drop an invalid speculative
- * reference).
- *
- * This is the interesting part of the lockless pagecache (and lockless
- * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
- * has the following pattern:
- * 1. find page in radix tree
- * 2. conditionally increment refcount
- * 3. check the page is still in pagecache (if no, goto 1)
- *
- * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with the i_pages lock held):
- * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
- * B. remove page from pagecache
- * C. free the page
- *
- * There are 2 critical interleavings that matter:
- * - 2 runs before A: in this case, A sees elevated refcount and bails out
- * - A runs before 2: in this case, 2 sees zero refcount and retries;
- * subsequently, B will complete and 1 will find no page, causing the
- * lookup to return NULL.
- *
- * It is possible that between 1 and 2, the page is removed then the exact same
- * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using a lock could equally have run before or after
- * such a re-insertion, depending on order that locks are granted.
- *
- * Lookups racing against pagecache insertion isn't a big problem: either 1
- * will find the page or it will not. Likewise, the old find_get_page could run
- * either before the insertion or afterwards, depending on timing.
+ * For file cache pages, return the address_space, otherwise return NULL
*/
-static inline int __page_cache_add_speculative(struct page *page, int count)
+static inline struct address_space *page_mapping_file(struct page *page)
{
-#ifdef CONFIG_TINY_RCU
-# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
- /*
- * Preempt must be disabled here - we rely on rcu_read_lock doing
- * this for us.
- *
- * Pagecache won't be truncated from interrupt context, so if we have
- * found a page in the radix tree here, we have pinned its refcount by
- * disabling preempt, and hence no need for the "speculative get" that
- * SMP requires.
- */
- VM_BUG_ON_PAGE(page_count(page) == 0, page);
- page_ref_add(page, count);
+ struct folio *folio = page_folio(page);
-#else
- if (unlikely(!page_ref_add_unless(page, count, 0))) {
- /*
- * Either the page has been freed, or will be freed.
- * In either case, retry here and the caller should
- * do the right thing (see comments above).
- */
- return 0;
- }
-#endif
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- return 1;
+ if (unlikely(folio_test_swapcache(folio)))
+ return NULL;
+ return folio_mapping(folio);
}
-static inline int page_cache_get_speculative(struct page *page)
+static inline bool page_cache_add_speculative(struct page *page, int count)
{
- return __page_cache_add_speculative(page, 1);
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ return folio_ref_try_add_rcu((struct folio *)page, count);
}
-static inline int page_cache_add_speculative(struct page *page, int count)
+static inline bool page_cache_get_speculative(struct page *page)
{
- return __page_cache_add_speculative(page, count);
+ return page_cache_add_speculative(page, 1);
}
/**
- * attach_page_private - Attach private data to a page.
- * @page: Page to attach data to.
- * @data: Data to attach to page.
+ * folio_attach_private - Attach private data to a folio.
+ * @folio: Folio to attach data to.
+ * @data: Data to attach to folio.
*
- * Attaching private data to a page increments the page's reference count.
- * The data must be detached before the page will be freed.
+ * Attaching private data to a folio increments the page's reference count.
+ * The data must be detached before the folio will be freed.
*/
-static inline void attach_page_private(struct page *page, void *data)
+static inline void folio_attach_private(struct folio *folio, void *data)
{
- get_page(page);
- set_page_private(page, (unsigned long)data);
- SetPagePrivate(page);
+ folio_get(folio);
+ folio->private = data;
+ folio_set_private(folio);
}
/**
- * detach_page_private - Detach private data from a page.
- * @page: Page to detach data from.
+ * folio_detach_private - Detach private data from a folio.
+ * @folio: Folio to detach data from.
*
- * Removes the data that was previously attached to the page and decrements
+ * Removes the data that was previously attached to the folio and decrements
* the refcount on the page.
*
- * Return: Data that was attached to the page.
+ * Return: Data that was attached to the folio.
*/
-static inline void *detach_page_private(struct page *page)
+static inline void *folio_detach_private(struct folio *folio)
{
- void *data = (void *)page_private(page);
+ void *data = folio_get_private(folio);
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
return NULL;
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
+ folio_clear_private(folio);
+ folio->private = NULL;
+ folio_put(folio);
return data;
}
+static inline void attach_page_private(struct page *page, void *data)
+{
+ folio_attach_private(page_folio(page), data);
+}
+
+static inline void *detach_page_private(struct page *page)
+{
+ return folio_detach_private(page_folio(page));
+}
+
#ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
{
- return alloc_pages(gfp, 0);
+ return folio_alloc(gfp, order);
}
#endif
+static inline struct page *__page_cache_alloc(gfp_t gfp)
+{
+ return &filemap_alloc_folio(gfp, 0)->page;
+}
+
static inline struct page *page_cache_alloc(struct address_space *x)
{
return __page_cache_alloc(mapping_gfp_mask(x));
@@ -331,9 +301,28 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_FOR_MMAP 0x00000040
#define FGP_HEAD 0x00000080
#define FGP_ENTRY 0x00000100
+#define FGP_STABLE 0x00000200
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
- int fgp_flags, gfp_t cache_gfp_mask);
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp);
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp);
+
+/**
+ * filemap_get_folio - Find and get a folio.
+ * @mapping: The address_space to search.
+ * @index: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @index. If a folio is
+ * present, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct folio *filemap_get_folio(struct address_space *mapping,
+ pgoff_t index)
+{
+ return __filemap_get_folio(mapping, index, 0, 0);
+}
/**
* find_get_page - find and get a page reference
@@ -377,25 +366,6 @@ static inline struct page *find_lock_page(struct address_space *mapping,
}
/**
- * find_lock_head - Locate, pin and lock a pagecache page.
- * @mapping: The address_space to search.
- * @index: The page index.
- *
- * Looks up the page cache entry at @mapping & @index. If there is a
- * page cache page, its head page is returned locked and with an increased
- * refcount.
- *
- * Context: May sleep.
- * Return: A struct page which is !PageTail, or %NULL if there is no page
- * in the cache for this index.
- */
-static inline struct page *find_lock_head(struct address_space *mapping,
- pgoff_t index)
-{
- return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
-}
-
-/**
* find_or_create_page - locate or add a pagecache page
* @mapping: the page's address_space
* @index: the page's index into the mapping
@@ -452,6 +422,73 @@ static inline bool thp_contains(struct page *head, pgoff_t index)
return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
}
+#define swapcache_index(folio) __page_file_index(&(folio)->page)
+
+/**
+ * folio_index - File index of a folio.
+ * @folio: The folio.
+ *
+ * For a folio which is either in the page cache or the swap cache,
+ * return its index within the address_space it belongs to. If you know
+ * the page is definitely in the page cache, you can look at the folio's
+ * index directly.
+ *
+ * Return: The index (offset in units of pages) of a folio in its file.
+ */
+static inline pgoff_t folio_index(struct folio *folio)
+{
+ if (unlikely(folio_test_swapcache(folio)))
+ return swapcache_index(folio);
+ return folio->index;
+}
+
+/**
+ * folio_next_index - Get the index of the next folio.
+ * @folio: The current folio.
+ *
+ * Return: The index of the folio which follows this folio in the file.
+ */
+static inline pgoff_t folio_next_index(struct folio *folio)
+{
+ return folio->index + folio_nr_pages(folio);
+}
+
+/**
+ * folio_file_page - The page for a particular index.
+ * @folio: The folio which contains this index.
+ * @index: The index we want to look up.
+ *
+ * Sometimes after looking up a folio in the page cache, we need to
+ * obtain the specific page for an index (eg a page fault).
+ *
+ * Return: The page containing the file data for this index.
+ */
+static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (folio_test_hugetlb(folio))
+ return &folio->page;
+ return folio_page(folio, index & (folio_nr_pages(folio) - 1));
+}
+
+/**
+ * folio_contains - Does this folio contain this index?
+ * @folio: The folio.
+ * @index: The page index within the file.
+ *
+ * Context: The caller should have the page locked in order to prevent
+ * (eg) shmem from moving the page between the page cache and swap cache
+ * and changing its index in the middle of the operation.
+ * Return: true or false.
+ */
+static inline bool folio_contains(struct folio *folio, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (folio_test_hugetlb(folio))
+ return folio->index == index;
+ return index - folio_index(folio) < folio_nr_pages(folio);
+}
+
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
@@ -560,6 +597,27 @@ static inline loff_t page_file_offset(struct page *page)
return ((loff_t)page_index(page)) << PAGE_SHIFT;
}
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ */
+static inline loff_t folio_pos(struct folio *folio)
+{
+ return page_offset(&folio->page);
+}
+
+/**
+ * folio_file_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ *
+ * This differs from folio_pos() for folios which belong to a swap file.
+ * NFS is the only filesystem today which needs to use folio_file_pos().
+ */
+static inline loff_t folio_file_pos(struct folio *folio)
+{
+ return page_file_offset(&folio->page);
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
@@ -575,13 +633,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
struct wait_page_key {
- struct page *page;
+ struct folio *folio;
int bit_nr;
int page_match;
};
struct wait_page_queue {
- struct page *page;
+ struct folio *folio;
int bit_nr;
wait_queue_entry_t wait;
};
@@ -589,7 +647,7 @@ struct wait_page_queue {
static inline bool wake_page_match(struct wait_page_queue *wait_page,
struct wait_page_key *key)
{
- if (wait_page->page != key->page)
+ if (wait_page->folio != key->folio)
return false;
key->page_match = 1;
@@ -599,20 +657,31 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
return true;
}
-extern void __lock_page(struct page *page);
-extern int __lock_page_killable(struct page *page);
-extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
-extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+void __folio_lock(struct folio *folio);
+int __folio_lock_killable(struct folio *folio);
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
unsigned int flags);
-extern void unlock_page(struct page *page);
+void unlock_page(struct page *page);
+void folio_unlock(struct folio *folio);
+
+static inline bool folio_trylock(struct folio *folio)
+{
+ return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+}
/*
* Return true if the page was successfully locked
*/
static inline int trylock_page(struct page *page)
{
- page = compound_head(page);
- return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
+ return folio_trylock(page_folio(page));
+}
+
+static inline void folio_lock(struct folio *folio)
+{
+ might_sleep();
+ if (!folio_trylock(folio))
+ __folio_lock(folio);
}
/*
@@ -620,38 +689,30 @@ static inline int trylock_page(struct page *page)
*/
static inline void lock_page(struct page *page)
{
+ struct folio *folio;
might_sleep();
- if (!trylock_page(page))
- __lock_page(page);
+
+ folio = page_folio(page);
+ if (!folio_trylock(folio))
+ __folio_lock(folio);
}
-/*
- * lock_page_killable is like lock_page but can be interrupted by fatal
- * signals. It returns 0 if it locked the page and -EINTR if it was
- * killed while waiting.
- */
-static inline int lock_page_killable(struct page *page)
+static inline int folio_lock_killable(struct folio *folio)
{
might_sleep();
- if (!trylock_page(page))
- return __lock_page_killable(page);
+ if (!folio_trylock(folio))
+ return __folio_lock_killable(folio);
return 0;
}
/*
- * lock_page_async - Lock the page, unless this would block. If the page
- * is already locked, then queue a callback when the page becomes unlocked.
- * This callback can then retry the operation.
- *
- * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
- * was already locked and the callback defined in 'wait' was queued.
+ * lock_page_killable is like lock_page but can be interrupted by fatal
+ * signals. It returns 0 if it locked the page and -EINTR if it was
+ * killed while waiting.
*/
-static inline int lock_page_async(struct page *page,
- struct wait_page_queue *wait)
+static inline int lock_page_killable(struct page *page)
{
- if (!trylock_page(page))
- return __lock_page_async(page, wait);
- return 0;
+ return folio_lock_killable(page_folio(page));
}
/*
@@ -659,78 +720,108 @@ static inline int lock_page_async(struct page *page,
* caller indicated that it can handle a retry.
*
* Return value and mmap_lock implications depend on flags; see
- * __lock_page_or_retry().
+ * __folio_lock_or_retry().
*/
-static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
+static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
+ struct folio *folio;
might_sleep();
- return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
+
+ folio = page_folio(page);
+ return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
}
/*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
* and should not be used directly.
*/
-extern void wait_on_page_bit(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+void folio_wait_bit(struct folio *folio, int bit_nr);
+int folio_wait_bit_killable(struct folio *folio, int bit_nr);
/*
- * Wait for a page to be unlocked.
+ * Wait for a folio to be unlocked.
*
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
+ * This must be called with the caller "holding" the folio,
+ * ie with increased "page->count" so that the folio won't
* go away during the wait..
*/
+static inline void folio_wait_locked(struct folio *folio)
+{
+ if (folio_test_locked(folio))
+ folio_wait_bit(folio, PG_locked);
+}
+
+static inline int folio_wait_locked_killable(struct folio *folio)
+{
+ if (!folio_test_locked(folio))
+ return 0;
+ return folio_wait_bit_killable(folio, PG_locked);
+}
+
static inline void wait_on_page_locked(struct page *page)
{
- if (PageLocked(page))
- wait_on_page_bit(compound_head(page), PG_locked);
+ folio_wait_locked(page_folio(page));
}
static inline int wait_on_page_locked_killable(struct page *page)
{
- if (!PageLocked(page))
- return 0;
- return wait_on_page_bit_killable(compound_head(page), PG_locked);
+ return folio_wait_locked_killable(page_folio(page));
}
int put_and_wait_on_page_locked(struct page *page, int state);
void wait_on_page_writeback(struct page *page);
-int wait_on_page_writeback_killable(struct page *page);
-extern void end_page_writeback(struct page *page);
+void folio_wait_writeback(struct folio *folio);
+int folio_wait_writeback_killable(struct folio *folio);
+void end_page_writeback(struct page *page);
+void folio_end_writeback(struct folio *folio);
void wait_for_stable_page(struct page *page);
+void folio_wait_stable(struct folio *folio);
+void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
+static inline void __set_page_dirty(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ __folio_mark_dirty(page_folio(page), mapping, warn);
+}
+void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
+ struct bdi_writeback *wb);
+static inline void account_page_cleaned(struct page *page,
+ struct address_space *mapping, struct bdi_writeback *wb)
+{
+ return folio_account_cleaned(page_folio(page), mapping, wb);
+}
+void __folio_cancel_dirty(struct folio *folio);
+static inline void folio_cancel_dirty(struct folio *folio)
+{
+ /* Avoid atomic ops, locking, etc. when not actually needed. */
+ if (folio_test_dirty(folio))
+ __folio_cancel_dirty(folio);
+}
+static inline void cancel_dirty_page(struct page *page)
+{
+ folio_cancel_dirty(page_folio(page));
+}
+bool folio_clear_dirty_for_io(struct folio *folio);
+bool clear_page_dirty_for_io(struct page *page);
+int __must_check folio_write_one(struct folio *folio);
+static inline int __must_check write_one_page(struct page *page)
+{
+ return folio_write_one(page_folio(page));
+}
-void __set_page_dirty(struct page *, struct address_space *, int warn);
int __set_page_dirty_nobuffers(struct page *page);
int __set_page_dirty_no_writeback(struct page *page);
void page_endio(struct page *page, bool is_write, int err);
-/**
- * set_page_private_2 - Set PG_private_2 on a page and take a ref
- * @page: The page.
- *
- * Set the PG_private_2 flag on a page and take the reference needed for the VM
- * to handle its lifetime correctly. This sets the flag and takes the
- * reference unconditionally, so care must be taken not to set the flag again
- * if it's already set.
- */
-static inline void set_page_private_2(struct page *page)
-{
- page = compound_head(page);
- get_page(page);
- SetPagePrivate2(page);
-}
-
-void end_page_private_2(struct page *page);
-void wait_on_page_private_2(struct page *page);
-int wait_on_page_private_2_killable(struct page *page);
+void folio_end_private_2(struct folio *folio);
+void folio_wait_private_2(struct folio *folio);
+int folio_wait_private_2_killable(struct folio *folio);
/*
* Add an arbitrary waiter to a page's wait queue
*/
-extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
/*
* Fault everything in given userspace address range in.
@@ -790,9 +881,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
}
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask);
+ pgoff_t index, gfp_t gfp);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask);
+ pgoff_t index, gfp_t gfp);
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page, void *shadow);
void replace_page_cache_page(struct page *old, struct page *new);
@@ -817,6 +910,10 @@ static inline int add_to_page_cache(struct page *page,
return error;
}
+/* Must be non-static for BPF error injection */
+int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp, void **shadowp);
+
/**
* struct readahead_control - Describes a readahead request.
*
@@ -906,33 +1003,57 @@ void page_cache_async_readahead(struct address_space *mapping,
page_cache_async_ra(&ractl, page, req_count);
}
+static inline struct folio *__readahead_folio(struct readahead_control *ractl)
+{
+ struct folio *folio;
+
+ BUG_ON(ractl->_batch_count > ractl->_nr_pages);
+ ractl->_nr_pages -= ractl->_batch_count;
+ ractl->_index += ractl->_batch_count;
+
+ if (!ractl->_nr_pages) {
+ ractl->_batch_count = 0;
+ return NULL;
+ }
+
+ folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ ractl->_batch_count = folio_nr_pages(folio);
+
+ return folio;
+}
+
/**
* readahead_page - Get the next page to read.
- * @rac: The current readahead request.
+ * @ractl: The current readahead request.
*
* Context: The page is locked and has an elevated refcount. The caller
* should decreases the refcount once the page has been submitted for I/O
* and unlock the page once all I/O to that page has completed.
* Return: A pointer to the next page, or %NULL if we are done.
*/
-static inline struct page *readahead_page(struct readahead_control *rac)
+static inline struct page *readahead_page(struct readahead_control *ractl)
{
- struct page *page;
-
- BUG_ON(rac->_batch_count > rac->_nr_pages);
- rac->_nr_pages -= rac->_batch_count;
- rac->_index += rac->_batch_count;
+ struct folio *folio = __readahead_folio(ractl);
- if (!rac->_nr_pages) {
- rac->_batch_count = 0;
- return NULL;
- }
+ return &folio->page;
+}
- page = xa_load(&rac->mapping->i_pages, rac->_index);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- rac->_batch_count = thp_nr_pages(page);
+/**
+ * readahead_folio - Get the next folio to read.
+ * @ractl: The current readahead request.
+ *
+ * Context: The folio is locked. The caller should unlock the folio once
+ * all I/O to that folio has completed.
+ * Return: A pointer to the next folio, or %NULL if we are done.
+ */
+static inline struct folio *readahead_folio(struct readahead_control *ractl)
+{
+ struct folio *folio = __readahead_folio(ractl);
- return page;
+ if (folio)
+ folio_put(folio);
+ return folio;
}
static inline unsigned int __readahead_batch(struct readahead_control *rac,
@@ -1040,6 +1161,34 @@ static inline unsigned long dir_pages(struct inode *inode)
}
/**
+ * folio_mkwrite_check_truncate - check if folio was truncated
+ * @folio: the folio to check
+ * @inode: the inode to check the folio against
+ *
+ * Return: the number of bytes in the folio up to EOF,
+ * or -EFAULT if the folio was truncated.
+ */
+static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
+ struct inode *inode)
+{
+ loff_t size = i_size_read(inode);
+ pgoff_t index = size >> PAGE_SHIFT;
+ size_t offset = offset_in_folio(folio, size);
+
+ if (!folio->mapping)
+ return -EFAULT;
+
+ /* folio is wholly inside EOF */
+ if (folio_next_index(folio) - 1 < index)
+ return folio_size(folio);
+ /* folio is wholly past EOF */
+ if (folio->index > index || !offset)
+ return -EFAULT;
+ /* folio is partially inside EOF */
+ return offset;
+}
+
+/**
* page_mkwrite_check_truncate - check if page was truncated
* @page: the page to check
* @inode: the inode to check the page against
@@ -1068,19 +1217,25 @@ static inline int page_mkwrite_check_truncate(struct page *page,
}
/**
- * i_blocks_per_page - How many blocks fit in this page.
+ * i_blocks_per_folio - How many blocks fit in this folio.
* @inode: The inode which contains the blocks.
- * @page: The page (head page if the page is a THP).
+ * @folio: The folio.
*
- * If the block size is larger than the size of this page, return zero.
+ * If the block size is larger than the size of this folio, return zero.
*
- * Context: The caller should hold a refcount on the page to prevent it
+ * Context: The caller should hold a refcount on the folio to prevent it
* from being split.
- * Return: The number of filesystem blocks covered by this page.
+ * Return: The number of filesystem blocks covered by this folio.
*/
static inline
+unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
+{
+ return folio_size(folio) >> inode->i_blkbits;
+}
+
+static inline
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
{
- return thp_size(page) >> inode->i_blkbits;
+ return i_blocks_per_folio(inode, page_folio(page));
}
#endif /* _LINUX_PAGEMAP_H */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 505480217cf1..2512e2f9cd4e 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
#endif
+#ifdef CONFIG_KVM
+void kvm_host_pmu_init(struct arm_pmu *pmu);
+#else
+#define kvm_host_pmu_init(x) do { } while(0)
+#endif
+
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
struct arm_pmu *armpmu_alloc_atomic(void);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe156a8170aa..9b60bb89d86a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -683,7 +683,9 @@ struct perf_event {
/*
* timestamp shadows the actual context timing but it can
* be safely used in NMI interrupt context. It reflects the
- * context time as it was when the event was last scheduled in.
+ * context time as it was when the event was last scheduled in,
+ * or when ctx_sched_in failed to schedule the event because we
+ * run out of PMC.
*
* ctx_time already accounts for ctx->timestamp. Therefore to
* compute ctx_time for a sample, simply add perf_clock().
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 6beb26b7151d..86be8bf27b41 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -4,6 +4,8 @@
#include <linux/mm.h>
+#define ARCH_DEFAULT_PKEY 0
+
#ifdef CONFIG_ARCH_HAS_PKEYS
#include <asm/pkeys.h>
#else /* ! CONFIG_ARCH_HAS_PKEYS */
diff --git a/include/linux/platform_data/usb-omap1.h b/include/linux/platform_data/usb-omap1.h
index 43b5ce139c37..878e572a78bf 100644
--- a/include/linux/platform_data/usb-omap1.h
+++ b/include/linux/platform_data/usb-omap1.h
@@ -48,6 +48,8 @@ struct omap_usb_config {
u32 (*usb2_init)(unsigned nwires, unsigned alt_pingroup);
int (*ocpi_enable)(void);
+
+ void (*lb_reset)(void);
};
#endif /* __LINUX_USB_OMAP1_H */
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index a0b7e43049d5..725c9b784e60 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -404,7 +404,7 @@ int pwm_set_chip_data(struct pwm_device *pwm, void *data);
void *pwm_get_chip_data(struct pwm_device *pwm);
int pwmchip_add(struct pwm_chip *chip);
-int pwmchip_remove(struct pwm_chip *chip);
+void pwmchip_remove(struct pwm_chip *chip);
int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip);
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 0165824c5128..81cad9e1e412 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher {
#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE)
#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC)
-#if IS_ENABLED(CONFIG_QCOM_SCM)
extern bool qcom_scm_is_available(void);
extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
@@ -109,66 +108,10 @@ extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
u32 *resp);
extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
-#else
-
-#include <linux/errno.h>
-
-static inline bool qcom_scm_is_available(void) { return false; }
-
-static inline int qcom_scm_set_cold_boot_addr(void *entry,
- const cpumask_t *cpus) { return -ENODEV; }
-static inline int qcom_scm_set_warm_boot_addr(void *entry,
- const cpumask_t *cpus) { return -ENODEV; }
-static inline void qcom_scm_cpu_power_down(u32 flags) {}
-static inline u32 qcom_scm_set_remote_state(u32 state,u32 id)
- { return -ENODEV; }
-
-static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata,
- size_t size) { return -ENODEV; }
-static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
- phys_addr_t size) { return -ENODEV; }
-static inline int qcom_scm_pas_auth_and_reset(u32 peripheral)
- { return -ENODEV; }
-static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
-static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; }
-
-static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
- { return -ENODEV; }
-static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val)
- { return -ENODEV; }
-
-static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; }
-static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare)
- { return -ENODEV; }
-static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size)
- { return -ENODEV; }
-static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare)
- { return -ENODEV; }
-extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
- u32 cp_nonpixel_start,
- u32 cp_nonpixel_size)
- { return -ENODEV; }
-static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
- unsigned int *src, const struct qcom_scm_vmperm *newvm,
- unsigned int dest_cnt) { return -ENODEV; }
-
-static inline bool qcom_scm_ocmem_lock_available(void) { return false; }
-static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset,
- u32 size, u32 mode) { return -ENODEV; }
-static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id,
- u32 offset, u32 size) { return -ENODEV; }
-
-static inline bool qcom_scm_ice_available(void) { return false; }
-static inline int qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; }
-static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size,
- enum qcom_scm_ice_cipher cipher,
- u32 data_unit_size) { return -ENODEV; }
-
-static inline bool qcom_scm_hdcp_available(void) { return false; }
-static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
- u32 *resp) { return -ENODEV; }
-
-static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
- { return -ENODEV; }
-#endif
+
+extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version);
+extern int qcom_scm_lmh_profile_change(u32 profile_id);
+extern bool qcom_scm_lmh_dcvsh_available(void);
+
#endif
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c976cc6de257..e704b1a4c06c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
*
* returns the number of cleaned PTEs.
*/
-int page_mkclean(struct page *);
+int folio_mkclean(struct folio *);
/*
* called in munlock()/munmap() path to check for other vmas holding
@@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags)
{
}
-static inline int page_mkclean(struct page *page)
+static inline int folio_mkclean(struct folio *folio)
{
return 0;
}
-
-
#endif /* CONFIG_MMU */
+static inline int page_mkclean(struct page *page)
+{
+ return folio_mkclean(page_folio(page));
+}
#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 426e98e0b675..352c6127cb90 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -142,22 +142,14 @@ struct rw_semaphore {
#define DECLARE_RWSEM(lockname) \
struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name,
struct lock_class_key *key);
-#else
-static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
- struct lock_class_key *key)
-{
-}
-#endif
#define init_rwsem(sem) \
do { \
static struct lock_class_key __key; \
\
- init_rwbase_rt(&(sem)->rwbase); \
- __rwsem_init((sem), #sem, &__key); \
+ __init_rwsem((sem), #sem, &__key); \
} while (0)
static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e12b524426b0..c1a927ddec64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1471,6 +1471,7 @@ struct task_struct {
mce_whole_page : 1,
__mce_reserved : 62;
struct callback_head mce_kill_me;
+ int mce_count;
#endif
#ifdef CONFIG_KRETPROBES
@@ -1719,7 +1720,7 @@ extern struct pid *cad_pid;
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
-static inline bool is_percpu_thread(void)
+static __always_inline bool is_percpu_thread(void)
{
#ifdef CONFIG_SMP
return (current->flags & PF_NO_SETAFFINITY) &&
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 21c3771e6a56..988528b5da43 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
mapping = (struct address_space *)
((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
- if (mapping != page->mapping)
+ if (!mapping || mapping != page->mapping)
return false;
return mapping->a_ops == &secretmem_aops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db3e825..841e2f0f5240 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1940,7 +1940,7 @@ static inline void __skb_insert(struct sk_buff *newsk,
WRITE_ONCE(newsk->prev, prev);
WRITE_ONCE(next->prev, newsk);
WRITE_ONCE(prev->next, newsk);
- list->qlen++;
+ WRITE_ONCE(list->qlen, list->qlen + 1);
}
static inline void __skb_queue_splice(const struct sk_buff_head *list,
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..1ce9a9eb223b 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+bool sk_msg_is_readable(struct sock *sk);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 8371bca13729..6b0b686f6f90 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -531,6 +531,9 @@ struct spi_controller {
/* I/O mutex */
struct mutex io_mutex;
+ /* Used to avoid adding the same CS twice */
+ struct mutex add_lock;
+
/* lock and mutex for SPI bus locking */
spinlock_t bus_lock_spinlock;
struct mutex bus_lock_mutex;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba52f3a3478e..cdf0957a88a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,11 +320,17 @@ struct vma_swap_readahead {
#endif
};
+static inline swp_entry_t folio_swap_entry(struct folio *folio)
+{
+ swp_entry_t entry = { .val = page_private(&folio->page) };
+ return entry;
+}
+
/* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
-void workingset_refault(struct page *page, void *shadow);
-void workingset_activation(struct page *page);
+void workingset_refault(struct folio *folio, void *shadow);
+void workingset_activation(struct folio *folio);
/* Only track the nodes of mappings with shadow entries */
void workingset_update_node(struct xa_node *node);
@@ -344,9 +350,11 @@ extern unsigned long nr_free_buffer_pages(void);
/* linux/mm/swap.c */
extern void lru_note_cost(struct lruvec *lruvec, bool file,
unsigned int nr_pages);
-extern void lru_note_cost_page(struct page *);
+extern void lru_note_cost_folio(struct folio *);
+extern void folio_add_lru(struct folio *);
extern void lru_cache_add(struct page *);
-extern void mark_page_accessed(struct page *);
+void mark_page_accessed(struct page *);
+void folio_mark_accessed(struct folio *);
extern atomic_t lru_disable_count;
@@ -365,7 +373,6 @@ extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
-extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index d296f3b88fb9..c314893970b3 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -285,7 +285,7 @@ struct thermal_zone_params {
};
/**
- * struct thermal_zone_of_device_ops - scallbacks for handling DT based zones
+ * struct thermal_zone_of_device_ops - callbacks for handling DT based zones
*
* Mandatory:
* @get_temp: a pointer to a function that reads the sensor temperature.
@@ -404,12 +404,13 @@ static inline void thermal_zone_device_unregister(
struct thermal_zone_device *tz)
{ }
static inline struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
+thermal_cooling_device_register(const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
{ return ERR_PTR(-ENODEV); }
static inline struct thermal_cooling_device *
thermal_of_cooling_device_register(struct device_node *np,
- char *type, void *devdata, const struct thermal_cooling_device_ops *ops)
+ const char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
{ return ERR_PTR(-ENODEV); }
static inline struct thermal_cooling_device *
devm_thermal_of_cooling_device_register(struct device *dev,
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 5117cb5b5656..81b9686a2079 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -25,7 +25,9 @@ struct itimerspec64 {
#define TIME64_MIN (-TIME64_MAX - 1)
#define KTIME_MAX ((s64)~((u64)1 << 63))
+#define KTIME_MIN (-KTIME_MAX - 1)
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+#define KTIME_SEC_MIN (KTIME_MIN / NSEC_PER_SEC)
/*
* Limits for settimeofday():
@@ -124,10 +126,13 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts)
*/
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
- /* Prevent multiplication overflow */
- if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ /* Prevent multiplication overflow / underflow */
+ if (ts->tv_sec >= KTIME_SEC_MAX)
return KTIME_MAX;
+ if (ts->tv_sec <= KTIME_SEC_MIN)
+ return KTIME_MIN;
+
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index aa11fe323c56..12d827734686 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -269,6 +269,7 @@ enum tpm2_cc_attrs {
#define TPM_VID_INTEL 0x8086
#define TPM_VID_WINBOND 0x1050
#define TPM_VID_STM 0x104A
+#define TPM_VID_ATML 0x1114
enum tpm_chip_flags {
TPM_CHIP_FLAG_TPM2 = BIT(1),
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c5714e65..fe95f0922526 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -16,23 +16,8 @@
* When function tracing occurs, the following steps are made:
* If arch does not support a ftrace feature:
* call internal function (uses INTERNAL bits) which calls...
- * If callback is registered to the "global" list, the list
- * function is called and recursion checks the GLOBAL bits.
- * then this function calls...
* The function callback, which can use the FTRACE bits to
* check for recursion.
- *
- * Now if the arch does not support a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
*/
enum {
/* Function recursion bits */
@@ -40,12 +25,14 @@ enum {
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
+ TRACE_FTRACE_TRANSITION_BIT,
- /* INTERNAL_BITs must be greater than FTRACE_BITs */
+ /* Internal use recursion bits */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
TRACE_INTERNAL_SIRQ_BIT,
+ TRACE_INTERNAL_TRANSITION_BIT,
TRACE_BRANCH_BIT,
/*
@@ -86,12 +73,6 @@ enum {
*/
TRACE_GRAPH_NOTRACE_BIT,
- /*
- * When transitioning between context, the preempt_count() may
- * not be correct. Allow for a single recursion to cover this case.
- */
- TRACE_TRANSITION_BIT,
-
/* Used to prevent recursion recording from recursing. */
TRACE_RECORD_RECURSION_BIT,
};
@@ -113,12 +94,10 @@ enum {
#define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_LIST_START TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
/*
* Used for setting context
@@ -132,6 +111,7 @@ enum {
TRACE_CTX_IRQ,
TRACE_CTX_SOFTIRQ,
TRACE_CTX_NORMAL,
+ TRACE_CTX_TRANSITION,
};
static __always_inline int trace_get_context_bit(void)
@@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
#endif
static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
- int start, int max)
+ int start)
{
unsigned int val = READ_ONCE(current->trace_recursion);
int bit;
- /* A previous recursion check was made */
- if ((val & TRACE_CONTEXT_MASK) > max)
- return 0;
-
bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit))) {
/*
* It could be that preempt_count has not been updated during
* a switch between contexts. Allow for a single recursion.
*/
- bit = TRACE_TRANSITION_BIT;
+ bit = TRACE_CTX_TRANSITION + start;
if (val & (1 << bit)) {
do_ftrace_record_recursion(ip, pip);
return -1;
}
- } else {
- /* Normal check passed, clear the transition to allow it again */
- val &= ~(1 << TRACE_TRANSITION_BIT);
}
val |= 1 << bit;
current->trace_recursion = val;
barrier();
- return bit + 1;
+ return bit;
}
static __always_inline void trace_clear_recursion(int bit)
{
- if (!bit)
- return;
-
barrier();
- bit--;
trace_recursion_clear(bit);
}
@@ -214,7 +183,7 @@ static __always_inline void trace_clear_recursion(int bit)
static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
unsigned long parent_ip)
{
- return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START);
}
/**
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 3e80c4bc66f7..2564b7434b4d 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -197,6 +197,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
mem_cgroup_handle_over_high();
blkcg_maybe_throttle_current();
+
+ rseq_handle_notify_resume(NULL, regs);
}
/*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5265024e8b90..207101a9c5c3 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -27,6 +27,12 @@ enum iter_type {
ITER_DISCARD,
};
+struct iov_iter_state {
+ size_t iov_offset;
+ size_t count;
+ unsigned long nr_segs;
+};
+
struct iov_iter {
u8 iter_type;
bool data_source;
@@ -47,7 +53,6 @@ struct iov_iter {
};
loff_t xarray_start;
};
- size_t truncated;
};
static inline enum iter_type iov_iter_type(const struct iov_iter *i)
@@ -55,6 +60,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
return i->iter_type;
}
+static inline void iov_iter_save_state(struct iov_iter *iter,
+ struct iov_iter_state *state)
+{
+ state->iov_offset = iter->iov_offset;
+ state->count = iter->count;
+ state->nr_segs = iter->nr_segs;
+}
+
static inline bool iter_is_iovec(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_IOVEC;
@@ -233,6 +246,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
@@ -255,10 +269,8 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
* conversion in assignement is by definition greater than all
* values of size_t, including old i->count.
*/
- if (i->count > count) {
- i->truncated += i->count - count;
+ if (i->count > count)
i->count = count;
- }
}
/*
@@ -267,7 +279,6 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
*/
static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
{
- i->truncated -= count - i->count;
i->count = count;
}
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 548a028f2dab..2c1fc9212cf2 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -124,6 +124,7 @@ struct usb_hcd {
#define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */
#define HCD_FLAG_DEAD 6 /* controller has died? */
#define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */
+#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */
/* The flags can be tested using these macros; they are likely to
* be slightly faster than test_bit().
@@ -134,6 +135,7 @@ struct usb_hcd {
#define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING))
#define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING))
#define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD))
+#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER))
/*
* Specifies if interfaces are authorized by default
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb70cabe6e7f..33a4240e6a6f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
static inline void set_rlimit_ucount_max(struct user_namespace *ns,
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 8cfe49d201dd..3972ab765de1 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -43,17 +43,17 @@ struct vdpa_vq_state_split {
* @last_used_idx: used index
*/
struct vdpa_vq_state_packed {
- u16 last_avail_counter:1;
- u16 last_avail_idx:15;
- u16 last_used_counter:1;
- u16 last_used_idx:15;
+ u16 last_avail_counter:1;
+ u16 last_avail_idx:15;
+ u16 last_used_counter:1;
+ u16 last_used_idx:15;
};
struct vdpa_vq_state {
- union {
- struct vdpa_vq_state_split split;
- struct vdpa_vq_state_packed packed;
- };
+ union {
+ struct vdpa_vq_state_split split;
+ struct vdpa_vq_state_packed packed;
+ };
};
struct vdpa_mgmt_dev;
@@ -65,6 +65,7 @@ struct vdpa_mgmt_dev;
* @config: the configuration ops for this device.
* @index: device index
* @features_valid: were features initialized? for legacy guests
+ * @use_va: indicate whether virtual address must be used by this device
* @nvqs: maximum number of supported virtqueues
* @mdev: management device pointer; caller must setup when registering device as part
* of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
@@ -75,6 +76,7 @@ struct vdpa_device {
const struct vdpa_config_ops *config;
unsigned int index;
bool features_valid;
+ bool use_va;
int nvqs;
struct vdpa_mgmt_dev *mdev;
};
@@ -90,6 +92,16 @@ struct vdpa_iova_range {
};
/**
+ * Corresponding file area for device memory mapping
+ * @file: vma->vm_file for the mapping
+ * @offset: mapping offset in the vm_file
+ */
+struct vdpa_map_file {
+ struct file *file;
+ u64 offset;
+};
+
+/**
* struct vdpa_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* operations unless it is mentioned to be optional in the following
@@ -131,7 +143,7 @@ struct vdpa_iova_range {
* @vdev: vdpa device
* @idx: virtqueue index
* @state: pointer to returned state (last_avail_idx)
- * @get_vq_notification: Get the notification area for a virtqueue
+ * @get_vq_notification: Get the notification area for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns the notifcation area
@@ -171,6 +183,9 @@ struct vdpa_iova_range {
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
+ * @reset: Reset device
+ * @vdev: vdpa device
+ * Returns integer: success (0) or error (< 0)
* @get_config_size: Get the size of the configuration space
* @vdev: vdpa device
* Returns size_t: configuration size
@@ -255,6 +270,7 @@ struct vdpa_config_ops {
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
+ int (*reset)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
@@ -266,7 +282,7 @@ struct vdpa_config_ops {
/* DMA ops */
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
- u64 pa, u32 perm);
+ u64 pa, u32 perm, void *opaque);
int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
/* Free device resources */
@@ -275,7 +291,8 @@ struct vdpa_config_ops {
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
- size_t size, const char *name);
+ size_t size, const char *name,
+ bool use_va);
/**
* vdpa_alloc_device - allocate and initilaize a vDPA device
@@ -285,15 +302,16 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
* @parent: the parent device
* @config: the bus operations that is supported by this device
* @name: name of the vdpa device
+ * @use_va: indicate whether virtual address must be used by this device
*
* Return allocated data structure or ERR_PTR upon error
*/
-#define vdpa_alloc_device(dev_struct, member, parent, config, name) \
+#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \
container_of(__vdpa_alloc_device( \
parent, config, \
sizeof(dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
- dev_struct, member)), name), \
+ dev_struct, member)), name, use_va), \
dev_struct, member)
int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
@@ -348,27 +366,27 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
return vdev->dma_dev;
}
-static inline void vdpa_reset(struct vdpa_device *vdev)
+static inline int vdpa_reset(struct vdpa_device *vdev)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = false;
- ops->set_status(vdev, 0);
+ return ops->reset(vdev);
}
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = true;
- return ops->set_features(vdev, features);
+ return ops->set_features(vdev, features);
}
-
-static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
- void *buf, unsigned int len)
+static inline void vdpa_get_config(struct vdpa_device *vdev,
+ unsigned int offset, void *buf,
+ unsigned int len)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
/*
* Config accesses aren't supposed to trigger before features are set.
diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h
index 6b09b786a762..2d0e2f52f938 100644
--- a/include/linux/vhost_iotlb.h
+++ b/include/linux/vhost_iotlb.h
@@ -17,6 +17,7 @@ struct vhost_iotlb_map {
u32 perm;
u32 flags_padding;
u64 __subtree_last;
+ void *opaque;
};
#define VHOST_IOTLB_FLAG_RETIRE 0x1
@@ -29,6 +30,8 @@ struct vhost_iotlb {
unsigned int flags;
};
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, u64 start, u64 last,
+ u64 addr, unsigned int perm, void *opaque);
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
u64 addr, unsigned int perm);
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d6a6cf53b127..bfe38869498d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone,
struct per_cpu_zonestat *pzstats) { }
#endif /* CONFIG_SMP */
+static inline void __zone_stat_mod_folio(struct folio *folio,
+ enum zone_stat_item item, long nr)
+{
+ __mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void __zone_stat_add_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __zone_stat_sub_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void zone_stat_mod_folio(struct folio *folio,
+ enum zone_stat_item item, long nr)
+{
+ mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void zone_stat_add_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void zone_stat_sub_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void __node_stat_mod_folio(struct folio *folio,
+ enum node_stat_item item, long nr)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void __node_stat_add_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __node_stat_sub_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void node_stat_mod_folio(struct folio *folio,
+ enum node_stat_item item, long nr)
+{
+ mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void node_stat_add_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void node_stat_sub_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
int migratetype)
{
@@ -525,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page,
#endif /* CONFIG_MEMCG */
-static inline void inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, 1);
-}
-
static inline void __inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
@@ -543,6 +609,24 @@ static inline void __dec_lruvec_page_state(struct page *page,
__mod_lruvec_page_state(page, idx, -1);
}
+static inline void __lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
+{
+ __mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void __lruvec_stat_add_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void __lruvec_stat_sub_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
+
static inline void inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
@@ -555,4 +639,21 @@ static inline void dec_lruvec_page_state(struct page *page,
mod_lruvec_page_state(page, idx, -1);
}
+static inline void lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
+{
+ mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void lruvec_stat_add_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void lruvec_stat_sub_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2ebef6b1a3d6..74d3c1efd9bb 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -399,9 +399,8 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq;
* RETURNS:
* Pointer to the allocated workqueue on success, %NULL on failure.
*/
-struct workqueue_struct *alloc_workqueue(const char *fmt,
- unsigned int flags,
- int max_active, ...);
+__printf(1, 4) struct workqueue_struct *
+alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
/**
* alloc_ordered_workqueue - allocate an ordered workqueue
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d1f65adf6a26..3571f383dfb6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -393,7 +393,14 @@ void writeback_set_ratelimit(void);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
-void account_page_redirty(struct page *page);
+bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
+void folio_account_redirty(struct folio *folio);
+static inline void account_page_redirty(struct page *page)
+{
+ folio_account_redirty(page_folio(page));
+}
+bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
+bool redirty_page_for_writepage(struct writeback_control *, struct page *);
void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 62dd8422e0dc..27336fc70467 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
* netdev and may otherwise be used by driver read-only, will be update
* by cfg80211 on change_interface
* @mgmt_registrations: list of registrations for management frames
- * @mgmt_registrations_lock: lock for the list
* @mgmt_registrations_need_update: mgmt registrations were updated,
* need to propagate the update to the driver
* @mtx: mutex used to lock data in this struct, may be used by drivers
@@ -5423,7 +5422,6 @@ struct wireless_dev {
u32 identifier;
struct list_head mgmt_registrations;
- spinlock_t mgmt_registrations_lock;
u8 mgmt_registrations_need_update:1;
struct mutex mtx;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index f9a17145255a..d784e76113b8 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -447,6 +447,11 @@ static inline bool dsa_port_is_user(struct dsa_port *dp)
return dp->type == DSA_PORT_TYPE_USER;
}
+static inline bool dsa_port_is_unused(struct dsa_port *dp)
+{
+ return dp->type == DSA_PORT_TYPE_UNUSED;
+}
+
static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
{
return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
@@ -580,8 +585,16 @@ struct dsa_switch_ops {
int (*change_tag_protocol)(struct dsa_switch *ds, int port,
enum dsa_tag_protocol proto);
+ /* Optional switch-wide initialization and destruction methods */
int (*setup)(struct dsa_switch *ds);
void (*teardown)(struct dsa_switch *ds);
+
+ /* Per-port initialization and destruction methods. Mandatory if the
+ * driver registers devlink port regions, optional otherwise.
+ */
+ int (*port_setup)(struct dsa_switch *ds, int port);
+ void (*port_teardown)(struct dsa_switch *ds, int port);
+
u32 (*get_phy_flags)(struct dsa_switch *ds, int port);
/*
@@ -1041,6 +1054,7 @@ static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
void dsa_unregister_switch(struct dsa_switch *ds);
int dsa_register_switch(struct dsa_switch *ds);
+void dsa_switch_shutdown(struct dsa_switch *ds);
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index);
#ifdef CONFIG_PM_SLEEP
int dsa_switch_suspend(struct dsa_switch *ds);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 21c5386d4a6d..ab5348e57db1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -597,5 +597,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
u8 rt_family, unsigned char *flags, bool skip_oif);
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
- int nh_weight, u8 rt_family);
+ int nh_weight, u8 rt_family, u32 nh_tclassid);
#endif /* _NET_FIB_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index af0fc13cea34..618d1f427cb2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2818,13 +2818,13 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag
* when they are able to replace in-use PTK keys according to the following
* requirements:
- * 1) They do not hand over frames decrypted with the old key to
- mac80211 once the call to set_key() with command %DISABLE_KEY has been
- completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key,
+ * 1) They do not hand over frames decrypted with the old key to mac80211
+ once the call to set_key() with command %DISABLE_KEY has been completed,
2) either drop or continue to use the old key for any outgoing frames queued
at the time of the key deletion (including re-transmits),
3) never send out a frame queued prior to the set_key() %SET_KEY command
- encrypted with the new key and
+ encrypted with the new key when also needing
+ @IEEE80211_KEY_FLAG_GENERATE_IV and
4) never send out a frame unencrypted when it should be encrypted.
Mac80211 will not queue any new frames for a deleted key to the driver.
*/
diff --git a/include/net/mctp.h b/include/net/mctp.h
index a824d47c3c6d..ffd2c23bd76d 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -54,7 +54,7 @@ struct mctp_sock {
struct sock sk;
/* bind() params */
- int bind_net;
+ unsigned int bind_net;
mctp_eid_t bind_addr;
__u8 bind_type;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6026bbefbffd..3214848402ec 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -69,6 +69,10 @@ struct mptcp_out_options {
struct {
u64 sndr_key;
u64 rcvr_key;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ __sum16 csum;
};
struct {
struct mptcp_addr_info addr;
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 0fd8a4159662..ceadf8ba25a4 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -17,7 +17,6 @@ struct inet_frags_ctl;
struct nft_ct_frag6_pernet {
struct ctl_table_header *nf_frag_frags_hdr;
struct fqdir *fqdir;
- unsigned int users;
};
#endif /* _NF_DEFRAG_IPV6_H */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 148f5d8ee5ab..a16171c5fd9e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1202,7 +1202,7 @@ struct nft_object *nft_obj_lookup(const struct net *net,
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq,
- int event, int family, int report, gfp_t gfp);
+ int event, u16 flags, int family, int report, gfp_t gfp);
/**
* struct nft_object_type - stateful object type
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 986a2a9cfdfa..b593f95e9991 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -27,5 +27,11 @@ struct netns_nf {
#if IS_ENABLED(CONFIG_DECNET)
struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ unsigned int defrag_ipv4_users;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ unsigned int defrag_ipv6_users;
+#endif
};
#endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 10e1777877e6..28085b995ddc 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -325,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
struct fib_nh_common *nhc = &nhi->fib_nhc;
int weight = nhg->nh_entries[i].weight;
- if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0)
+ if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
return -EMSGSIZE;
}
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 6d7b12cba015..bf79f3a890af 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -11,6 +11,7 @@
#include <uapi/linux/pkt_sched.h>
#define DEFAULT_TX_QUEUE_LEN 1000
+#define STAB_SIZE_LOG_MAX 30
struct qdisc_walker {
int stop;
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2eb6d7c2c931..f37c7a558d6d 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -384,11 +384,11 @@ sctp_vtag_verify(const struct sctp_chunk *chunk,
* Verification Tag value does not match the receiver's own
* tag value, the receiver shall silently discard the packet...
*/
- if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)
- return 1;
+ if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag)
+ return 0;
chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port;
- return 0;
+ return 1;
}
/* Check VTAG of the packet matches the sender's own tag and the T bit is
diff --git a/include/net/sock.h b/include/net/sock.h
index 66a9a90f9558..463f390d90b3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -307,6 +307,7 @@ struct bpf_local_storage;
* @sk_priority: %SO_PRIORITY setting
* @sk_type: socket type (%SOCK_STREAM, etc)
* @sk_protocol: which protocol this socket belongs in this network family
+ * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred
* @sk_peer_pid: &struct pid for this socket's peer
* @sk_peer_cred: %SO_PEERCRED setting
* @sk_rcvlowat: %SO_RCVLOWAT setting
@@ -488,8 +489,10 @@ struct sock {
u8 sk_prefer_busy_poll;
u16 sk_busy_poll_budget;
#endif
+ spinlock_t sk_peer_lock;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
+
long sk_rcvtimeo;
ktime_t sk_stamp;
#if BITS_PER_LONG==32
@@ -1205,7 +1208,7 @@ struct proto {
#endif
bool (*stream_memory_free)(const struct sock *sk, int wake);
- bool (*stream_memory_read)(const struct sock *sk);
+ bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
@@ -1623,7 +1626,36 @@ void release_sock(struct sock *sk);
SINGLE_DEPTH_NESTING)
#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken:
+ *
+ * sk_lock.slock locked, owned = 0, BH disabled
+ *
+ * return true if slow path is taken:
+ *
+ * sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+static inline bool lock_sock_fast(struct sock *sk)
+{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
+ return __lock_sock_fast(sk);
+}
+
+/* fast socket lock variant for caller already holding a [different] socket lock */
+static inline bool lock_sock_fast_nested(struct sock *sk)
+{
+ mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+
+ return __lock_sock_fast(sk);
+}
/**
* unlock_sock_fast - complement of lock_sock_fast
@@ -1640,6 +1672,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
release_sock(sk);
__release(&sk->sk_lock.slock);
} else {
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
spin_unlock_bh(&sk->sk_lock.slock);
}
}
@@ -2787,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs);
int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+static inline bool sk_is_readable(struct sock *sk)
+{
+ if (sk->sk_prot->sock_is_readable)
+ return sk->sk_prot->sock_is_readable(sk);
+ return false;
+}
#endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3166dc15d7d6..60c384569e9c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1576,6 +1576,7 @@ struct tcp_md5sig_key {
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
u8 prefixlen;
+ u8 flags;
union tcp_md5_addr addr;
int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
@@ -1621,10 +1622,10 @@ struct tcp_md5sig_pool {
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index);
+ int family, u8 prefixlen, int l3index, u8 flags);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
diff --git a/include/net/tls.h b/include/net/tls.h
index be4b3e1cac46..1fffb206f09f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
+void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
@@ -375,7 +376,7 @@ void tls_sw_release_resources_rx(struct sock *sk);
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
-bool tls_sw_stream_read(const struct sock *sk);
+bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
@@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
#endif
}
-static inline void tls_err_abort(struct sock *sk, int err)
-{
- sk->sk_err = err;
- sk_error_report(sk);
-}
-
static inline bool tls_bigint_increment(unsigned char *seq, int len)
{
int i;
@@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk,
struct cipher_context *ctx)
{
if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
if (prot->version != TLS_1_3_VERSION &&
prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305)
diff --git a/include/net/udp.h b/include/net/udp.h
index 360df454356c..909ecf447e0f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
* CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
* packets in udp_gro_complete_segment. As does UDP GSO, verified by
* udp_send_skb. But when those packets are looped in dev_loopback_xmit
- * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
- * specific case, where PARTIAL is both correct and required.
+ * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY.
+ * Reset in this specific case, where PARTIAL is both correct and
+ * required.
*/
if (skb->pkt_type == PACKET_LOOPBACK)
skb->ip_summed = CHECKSUM_PARTIAL;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 09a17f6e93a7..b97e142a7ca9 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -146,7 +146,6 @@ struct scsi_device {
struct scsi_vpd __rcu *vpd_pg83;
struct scsi_vpd __rcu *vpd_pg80;
struct scsi_vpd __rcu *vpd_pg89;
- unsigned char current_tag; /* current tag */
struct scsi_target *sdev_target;
blist_flags_t sdev_bflags; /* black/white flags as also found in
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 06706a9fd5b1..d7055b41982d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -89,15 +89,6 @@
/* Source PGIDs, one per physical port */
#define PGID_SRC 80
-#define IFH_TAG_TYPE_C 0
-#define IFH_TAG_TYPE_S 1
-
-#define IFH_REW_OP_NOOP 0x0
-#define IFH_REW_OP_DSCP 0x1
-#define IFH_REW_OP_ONE_STEP_PTP 0x2
-#define IFH_REW_OP_TWO_STEP_PTP 0x3
-#define IFH_REW_OP_ORIGIN_PTP 0x5
-
#define OCELOT_NUM_TC 8
#define OCELOT_SPEED_2500 0
@@ -603,10 +594,10 @@ struct ocelot_port {
/* The VLAN ID that will be transmitted as untagged, on egress */
struct ocelot_vlan native_vlan;
+ unsigned int ptp_skbs_in_flight;
u8 ptp_cmd;
struct sk_buff_head tx_skbs;
u8 ts_id;
- spinlock_t ts_id_lock;
phy_interface_t phy_mode;
@@ -680,6 +671,9 @@ struct ocelot {
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_info;
struct hwtstamp_config hwtstamp_config;
+ unsigned int ptp_skbs_in_flight;
+ /* Protects the 2-step TX timestamp ID logic */
+ spinlock_t ts_id_lock;
/* Protects the PTP interface state */
struct mutex ptp_lock;
/* Protects the PTP clock */
@@ -692,15 +686,6 @@ struct ocelot_policer {
u32 burst; /* bytes */
};
-struct ocelot_skb_cb {
- struct sk_buff *clone;
- u8 ptp_cmd;
- u8 ts_id;
-};
-
-#define OCELOT_SKB_CB(skb) \
- ((struct ocelot_skb_cb *)((skb)->cb))
-
#define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
#define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
#define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
@@ -752,8 +737,6 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
u32 val, u32 reg, u32 offset);
-#if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB)
-
/* Packet I/O */
bool ocelot_can_inject(struct ocelot *ocelot, int grp);
void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
@@ -761,36 +744,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
-u32 ocelot_ptp_rew_op(struct sk_buff *skb);
-#else
-
-static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp)
-{
- return false;
-}
-
-static inline void ocelot_port_inject_frame(struct ocelot *ocelot, int port,
- int grp, u32 rew_op,
- struct sk_buff *skb)
-{
-}
-
-static inline int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp,
- struct sk_buff **skb)
-{
- return -EIO;
-}
-
-static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
-{
-}
-
-static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
-{
- return 0;
-}
-#endif
-
/* Hardware initialization */
int ocelot_regfields_init(struct ocelot *ocelot,
const struct reg_field *const regfields);
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index ded497d72bdb..f085884b1fa2 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -13,6 +13,9 @@
#include <linux/ptp_clock_kernel.h>
#include <soc/mscc/ocelot.h>
+#define OCELOT_MAX_PTP_ID 63
+#define OCELOT_PTP_FIFO_SIZE 128
+
#define PTP_PIN_CFG_RSZ 0x20
#define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ
#define PTP_PIN_TOD_SEC_LSB_RSZ PTP_PIN_CFG_RSZ
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 25fd525aaf92..4869ebbd438d 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -694,7 +694,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
int ocelot_vcap_filter_del(struct ocelot *ocelot,
struct ocelot_vcap_filter *rule);
struct ocelot_vcap_filter *
-ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id,
- bool tc_offload);
+ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block,
+ unsigned long cookie, bool tc_offload);
#endif /* _OCELOT_VCAP_H_ */
diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h
index 01570dbda503..0e45963bb767 100644
--- a/include/sound/hda_codec.h
+++ b/include/sound/hda_codec.h
@@ -224,6 +224,7 @@ struct hda_codec {
#endif
/* misc flags */
+ unsigned int configured:1; /* codec was configured */
unsigned int in_freeing:1; /* being released */
unsigned int registered:1; /* codec was registered */
unsigned int display_power_control:1; /* needs display power */
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 989e1517332d..7a08ed2acd60 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -98,6 +98,7 @@ struct snd_rawmidi_file {
struct snd_rawmidi *rmidi;
struct snd_rawmidi_substream *input;
struct snd_rawmidi_substream *output;
+ unsigned int user_pversion; /* supported protocol version */
};
struct snd_rawmidi_str {
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 9f73ed2cf061..bca73e8c8cde 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -306,11 +306,13 @@ enum afs_flock_operation {
enum afs_cb_break_reason {
afs_cb_break_no_break,
+ afs_cb_break_no_promise,
afs_cb_break_for_callback,
afs_cb_break_for_deleted,
afs_cb_break_for_lapsed,
+ afs_cb_break_for_s_reinit,
afs_cb_break_for_unlink,
- afs_cb_break_for_vsbreak,
+ afs_cb_break_for_v_break,
afs_cb_break_for_volume_callback,
afs_cb_break_for_zap,
};
@@ -602,11 +604,13 @@ enum afs_cb_break_reason {
#define afs_cb_break_reasons \
EM(afs_cb_break_no_break, "no-break") \
+ EM(afs_cb_break_no_promise, "no-promise") \
EM(afs_cb_break_for_callback, "break-cb") \
EM(afs_cb_break_for_deleted, "break-del") \
EM(afs_cb_break_for_lapsed, "break-lapsed") \
+ EM(afs_cb_break_for_s_reinit, "s-reinit") \
EM(afs_cb_break_for_unlink, "break-unlink") \
- EM(afs_cb_break_for_vsbreak, "break-vs") \
+ EM(afs_cb_break_for_v_break, "break-v") \
EM(afs_cb_break_for_volume_callback, "break-v-cb") \
E_(afs_cb_break_for_zap, "break-zap")
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 9a448fe9355d..920b6a303d60 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -178,7 +178,7 @@ TRACE_EVENT(cachefiles_unlink,
),
TP_fast_assign(
- __entry->obj = obj->fscache.debug_id;
+ __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX;
__entry->de = de;
__entry->why = why;
),
@@ -205,7 +205,7 @@ TRACE_EVENT(cachefiles_rename,
),
TP_fast_assign(
- __entry->obj = obj->fscache.debug_id;
+ __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX;
__entry->de = de;
__entry->to = to;
__entry->why = why;
@@ -305,7 +305,7 @@ TRACE_EVENT(cachefiles_mark_buried,
),
TP_fast_assign(
- __entry->obj = obj->fscache.debug_id;
+ __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX;
__entry->de = de;
__entry->why = why;
),
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index bf9806fd1306..db4f2cec8360 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -35,20 +35,20 @@ TRACE_EVENT(erofs_lookup,
TP_STRUCT__entry(
__field(dev_t, dev )
__field(erofs_nid_t, nid )
- __field(const char *, name )
+ __string(name, dentry->d_name.name )
__field(unsigned int, flags )
),
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->nid = EROFS_I(dir)->nid;
- __entry->name = dentry->d_name.name;
+ __assign_str(name, dentry->d_name.name);
__entry->flags = flags;
),
TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x",
show_dev_nid(__entry),
- __entry->name,
+ __get_str(name),
__entry->flags)
);
diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h
index 491098a0d8ed..bf7533f171ff 100644
--- a/include/trace/events/kyber.h
+++ b/include/trace/events/kyber.h
@@ -13,11 +13,11 @@
TRACE_EVENT(kyber_latency,
- TP_PROTO(struct request_queue *q, const char *domain, const char *type,
+ TP_PROTO(dev_t dev, const char *domain, const char *type,
unsigned int percentile, unsigned int numerator,
unsigned int denominator, unsigned int samples),
- TP_ARGS(q, domain, type, percentile, numerator, denominator, samples),
+ TP_ARGS(dev, domain, type, percentile, numerator, denominator, samples),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency,
),
TP_fast_assign(
- __entry->dev = disk_devt(q->disk);
+ __entry->dev = dev;
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
strlcpy(__entry->type, type, sizeof(__entry->type));
__entry->percentile = percentile;
@@ -47,10 +47,9 @@ TRACE_EVENT(kyber_latency,
TRACE_EVENT(kyber_adjust,
- TP_PROTO(struct request_queue *q, const char *domain,
- unsigned int depth),
+ TP_PROTO(dev_t dev, const char *domain, unsigned int depth),
- TP_ARGS(q, domain, depth),
+ TP_ARGS(dev, domain, depth),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -59,7 +58,7 @@ TRACE_EVENT(kyber_adjust,
),
TP_fast_assign(
- __entry->dev = disk_devt(q->disk);
+ __entry->dev = dev;
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
__entry->depth = depth;
),
@@ -71,9 +70,9 @@ TRACE_EVENT(kyber_adjust,
TRACE_EVENT(kyber_throttled,
- TP_PROTO(struct request_queue *q, const char *domain),
+ TP_PROTO(dev_t dev, const char *domain),
- TP_ARGS(q, domain),
+ TP_ARGS(dev, domain),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -81,7 +80,7 @@ TRACE_EVENT(kyber_throttled,
),
TP_fast_assign(
- __entry->dev = disk_devt(q->disk);
+ __entry->dev = dev;
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
),
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1d28431e85bd..171524d3526d 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -16,38 +16,38 @@
#define PAGEMAP_MAPPEDDISK 0x0020u
#define PAGEMAP_BUFFERS 0x0040u
-#define trace_pagemap_flags(page) ( \
- (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
- (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \
- (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \
- (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \
- (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \
- (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \
+#define trace_pagemap_flags(folio) ( \
+ (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
+ (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \
+ (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \
+ (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \
+ (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \
+ (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \
)
TRACE_EVENT(mm_lru_insertion,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
- __field(struct page *, page )
+ __field(struct folio *, folio )
__field(unsigned long, pfn )
__field(enum lru_list, lru )
__field(unsigned long, flags )
),
TP_fast_assign(
- __entry->page = page;
- __entry->pfn = page_to_pfn(page);
- __entry->lru = page_lru(page);
- __entry->flags = trace_pagemap_flags(page);
+ __entry->folio = folio;
+ __entry->pfn = folio_pfn(folio);
+ __entry->lru = folio_lru_list(folio);
+ __entry->flags = trace_pagemap_flags(folio);
),
/* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
- __entry->page,
+ TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
+ __entry->folio,
__entry->pfn,
__entry->lru,
__entry->flags & PAGEMAP_MAPPED ? "M" : " ",
@@ -60,23 +60,21 @@ TRACE_EVENT(mm_lru_insertion,
TRACE_EVENT(mm_lru_activate,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
- __field(struct page *, page )
+ __field(struct folio *, folio )
__field(unsigned long, pfn )
),
TP_fast_assign(
- __entry->page = page;
- __entry->pfn = page_to_pfn(page);
+ __entry->folio = folio;
+ __entry->pfn = folio_pfn(folio);
),
- /* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn)
-
+ TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn)
);
#endif /* _TRACE_PAGEMAP_H */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 840d1ba84cf5..7dccb66474f7 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -52,11 +52,11 @@ WB_WORK_REASON
struct wb_writeback_work;
-DECLARE_EVENT_CLASS(writeback_page_template,
+DECLARE_EVENT_CLASS(writeback_folio_template,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping),
+ TP_ARGS(folio, mapping),
TP_STRUCT__entry (
__array(char, name, 32)
@@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_page_template,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
- __entry->index = page->index;
+ __entry->index = folio->index;
),
TP_printk("bdi %s: ino=%lu index=%lu",
@@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_page_template,
)
);
-DEFINE_EVENT(writeback_page_template, writeback_dirty_page,
+DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping)
+ TP_ARGS(folio, mapping)
);
-DEFINE_EVENT(writeback_page_template, wait_on_page_writeback,
+DEFINE_EVENT(writeback_folio_template, folio_wait_writeback,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping)
+ TP_ARGS(folio, mapping)
);
DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
@@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs,
TRACE_EVENT(track_foreign_dirty,
- TP_PROTO(struct page *page, struct bdi_writeback *wb),
+ TP_PROTO(struct folio *folio, struct bdi_writeback *wb),
- TP_ARGS(page, wb),
+ TP_ARGS(folio, wb),
TP_STRUCT__entry(
__array(char, name, 32)
@@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty,
),
TP_fast_assign(
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio_mapping(folio);
struct inode *inode = mapping ? mapping->host : NULL;
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
@@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty,
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
- __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup);
+ __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
),
TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 20e435fe657a..3246f2c74696 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -225,7 +225,14 @@ struct binder_freeze_info {
struct binder_frozen_status_info {
__u32 pid;
+
+ /* process received sync transactions since last frozen
+ * bit 0: received sync transaction after being frozen
+ * bit 1: new pending sync transaction during freezing
+ */
__u32 sync_recv;
+
+ /* process received async transactions since last frozen */
__u32 async_recv;
};
diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h
index 69829205fdb5..8e87d27b0951 100644
--- a/include/uapi/linux/cifs/cifs_mount.h
+++ b/include/uapi/linux/cifs/cifs_mount.h
@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */
/*
- * include/uapi/linux/cifs/cifs_mount.h
*
* Author(s): Scott Lovenberg (scott.lovenberg@gmail.com)
*
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index 6135d92e0d47..daf82a230c0e 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -26,7 +26,7 @@
#ifndef _UAPI_HYPERV_H
#define _UAPI_HYPERV_H
-#include <linux/uuid.h>
+#include <linux/types.h>
/*
* Framework version for util services.
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 59ef35154e3d..b270a07b285e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -317,13 +317,19 @@ enum {
IORING_REGISTER_IOWQ_AFF = 17,
IORING_UNREGISTER_IOWQ_AFF = 18,
- /* set/get max number of workers */
+ /* set/get max number of io-wq workers */
IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
/* this goes last */
IORING_REGISTER_LAST
};
+/* io-wq worker categories */
+enum {
+ IO_WQ_BOUND,
+ IO_WQ_UNBOUND,
+};
+
/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update {
__u32 offset;
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 52b54d13f385..6acd4ccafbf7 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -10,6 +10,7 @@
#define __UAPI_MCTP_H
#include <linux/types.h>
+#include <linux/socket.h>
typedef __u8 mctp_eid_t;
@@ -18,11 +19,13 @@ struct mctp_addr {
};
struct sockaddr_mctp {
- unsigned short int smctp_family;
- int smctp_network;
+ __kernel_sa_family_t smctp_family;
+ __u16 __smctp_pad0;
+ unsigned int smctp_network;
struct mctp_addr smctp_addr;
__u8 smctp_type;
__u8 smctp_tag;
+ __u8 __smctp_pad1;
};
#define MCTP_NET_ANY 0x0
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
new file mode 100644
index 000000000000..7cfe1c1280c0
--- /dev/null
+++ b/include/uapi/linux/vduse.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_VDUSE_H_
+#define _UAPI_VDUSE_H_
+
+#include <linux/types.h>
+
+#define VDUSE_BASE 0x81
+
+/* The ioctls for control device (/dev/vduse/control) */
+
+#define VDUSE_API_VERSION 0
+
+/*
+ * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
+ * This is used for future extension.
+ */
+#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64)
+
+/* Set the version of VDUSE API that userspace supported. */
+#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64)
+
+/**
+ * struct vduse_dev_config - basic configuration of a VDUSE device
+ * @name: VDUSE device name, needs to be NUL terminated
+ * @vendor_id: virtio vendor id
+ * @device_id: virtio device id
+ * @features: virtio features
+ * @vq_num: the number of virtqueues
+ * @vq_align: the allocation alignment of virtqueue's metadata
+ * @reserved: for future use, needs to be initialized to zero
+ * @config_size: the size of the configuration space
+ * @config: the buffer of the configuration space
+ *
+ * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
+ */
+struct vduse_dev_config {
+#define VDUSE_NAME_MAX 256
+ char name[VDUSE_NAME_MAX];
+ __u32 vendor_id;
+ __u32 device_id;
+ __u64 features;
+ __u32 vq_num;
+ __u32 vq_align;
+ __u32 reserved[13];
+ __u32 config_size;
+ __u8 config[];
+};
+
+/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */
+#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
+
+/*
+ * Destroy a VDUSE device. Make sure there are no more references
+ * to the char device (/dev/vduse/$NAME).
+ */
+#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+
+/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
+
+/**
+ * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last]
+ * @offset: the mmap offset on returned file descriptor
+ * @start: start of the IOVA region
+ * @last: last of the IOVA region
+ * @perm: access permission of the IOVA region
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region.
+ */
+struct vduse_iotlb_entry {
+ __u64 offset;
+ __u64 start;
+ __u64 last;
+#define VDUSE_ACCESS_RO 0x1
+#define VDUSE_ACCESS_WO 0x2
+#define VDUSE_ACCESS_RW 0x3
+ __u8 perm;
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return the corresponding file descriptor. Return -EINVAL means the
+ * IOVA region doesn't exist. Caller should set start and last fields.
+ */
+#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry)
+
+/*
+ * Get the negotiated virtio features. It's a subset of the features in
+ * struct vduse_dev_config which can be accepted by virtio driver. It's
+ * only valid after FEATURES_OK status bit is set.
+ */
+#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)
+
+/**
+ * struct vduse_config_data - data used to update configuration space
+ * @offset: the offset from the beginning of configuration space
+ * @length: the length to write to configuration space
+ * @buffer: the buffer used to write from
+ *
+ * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
+ * configuration space.
+ */
+struct vduse_config_data {
+ __u32 offset;
+ __u32 length;
+ __u8 buffer[];
+};
+
+/* Set device configuration space */
+#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
+
+/*
+ * Inject a config interrupt. It's usually used to notify virtio driver
+ * that device configuration space has changed.
+ */
+#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)
+
+/**
+ * struct vduse_vq_config - basic configuration of a virtqueue
+ * @index: virtqueue index
+ * @max_size: the max size of virtqueue
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
+ */
+struct vduse_vq_config {
+ __u32 index;
+ __u16 max_size;
+ __u16 reserved[13];
+};
+
+/*
+ * Setup the specified virtqueue. Make sure all virtqueues have been
+ * configured before the device is attached to vDPA bus.
+ */
+#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config)
+
+/**
+ * struct vduse_vq_state_split - split virtqueue state
+ * @avail_index: available index
+ */
+struct vduse_vq_state_split {
+ __u16 avail_index;
+};
+
+/**
+ * struct vduse_vq_state_packed - packed virtqueue state
+ * @last_avail_counter: last driver ring wrap counter observed by device
+ * @last_avail_idx: device available index
+ * @last_used_counter: device ring wrap counter
+ * @last_used_idx: used index
+ */
+struct vduse_vq_state_packed {
+ __u16 last_avail_counter;
+ __u16 last_avail_idx;
+ __u16 last_used_counter;
+ __u16 last_used_idx;
+};
+
+/**
+ * struct vduse_vq_info - information of a virtqueue
+ * @index: virtqueue index
+ * @num: the size of virtqueue
+ * @desc_addr: address of desc area
+ * @driver_addr: address of driver area
+ * @device_addr: address of device area
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ * @ready: ready status of virtqueue
+ *
+ * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information.
+ */
+struct vduse_vq_info {
+ __u32 index;
+ __u32 num;
+ __u64 desc_addr;
+ __u64 driver_addr;
+ __u64 device_addr;
+ union {
+ struct vduse_vq_state_split split;
+ struct vduse_vq_state_packed packed;
+ };
+ __u8 ready;
+};
+
+/* Get the specified virtqueue's information. Caller should set index field. */
+#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info)
+
+/**
+ * struct vduse_vq_eventfd - eventfd configuration for a virtqueue
+ * @index: virtqueue index
+ * @fd: eventfd, -1 means de-assigning the eventfd
+ *
+ * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd.
+ */
+struct vduse_vq_eventfd {
+ __u32 index;
+#define VDUSE_EVENTFD_DEASSIGN -1
+ int fd;
+};
+
+/*
+ * Setup kick eventfd for specified virtqueue. The kick eventfd is used
+ * by VDUSE kernel module to notify userspace to consume the avail vring.
+ */
+#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd)
+
+/*
+ * Inject an interrupt for specific virtqueue. It's used to notify virtio driver
+ * to consume the used vring.
+ */
+#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
+
+/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
+
+/**
+ * enum vduse_req_type - request type
+ * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace
+ * @VDUSE_SET_STATUS: set the device status
+ * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
+ * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
+ */
+enum vduse_req_type {
+ VDUSE_GET_VQ_STATE,
+ VDUSE_SET_STATUS,
+ VDUSE_UPDATE_IOTLB,
+};
+
+/**
+ * struct vduse_vq_state - virtqueue state
+ * @index: virtqueue index
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ */
+struct vduse_vq_state {
+ __u32 index;
+ union {
+ struct vduse_vq_state_split split;
+ struct vduse_vq_state_packed packed;
+ };
+};
+
+/**
+ * struct vduse_dev_status - device status
+ * @status: device status
+ */
+struct vduse_dev_status {
+ __u8 status;
+};
+
+/**
+ * struct vduse_iova_range - IOVA range [start, last]
+ * @start: start of the IOVA range
+ * @last: last of the IOVA range
+ */
+struct vduse_iova_range {
+ __u64 start;
+ __u64 last;
+};
+
+/**
+ * struct vduse_dev_request - control request
+ * @type: request type
+ * @request_id: request id
+ * @reserved: for future use
+ * @vq_state: virtqueue state, only index field is available
+ * @s: device status
+ * @iova: IOVA range for updating
+ * @padding: padding
+ *
+ * Structure used by read(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_request {
+ __u32 type;
+ __u32 request_id;
+ __u32 reserved[4];
+ union {
+ struct vduse_vq_state vq_state;
+ struct vduse_dev_status s;
+ struct vduse_iova_range iova;
+ __u32 padding[32];
+ };
+};
+
+/**
+ * struct vduse_dev_response - response to control request
+ * @request_id: corresponding request id
+ * @result: the result of request
+ * @reserved: for future use, needs to be initialized to zero
+ * @vq_state: virtqueue state
+ * @padding: padding
+ *
+ * Structure used by write(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_response {
+ __u32 request_id;
+#define VDUSE_REQ_RESULT_OK 0x00
+#define VDUSE_REQ_RESULT_FAILED 0x01
+ __u32 result;
+ __u32 reserved[4];
+ union {
+ struct vduse_vq_state vq_state;
+ __u32 padding[32];
+ };
+};
+
+#endif /* _UAPI_VDUSE_H_ */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 50d352f5e86f..80d76b75bccd 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -54,9 +54,18 @@
#define VIRTIO_ID_SOUND 25 /* virtio sound */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
+#define VIRTIO_ID_RPMB 28 /* virtio rpmb */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */
+#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */
#define VIRTIO_ID_SCMI 32 /* virtio SCMI */
+#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/
#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */
+#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */
+#define VIRTIO_ID_CAN 36 /* virtio can */
+#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */
+#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */
+#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */
#define VIRTIO_ID_BT 40 /* virtio bluetooth */
#define VIRTIO_ID_GPIO 41 /* virtio gpio */
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 3dd3555b2740..64738838bee5 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -97,7 +97,8 @@ enum virtio_vsock_shutdown {
/* VIRTIO_VSOCK_OP_RW flags values */
enum virtio_vsock_rw {
- VIRTIO_VSOCK_SEQ_EOR = 1,
+ VIRTIO_VSOCK_SEQ_EOM = 1,
+ VIRTIO_VSOCK_SEQ_EOR = 2,
};
#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index b96c1ea7166d..eda0426ec4c2 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -213,13 +213,13 @@ enum {
XFRM_MSG_GETSPDINFO,
#define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO
+ XFRM_MSG_MAPPING,
+#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
+
XFRM_MSG_SETDEFAULT,
#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT
XFRM_MSG_GETDEFAULT,
#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT
-
- XFRM_MSG_MAPPING,
-#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
__XFRM_MSG_MAX
};
#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -514,9 +514,12 @@ struct xfrm_user_offload {
#define XFRM_OFFLOAD_INBOUND 2
struct xfrm_userpolicy_default {
-#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8)
- __u8 dirmask;
- __u8 action;
+#define XFRM_USERPOLICY_UNSPEC 0
+#define XFRM_USERPOLICY_BLOCK 1
+#define XFRM_USERPOLICY_ACCEPT 2
+ __u8 in;
+ __u8 fwd;
+ __u8 out;
};
#ifndef __KERNEL__
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a47a731e4527..d13bb8c1b450 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -276,7 +276,17 @@ enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
HL_DEVICE_STATUS_MALFUNCTION,
- HL_DEVICE_STATUS_NEEDS_RESET
+ HL_DEVICE_STATUS_NEEDS_RESET,
+ HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+ HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+};
+
+enum hl_server_type {
+ HL_SERVER_TYPE_UNKNOWN = 0,
+ HL_SERVER_GAUDI_HLS1 = 1,
+ HL_SERVER_GAUDI_HLS1H = 2,
+ HL_SERVER_GAUDI_TYPE1 = 3,
+ HL_SERVER_GAUDI_TYPE2 = 4
};
/* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
+/**
+ * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
+ * @sram_base_address: The first SRAM physical base address that is free to be
+ * used by the user.
+ * @dram_base_address: The first DRAM virtual or physical base address that is
+ * free to be used by the user.
+ * @dram_size: The DRAM size that is available to the user.
+ * @sram_size: The SRAM size that is available to the user.
+ * @num_of_events: The number of events that can be received from the f/w. This
+ * is needed so the user can what is the size of the h/w events
+ * array he needs to pass to the kernel when he wants to fetch
+ * the event counters.
+ * @device_id: PCI device ID of the ASIC.
+ * @module_id: Module ID of the ASIC for mezzanine cards in servers
+ * (From OCP spec).
+ * @first_available_interrupt_id: The first available interrupt ID for the user
+ * to be used when it works with user interrupts.
+ * @server_type: Server type that the Gaudi ASIC is currently installed in.
+ * The value is according to enum hl_server_type
+ * @cpld_version: CPLD version on the board.
+ * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
+ * in some ASICs.
+ * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
+ * for Goya/Gaudi only.
+ * @dram_enabled: Whether the DRAM is enabled.
+ * @cpucp_version: The CPUCP f/w version.
+ * @card_name: The card name as passed by the f/w.
+ * @dram_page_size: The DRAM physical page size.
+ */
struct hl_info_hw_ip_info {
__u64 sram_base_address;
__u64 dram_base_address;
__u64 dram_size;
__u32 sram_size;
__u32 num_of_events;
- __u32 device_id; /* PCI Device ID */
- __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+ __u32 device_id;
+ __u32 module_id;
__u32 reserved;
__u16 first_available_interrupt_id;
- __u16 reserved2;
+ __u16 server_type;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
- __u64 reserved3;
+ __u64 reserved2;
__u64 dram_page_size;
};
@@ -628,12 +670,21 @@ struct hl_cs_chunk {
__u64 cb_handle;
/* Relevant only when HL_CS_FLAGS_WAIT or
- * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set
* This holds address of array of u64 values that contain
- * signal CS sequence numbers. The wait described by this job
- * will listen on all those signals (wait event per signal)
+ * signal CS sequence numbers. The wait described by
+ * this job will listen on all those signals
+ * (wait event per signal)
*/
__u64 signal_seq_arr;
+
+ /*
+ * Relevant only when HL_CS_FLAGS_WAIT or
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set
+ * along with HL_CS_FLAGS_ENCAP_SIGNALS.
+ * This is the CS sequence which has the encapsulated signals.
+ */
+ __u64 encaps_signal_seq;
};
/* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
* Number of entries in signal_seq_arr
*/
__u32 num_signal_seq_arr;
+
+ /* Relevant only when HL_CS_FLAGS_WAIT or
+ * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
+ * with HL_CS_FLAGS_ENCAP_SIGNALS
+ * This set the signals range that the user want to wait for
+ * out of the whole reserved signals range.
+ * e.g if the signals range is 20, and user don't want
+ * to wait for signal 8, so he set this offset to 7, then
+ * he call the API again with 9 and so on till 20.
+ */
+ __u32 encaps_signal_offset;
};
/* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400
+/*
+ * The encapsulated signals CS is merged into the existing CS ioctls.
+ * In order to use this feature need to follow the below procedure:
+ * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
+ * the output of this API will be the SOB offset from CFG_BASE.
+ * this address will be used to patch CB cmds to do the signaling for this
+ * SOB by incrementing it's value.
+ * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
+ * CS type, note that this might fail if out-of-sync happened to the SOB
+ * value, in case other signaling request to the same SOB occurred between
+ * reserve-unreserve calls.
+ * 2. Use the staged CS to do the encapsulated signaling jobs.
+ * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
+ * field. This offset allows app to wait on part of the reserved signals.
+ * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
+ * to wait for the encapsulated signals.
+ */
+#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800
+#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000
+#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000
+
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
@@ -690,10 +774,35 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
- /* Sequence number of a staged submission CS
- * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
- */
- __u64 seq;
+ union {
+ /*
+ * Sequence number of a staged submission CS
+ * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
+ * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
+ */
+ __u64 seq;
+
+ /*
+ * Encapsulated signals handle id
+ * Valid for two flows:
+ * 1. CS with encapsulated signals:
+ * when HL_CS_FLAGS_STAGED_SUBMISSION and
+ * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ * and HL_CS_FLAGS_ENCAP_SIGNALS are set.
+ * 2. unreserve signals:
+ * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
+ */
+ __u32 encaps_sig_handle_id;
+
+ /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+ struct {
+ /* Encapsulated signals number */
+ __u32 encaps_signals_count;
+
+ /* Encapsulated signals queue index (stream) */
+ __u32 encaps_signals_q_idx;
+ };
+ };
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
};
struct hl_cs_out {
+ union {
+ /*
+ * seq holds the sequence number of the CS to pass to wait
+ * ioctl. All values are valid except for 0 and ULLONG_MAX
+ */
+ __u64 seq;
+
+ /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+ struct {
+ /* This is the resereved signal handle id */
+ __u32 handle_id;
+
+ /* This is the signals count */
+ __u32 count;
+ };
+ };
+
+ /* HL_CS_STATUS */
+ __u32 status;
+
/*
- * seq holds the sequence number of the CS to pass to wait ioctl. All
- * values are valid except for 0 and ULLONG_MAX
+ * SOB base address offset
+ * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
*/
- __u64 seq;
- /* HL_CS_STATUS_* */
- __u32 status;
- __u32 pad;
+ __u32 sob_base_addr_offset;
};
union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {
#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
+
+#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
struct hl_wait_cs_in {
union {
struct {
- /* Command submission sequence number */
+ /*
+ * In case of wait_cs holds the CS sequence number.
+ * In case of wait for multi CS hold a user pointer to
+ * an array of CS sequence numbers
+ */
__u64 seq;
/* Absolute timeout to wait for command submission
* in microseconds
@@ -767,19 +900,23 @@ struct hl_wait_cs_in {
/* Context ID - Currently not in use */
__u32 ctx_id;
+
/* HL_WAIT_CS_FLAGS_*
* If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
* interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
* not to specify an interrupt id ,set mask to all 1s.
*/
__u32 flags;
+
+ /* Multi CS API info- valid entries in multi-CS array */
+ __u8 seq_arr_len;
+ __u8 pad[7];
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
#define HL_WAIT_CS_STATUS_BUSY 1
#define HL_WAIT_CS_STATUS_TIMEDOUT 2
#define HL_WAIT_CS_STATUS_ABORTED 3
-#define HL_WAIT_CS_STATUS_INTERRUPTED 4
#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1
#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2
@@ -789,8 +926,15 @@ struct hl_wait_cs_out {
__u32 status;
/* HL_WAIT_CS_STATUS_FLAG* */
__u32 flags;
- /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+ /*
+ * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
+ * for wait_cs: timestamp of CS completion
+ * for wait_multi_cs: timestamp of FIRST CS completion
+ */
__s64 timestamp_nsec;
+ /* multi CS completion bitmap */
+ __u32 cs_completion_map;
+ __u32 pad;
};
union hl_wait_cs_args {
@@ -813,6 +957,7 @@ union hl_wait_cs_args {
#define HL_MEM_CONTIGUOUS 0x1
#define HL_MEM_SHARED 0x2
#define HL_MEM_USERPTR 0x4
+#define HL_MEM_FORCE_HINT 0x8
struct hl_mem_in {
union {
@@ -1140,7 +1285,8 @@ struct hl_debug_args {
* EIO - The CS was aborted (usually because the device was reset)
* ENODEV - The device wants to do hard-reset (so user need to close FD)
*
- * The driver also returns a custom define inside the IOCTL which can be:
+ * The driver also returns a custom define in case the IOCTL call returned 0.
+ * The define can be one of the following:
*
* HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0)
* HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0)
@@ -1148,8 +1294,6 @@ struct hl_debug_args {
* (ETIMEDOUT)
* HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the
* device was reset (EIO)
- * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
- *
*/
#define HL_IOCTL_WAIT_CS \
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 1d84ec9db93b..5859ca0a1439 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -784,6 +784,7 @@ struct snd_rawmidi_status {
#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int)
#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info)
+#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int)
#define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params)
#define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status)
#define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int)
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 39a5580f8feb..a3584a357f35 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -46,30 +46,18 @@ extern unsigned long *xen_contiguous_bitmap;
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
unsigned int address_bits,
dma_addr_t *dma_handle);
-
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
-#else
-static inline int xen_create_contiguous_region(phys_addr_t pstart,
- unsigned int order,
- unsigned int address_bits,
- dma_addr_t *dma_handle)
-{
- return 0;
-}
-
-static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
- unsigned int order) { }
#endif
#if defined(CONFIG_XEN_PV)
int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
- unsigned int domid, bool no_translate, struct page **pages);
+ unsigned int domid, bool no_translate);
#else
static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr,
pgprot_t prot, unsigned int domid,
- bool no_translate, struct page **pages)
+ bool no_translate)
{
BUG();
return 0;
@@ -146,7 +134,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
*/
BUG_ON(err_ptr == NULL);
return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
- false, pages);
+ false);
}
/*
@@ -158,7 +146,6 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
* @err_ptr: Returns per-MFN error status.
* @prot: page protection mask
* @domid: Domain owning the pages
- * @pages: Array of pages if this domain has an auto-translated physmap
*
* @mfn and @err_ptr may point to the same buffer, the MFNs will be
* overwritten by the error codes after they are mapped.
@@ -169,14 +156,13 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr, xen_pfn_t *mfn,
int nr, int *err_ptr,
- pgprot_t prot, unsigned int domid,
- struct page **pages)
+ pgprot_t prot, unsigned int domid)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
- true, pages);
+ true);
}
/* xen_remap_domain_gfn_range() - map a range of foreign frames
@@ -200,8 +186,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
- return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
- pages);
+ return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false);
}
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,