diff options
Diffstat (limited to 'lib')
36 files changed, 1610 insertions, 596 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index e96089499371..7a913937888b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -55,6 +55,22 @@ config ARCH_USE_CMPXCHG_LOCKREF config ARCH_HAS_FAST_MULTIPLIER bool +config INDIRECT_PIO + bool "Access I/O in non-MMIO mode" + depends on ARM64 + help + On some platforms where no separate I/O space exists, there are I/O + hosts which can not be accessed in MMIO mode. Using the logical PIO + mechanism, the host-local I/O resource can be mapped into system + logic PIO space shared with MMIO hosts, such as PCI/PCIe, then the + system can access the I/O devices with the mapped-logic PIO through + I/O accessors. + + This way has relatively little I/O performance cost. Please make + sure your devices really need this configure item enabled. + + When in doubt, say N. + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -413,15 +429,50 @@ config SGL_ALLOC bool default n +config NEED_SG_DMA_LENGTH + bool + +config NEED_DMA_MAP_STATE + bool + +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT || PHYS_ADDR_T_64BIT + +config IOMMU_HELPER + bool + +config ARCH_HAS_SYNC_DMA_FOR_DEVICE + bool + +config ARCH_HAS_SYNC_DMA_FOR_CPU + bool + select NEED_DMA_MAP_STATE + config DMA_DIRECT_OPS bool - depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) - default n + depends on HAS_DMA + +config DMA_NONCOHERENT_OPS + bool + depends on HAS_DMA + select DMA_DIRECT_OPS + +config DMA_NONCOHERENT_MMAP + bool + depends on DMA_NONCOHERENT_OPS + +config DMA_NONCOHERENT_CACHE_SYNC + bool + depends on DMA_NONCOHERENT_OPS config DMA_VIRT_OPS bool - depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) - default n + depends on HAS_DMA + +config SWIOTLB + bool + select DMA_DIRECT_OPS + select NEED_DMA_MAP_STATE config CHECK_SIGNATURE bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 51c6bf0d93c6..76555479ae36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -800,6 +800,30 @@ config SOFTLOCKUP_DETECTOR chance to run. The current stack trace is displayed upon detection and the system will stay locked up. +config BOOTPARAM_SOFTLOCKUP_PANIC + bool "Panic (Reboot) On Soft Lockups" + depends on SOFTLOCKUP_DETECTOR + help + Say Y here to enable the kernel to panic on "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 20 seconds (configurable using the watchdog_thresh + sysctl), without giving other tasks a chance to run. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + lockup has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE + int + depends on SOFTLOCKUP_DETECTOR + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC + config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR @@ -849,30 +873,6 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE default 0 if !BOOTPARAM_HARDLOCKUP_PANIC default 1 if BOOTPARAM_HARDLOCKUP_PANIC -config BOOTPARAM_SOFTLOCKUP_PANIC - bool "Panic (Reboot) On Soft Lockups" - depends on SOFTLOCKUP_DETECTOR - help - Say Y here to enable the kernel to panic on "soft lockups", - which are bugs that cause the kernel to loop in kernel - mode for more than 20 seconds (configurable using the watchdog_thresh - sysctl), without giving other tasks a chance to run. - - The panic can be used in combination with panic_timeout, - to cause the system to reboot automatically after a - lockup has been detected. This feature is useful for - high-availability systems that have uptime guarantees and - where a lockup must be resolved ASAP. - - Say N if unsure. - -config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE - int - depends on SOFTLOCKUP_DETECTOR - range 0 1 - default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC - default 1 if BOOTPARAM_SOFTLOCKUP_PANIC - config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL @@ -1634,7 +1634,7 @@ config PROVIDE_OHCI1394_DMA_INIT config DMA_API_DEBUG bool "Enable debugging of DMA-API usage" - depends on HAVE_DMA_API_DEBUG + select NEED_DMA_MAP_STATE help Enable this option to debug the use of the DMA API by device drivers. With this option you will be able to detect common bugs in device @@ -1651,6 +1651,23 @@ config DMA_API_DEBUG If unsure, say N. +config DMA_API_DEBUG_SG + bool "Debug DMA scatter-gather usage" + default y + depends on DMA_API_DEBUG + help + Perform extra checking that callers of dma_map_sg() have respected the + appropriate segment length/boundary limits for the given device when + preparing DMA scatterlists. + + This is particularly likely to have been overlooked in cases where the + dma_map_sg() API is used for general bulk mapping of pages rather than + preparing literal scatter-gather descriptors, where there is a risk of + unexpected behaviour from DMA API implementations if the scatterlist + is technically out-of-spec. + + If unsure, say N. + menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" def_bool y diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index a669c193b878..19d42ea75ec2 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -46,3 +46,10 @@ config UBSAN_NULL help This option enables detection of memory accesses via a null pointer. + +config TEST_UBSAN + tristate "Module for testing for undefined behavior detection" + depends on m && UBSAN + help + This is a test module for UBSAN. + It triggers various undefined behavior, and detect it. diff --git a/lib/Makefile b/lib/Makefile index 0bd50d71f423..9f18c8152281 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,6 +30,7 @@ lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o +lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o @@ -53,6 +54,9 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o +CFLAGS_test_kasan.o += -fno-builtin +obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o +UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o @@ -82,6 +86,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-y += logic_pio.o + obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o @@ -142,7 +148,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o diff --git a/lib/bitmap.c b/lib/bitmap.c index 9e498c77ed0e..a42eff7e8c48 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, /* if no digit is after '-', it's wrong*/ if (at_start && in_range) return -EINVAL; - if (!(a <= b) || !(used_size <= group_size)) + if (!(a <= b) || group_size == 0 || !(used_size <= group_size)) return -EINVAL; if (b >= nmaskbits) return -ERANGE; diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 7f5cdc1e6b29..c007d25bee09 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -41,6 +41,11 @@ #define HASH_FN_SHIFT 13 #define HASH_FN_MASK (HASH_SIZE - 1) +/* allow architectures to override this if absolutely required */ +#ifndef PREALLOC_DMA_DEBUG_ENTRIES +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +#endif + enum { dma_debug_single, dma_debug_page, @@ -127,7 +132,7 @@ static u32 min_free_entries; static u32 nr_total_entries; /* number of preallocated entries requested by kernel cmdline */ -static u32 req_entries; +static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; /* debugfs dentry's for the stuff above */ static struct dentry *dma_debug_dent __read_mostly; @@ -439,7 +444,6 @@ void debug_dma_dump_mappings(struct device *dev) spin_unlock_irqrestore(&bucket->lock, flags); } } -EXPORT_SYMBOL(debug_dma_dump_mappings); /* * For each mapping (initial cacheline in the case of @@ -748,7 +752,6 @@ int dma_debug_resize_entries(u32 num_entries) return ret; } -EXPORT_SYMBOL(dma_debug_resize_entries); /* * DMA-API debugging init code @@ -1004,10 +1007,7 @@ void dma_debug_add_bus(struct bus_type *bus) bus_register_notifier(bus, nb); } -/* - * Let the architectures decide how many entries should be preallocated. - */ -void dma_debug_init(u32 num_entries) +static int dma_debug_init(void) { int i; @@ -1015,7 +1015,7 @@ void dma_debug_init(u32 num_entries) * called to set dma_debug_initialized */ if (global_disable) - return; + return 0; for (i = 0; i < HASH_SIZE; ++i) { INIT_LIST_HEAD(&dma_entry_hash[i].list); @@ -1026,17 +1026,14 @@ void dma_debug_init(u32 num_entries) pr_err("DMA-API: error creating debugfs entries - disabling\n"); global_disable = true; - return; + return 0; } - if (req_entries) - num_entries = req_entries; - - if (prealloc_memory(num_entries) != 0) { + if (prealloc_memory(nr_prealloc_entries) != 0) { pr_err("DMA-API: debugging out of memory error - disabled\n"); global_disable = true; - return; + return 0; } nr_total_entries = num_free_entries; @@ -1044,7 +1041,9 @@ void dma_debug_init(u32 num_entries) dma_debug_initialized = true; pr_info("DMA-API: debugging enabled by kernel config\n"); + return 0; } +core_initcall(dma_debug_init); static __init int dma_debug_cmdline(char *str) { @@ -1061,16 +1060,10 @@ static __init int dma_debug_cmdline(char *str) static __init int dma_debug_entries_cmdline(char *str) { - int res; - if (!str) return -EINVAL; - - res = get_option(&str, &req_entries); - - if (!res) - req_entries = 0; - + if (!get_option(&str, &nr_prealloc_entries)) + nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; return 0; } @@ -1293,6 +1286,32 @@ out: put_hash_bucket(bucket, &flags); } +static void check_sg_segment(struct device *dev, struct scatterlist *sg) +{ +#ifdef CONFIG_DMA_API_DEBUG_SG + unsigned int max_seg = dma_get_max_seg_size(dev); + u64 start, end, boundary = dma_get_seg_boundary(dev); + + /* + * Either the driver forgot to set dma_parms appropriately, or + * whoever generated the list forgot to check them. + */ + if (sg->length > max_seg) + err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", + sg->length, max_seg); + /* + * In some cases this could potentially be the DMA API + * implementation's fault, but it would usually imply that + * the scatterlist was built inappropriately to begin with. + */ + start = sg_dma_address(sg); + end = start + sg_dma_len(sg) - 1; + if ((start ^ end) & ~boundary) + err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", + start, end, boundary); +#endif +} + void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, bool map_single) @@ -1423,6 +1442,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); } + check_sg_segment(dev, s); + add_dma_entry(entry); } } diff --git a/lib/dma-direct.c b/lib/dma-direct.c index c0bba30fef0a..8be8106270c2 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -34,6 +34,13 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, const char *caller) { if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (!dev->dma_mask) { + dev_err(dev, + "%s: call on device without dma_mask\n", + caller); + return false; + } + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { dev_err(dev, "%s: overflow %pad+%zu of device mask %llx\n", @@ -84,7 +91,15 @@ again: __free_pages(page, page_order); page = NULL; - if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + dev->coherent_dma_mask < DMA_BIT_MASK(64) && + !(gfp & (GFP_DMA32 | GFP_DMA))) { + gfp |= GFP_DMA32; + goto again; + } + + if (IS_ENABLED(CONFIG_ZONE_DMA) && + dev->coherent_dma_mask < DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) { gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; @@ -120,7 +135,7 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, free_pages((unsigned long)cpu_addr, page_order); } -static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { @@ -131,8 +146,8 @@ static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, return dma_addr; } -static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) { int i; struct scatterlist *sg; @@ -164,10 +179,16 @@ int dma_direct_supported(struct device *dev, u64 mask) if (mask < DMA_BIT_MASK(32)) return 0; #endif + /* + * Various PCI/PCIe bridges have broken support for > 32bit DMA even + * if the device itself might support it. + */ + if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) + return 0; return 1; } -static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) +int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == DIRECT_MAPPING_ERROR; } @@ -179,6 +200,5 @@ const struct dma_map_ops dma_direct_ops = { .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_supported, .mapping_error = dma_direct_mapping_error, - .is_phys = 1, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c new file mode 100644 index 000000000000..79e9a757387f --- /dev/null +++ b/lib/dma-noncoherent.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without providing cache + * coherence. + */ +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> +#include <linux/scatterlist.h> + +static void dma_noncoherent_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_noncoherent_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); +} + +static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t addr; + + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(dev, page_to_phys(page) + offset, + size, dir); + return addr; +} + +static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); + if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); + return nents; +} + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +static void dma_noncoherent_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); +} + +static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); +} + +static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); +} + +static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); +} +#endif + +const struct dma_map_ops dma_noncoherent_ops = { + .alloc = arch_dma_alloc, + .free = arch_dma_free, + .mmap = arch_dma_mmap, + .sync_single_for_device = dma_noncoherent_sync_single_for_device, + .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, + .map_page = dma_noncoherent_map_page, + .map_sg = dma_noncoherent_map_sg, +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU + .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, + .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, + .unmap_page = dma_noncoherent_unmap_page, + .unmap_sg = dma_noncoherent_unmap_sg, +#endif + .dma_supported = dma_direct_supported, + .mapping_error = dma_direct_mapping_error, + .cache_sync = arch_dma_cache_sync, +}; +EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/lib/errseq.c b/lib/errseq.c index df782418b333..81f9e33aa7e7 100644 --- a/lib/errseq.c +++ b/lib/errseq.c @@ -111,27 +111,22 @@ EXPORT_SYMBOL(errseq_set); * errseq_sample() - Grab current errseq_t value. * @eseq: Pointer to errseq_t to be sampled. * - * This function allows callers to sample an errseq_t value, marking it as - * "seen" if required. + * This function allows callers to initialise their errseq_t variable. + * If the error has been "seen", new callers will not see an old error. + * If there is an unseen error in @eseq, the caller of this function will + * see it the next time it checks for an error. * + * Context: Any context. * Return: The current errseq value. */ errseq_t errseq_sample(errseq_t *eseq) { errseq_t old = READ_ONCE(*eseq); - errseq_t new = old; - /* - * For the common case of no errors ever having been set, we can skip - * marking the SEEN bit. Once an error has been set, the value will - * never go back to zero. - */ - if (old != 0) { - new |= ERRSEQ_SEEN; - if (old != new) - cmpxchg(eseq, old, new); - } - return new; + /* If nobody has seen this error yet, then we can be the first. */ + if (!(old & ERRSEQ_SEEN)) + old = 0; + return old; } EXPORT_SYMBOL(errseq_sample); diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5985a25e6cbc..5367ffa5c18f 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -132,7 +132,12 @@ static int __init find_bit_test(void) test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); - test_find_first_bit(bitmap, BITMAP_LEN); + + /* + * test_find_first_bit() may take some time, so + * traverse only part of bitmap to avoid soft lockup. + */ + test_find_first_bit(bitmap, BITMAP_LEN / 10); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); diff --git a/lib/iommu-common.c b/lib/iommu-common.c deleted file mode 100644 index 55b00de106b5..000000000000 --- a/lib/iommu-common.c +++ /dev/null @@ -1,267 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * IOMMU mmap management and range allocation functions. - * Based almost entirely upon the powerpc iommu allocator. - */ - -#include <linux/export.h> -#include <linux/bitmap.h> -#include <linux/bug.h> -#include <linux/iommu-helper.h> -#include <linux/iommu-common.h> -#include <linux/dma-mapping.h> -#include <linux/hash.h> - -static unsigned long iommu_large_alloc = 15; - -static DEFINE_PER_CPU(unsigned int, iommu_hash_common); - -static inline bool need_flush(struct iommu_map_table *iommu) -{ - return ((iommu->flags & IOMMU_NEED_FLUSH) != 0); -} - -static inline void set_flush(struct iommu_map_table *iommu) -{ - iommu->flags |= IOMMU_NEED_FLUSH; -} - -static inline void clear_flush(struct iommu_map_table *iommu) -{ - iommu->flags &= ~IOMMU_NEED_FLUSH; -} - -static void setup_iommu_pool_hash(void) -{ - unsigned int i; - static bool do_once; - - if (do_once) - return; - do_once = true; - for_each_possible_cpu(i) - per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); -} - -/* - * Initialize iommu_pool entries for the iommu_map_table. `num_entries' - * is the number of table entries. If `large_pool' is set to true, - * the top 1/4 of the table will be set aside for pool allocations - * of more than iommu_large_alloc pages. - */ -void iommu_tbl_pool_init(struct iommu_map_table *iommu, - unsigned long num_entries, - u32 table_shift, - void (*lazy_flush)(struct iommu_map_table *), - bool large_pool, u32 npools, - bool skip_span_boundary_check) -{ - unsigned int start, i; - struct iommu_pool *p = &(iommu->large_pool); - - setup_iommu_pool_hash(); - if (npools == 0) - iommu->nr_pools = IOMMU_NR_POOLS; - else - iommu->nr_pools = npools; - BUG_ON(npools > IOMMU_NR_POOLS); - - iommu->table_shift = table_shift; - iommu->lazy_flush = lazy_flush; - start = 0; - if (skip_span_boundary_check) - iommu->flags |= IOMMU_NO_SPAN_BOUND; - if (large_pool) - iommu->flags |= IOMMU_HAS_LARGE_POOL; - - if (!large_pool) - iommu->poolsize = num_entries/iommu->nr_pools; - else - iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; - for (i = 0; i < iommu->nr_pools; i++) { - spin_lock_init(&(iommu->pools[i].lock)); - iommu->pools[i].start = start; - iommu->pools[i].hint = start; - start += iommu->poolsize; /* start for next pool */ - iommu->pools[i].end = start - 1; - } - if (!large_pool) - return; - /* initialize large_pool */ - spin_lock_init(&(p->lock)); - p->start = start; - p->hint = p->start; - p->end = num_entries; -} -EXPORT_SYMBOL(iommu_tbl_pool_init); - -unsigned long iommu_tbl_range_alloc(struct device *dev, - struct iommu_map_table *iommu, - unsigned long npages, - unsigned long *handle, - unsigned long mask, - unsigned int align_order) -{ - unsigned int pool_hash = __this_cpu_read(iommu_hash_common); - unsigned long n, end, start, limit, boundary_size; - struct iommu_pool *pool; - int pass = 0; - unsigned int pool_nr; - unsigned int npools = iommu->nr_pools; - unsigned long flags; - bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); - bool largealloc = (large_pool && npages > iommu_large_alloc); - unsigned long shift; - unsigned long align_mask = 0; - - if (align_order > 0) - align_mask = ~0ul >> (BITS_PER_LONG - align_order); - - /* Sanity check */ - if (unlikely(npages == 0)) { - WARN_ON_ONCE(1); - return IOMMU_ERROR_CODE; - } - - if (largealloc) { - pool = &(iommu->large_pool); - pool_nr = 0; /* to keep compiler happy */ - } else { - /* pick out pool_nr */ - pool_nr = pool_hash & (npools - 1); - pool = &(iommu->pools[pool_nr]); - } - spin_lock_irqsave(&pool->lock, flags); - - again: - if (pass == 0 && handle && *handle && - (*handle >= pool->start) && (*handle < pool->end)) - start = *handle; - else - start = pool->hint; - - limit = pool->end; - - /* The case below can happen if we have a small segment appended - * to a large, or when the previous alloc was at the very end of - * the available space. If so, go back to the beginning. If a - * flush is needed, it will get done based on the return value - * from iommu_area_alloc() below. - */ - if (start >= limit) - start = pool->start; - shift = iommu->table_map_base >> iommu->table_shift; - if (limit + shift > mask) { - limit = mask - shift + 1; - /* If we're constrained on address range, first try - * at the masked hint to avoid O(n) search complexity, - * but on second pass, start at 0 in pool 0. - */ - if ((start & mask) >= limit || pass > 0) { - spin_unlock(&(pool->lock)); - pool = &(iommu->pools[0]); - spin_lock(&(pool->lock)); - start = pool->start; - } else { - start &= mask; - } - } - - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << iommu->table_shift); - else - boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); - - boundary_size = boundary_size >> iommu->table_shift; - /* - * if the skip_span_boundary_check had been set during init, we set - * things up so that iommu_is_span_boundary() merely checks if the - * (index + npages) < num_tsb_entries - */ - if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { - shift = 0; - boundary_size = iommu->poolsize * iommu->nr_pools; - } - n = iommu_area_alloc(iommu->map, limit, start, npages, shift, - boundary_size, align_mask); - if (n == -1) { - if (likely(pass == 0)) { - /* First failure, rescan from the beginning. */ - pool->hint = pool->start; - set_flush(iommu); - pass++; - goto again; - } else if (!largealloc && pass <= iommu->nr_pools) { - spin_unlock(&(pool->lock)); - pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); - pool = &(iommu->pools[pool_nr]); - spin_lock(&(pool->lock)); - pool->hint = pool->start; - set_flush(iommu); - pass++; - goto again; - } else { - /* give up */ - n = IOMMU_ERROR_CODE; - goto bail; - } - } - if (iommu->lazy_flush && - (n < pool->hint || need_flush(iommu))) { - clear_flush(iommu); - iommu->lazy_flush(iommu); - } - - end = n + npages; - pool->hint = end; - - /* Update handle for SG allocations */ - if (handle) - *handle = end; -bail: - spin_unlock_irqrestore(&(pool->lock), flags); - - return n; -} -EXPORT_SYMBOL(iommu_tbl_range_alloc); - -static struct iommu_pool *get_pool(struct iommu_map_table *tbl, - unsigned long entry) -{ - struct iommu_pool *p; - unsigned long largepool_start = tbl->large_pool.start; - bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); - - /* The large pool is the last pool at the top of the table */ - if (large_pool && entry >= largepool_start) { - p = &tbl->large_pool; - } else { - unsigned int pool_nr = entry / tbl->poolsize; - - BUG_ON(pool_nr >= tbl->nr_pools); - p = &tbl->pools[pool_nr]; - } - return p; -} - -/* Caller supplies the index of the entry into the iommu map table - * itself when the mapping from dma_addr to the entry is not the - * default addr->entry mapping below. - */ -void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, - unsigned long npages, unsigned long entry) -{ - struct iommu_pool *pool; - unsigned long flags; - unsigned long shift = iommu->table_shift; - - if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */ - entry = (dma_addr - iommu->table_map_base) >> shift; - pool = get_pool(iommu, entry); - - spin_lock_irqsave(&(pool->lock), flags); - bitmap_clear(iommu->map, entry, npages); - spin_unlock_irqrestore(&(pool->lock), flags); -} -EXPORT_SYMBOL(iommu_tbl_range_free); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 23633c0fda4a..92a9f243c0e2 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -3,19 +3,8 @@ * IOMMU helper functions for the free area management */ -#include <linux/export.h> #include <linux/bitmap.h> -#include <linux/bug.h> - -int iommu_is_span_boundary(unsigned int index, unsigned int nr, - unsigned long shift, - unsigned long boundary_size) -{ - BUG_ON(!is_power_of_2(boundary_size)); - - shift = (shift + index) & (boundary_size - 1); - return shift + nr > boundary_size; -} +#include <linux/iommu-helper.h> unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, @@ -38,4 +27,3 @@ again: } return -1; } -EXPORT_SYMBOL(iommu_area_alloc); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 970212670b6a..7e43cd54c84c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +static int copyout_mcsafe(void __user *to, const void *from, size_t n) +{ + if (access_ok(VERIFY_WRITE, to, n)) { + kasan_check_read(from, n); + n = copy_to_user_mcsafe((__force void *) to, from, n); + } + return n; +} + +static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, + const char *from, size_t len) +{ + unsigned long ret; + char *to; + + to = kmap_atomic(page); + ret = memcpy_mcsafe(to + offset, from, len); + kunmap_atomic(to); + + return ret; +} + +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +{ + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, + copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + ({ + rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }), + ({ + rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }) + ) + + return bytes; +} +EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); +#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ + size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; @@ -1012,7 +1073,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); -static inline size_t __pipe_get_pages(struct iov_iter *i, +static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, int idx, @@ -1102,7 +1163,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; - size_t n; + ssize_t n; int idx; int npages; diff --git a/lib/kobject.c b/lib/kobject.c index e1d1f290bf35..18989b5b3b56 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -233,13 +233,12 @@ static int kobject_add_internal(struct kobject *kobj) /* be noisy on error issues */ if (error == -EEXIST) - WARN(1, - "%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", - __func__, kobject_name(kobj)); + pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", + __func__, kobject_name(kobj)); else - WARN(1, "%s failed for %s (error: %d parent: %s)\n", - __func__, kobject_name(kobj), error, - parent ? kobject_name(parent) : "'none'"); + pr_err("%s failed for %s (error: %d parent: %s)\n", + __func__, kobject_name(kobj), error, + parent ? kobject_name(parent) : "'none'"); } else kobj->state_in_sysfs = 1; diff --git a/lib/list_debug.c b/lib/list_debug.c index a34db8d27667..5d5424b51b74 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -21,13 +21,13 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { if (CHECK_DATA_CORRUPTION(next->prev != prev, - "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", + "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n", prev, next->prev, next) || CHECK_DATA_CORRUPTION(prev->next != next, - "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", + "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n", next, prev->next, prev) || CHECK_DATA_CORRUPTION(new == prev || new == next, - "list_add double add: new=%p, prev=%p, next=%p.\n", + "list_add double add: new=%px, prev=%px, next=%px.\n", new, prev, next)) return false; @@ -43,16 +43,16 @@ bool __list_del_entry_valid(struct list_head *entry) next = entry->next; if (CHECK_DATA_CORRUPTION(next == LIST_POISON1, - "list_del corruption, %p->next is LIST_POISON1 (%p)\n", + "list_del corruption, %px->next is LIST_POISON1 (%px)\n", entry, LIST_POISON1) || CHECK_DATA_CORRUPTION(prev == LIST_POISON2, - "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", + "list_del corruption, %px->prev is LIST_POISON2 (%px)\n", entry, LIST_POISON2) || CHECK_DATA_CORRUPTION(prev->next != entry, - "list_del corruption. prev->next should be %p, but was %p\n", + "list_del corruption. prev->next should be %px, but was %px\n", entry, prev->next) || CHECK_DATA_CORRUPTION(next->prev != entry, - "list_del corruption. next->prev should be %p, but was %p\n", + "list_del corruption. next->prev should be %px, but was %px\n", entry, next->prev)) return false; diff --git a/lib/lockref.c b/lib/lockref.c index 47169ed7e964..3d468b53d4c9 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -81,6 +81,34 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** + * lockref_put_not_zero - Decrements count unless count <= 1 before decrement + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count would become zero + */ +int lockref_put_not_zero(struct lockref *lockref) +{ + int retval; + + CMPXCHG_LOOP( + new.count--; + if (old.count <= 1) + return 0; + , + return 1; + ); + + spin_lock(&lockref->lock); + retval = 0; + if (lockref->count > 1) { + lockref->count--; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_put_not_zero); + +/** * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero diff --git a/lib/logic_pio.c b/lib/logic_pio.c new file mode 100644 index 000000000000..feea48fd1a0d --- /dev/null +++ b/lib/logic_pio.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2017 HiSilicon Limited, All Rights Reserved. + * Author: Gabriele Paoloni <gabriele.paoloni@huawei.com> + * Author: Zhichang Yuan <yuanzhichang@hisilicon.com> + */ + +#define pr_fmt(fmt) "LOGIC PIO: " fmt + +#include <linux/of.h> +#include <linux/io.h> +#include <linux/logic_pio.h> +#include <linux/mm.h> +#include <linux/rculist.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +/* The unique hardware address list */ +static LIST_HEAD(io_range_list); +static DEFINE_MUTEX(io_range_mutex); + +/* Consider a kernel general helper for this */ +#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) + +/** + * logic_pio_register_range - register logical PIO range for a host + * @new_range: pointer to the IO range to be registered. + * + * Returns 0 on success, the error code in case of failure. + * + * Register a new IO range node in the IO range list. + */ +int logic_pio_register_range(struct logic_pio_hwaddr *new_range) +{ + struct logic_pio_hwaddr *range; + resource_size_t start; + resource_size_t end; + resource_size_t mmio_sz = 0; + resource_size_t iio_sz = MMIO_UPPER_LIMIT; + int ret = 0; + + if (!new_range || !new_range->fwnode || !new_range->size) + return -EINVAL; + + start = new_range->hw_start; + end = new_range->hw_start + new_range->size; + + mutex_lock(&io_range_mutex); + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->fwnode == new_range->fwnode) { + /* range already there */ + goto end_register; + } + if (range->flags == LOGIC_PIO_CPU_MMIO && + new_range->flags == LOGIC_PIO_CPU_MMIO) { + /* for MMIO ranges we need to check for overlap */ + if (start >= range->hw_start + range->size || + end < range->hw_start) { + mmio_sz += range->size; + } else { + ret = -EFAULT; + goto end_register; + } + } else if (range->flags == LOGIC_PIO_INDIRECT && + new_range->flags == LOGIC_PIO_INDIRECT) { + iio_sz += range->size; + } + } + + /* range not registered yet, check for available space */ + if (new_range->flags == LOGIC_PIO_CPU_MMIO) { + if (mmio_sz + new_range->size - 1 > MMIO_UPPER_LIMIT) { + /* if it's too big check if 64K space can be reserved */ + if (mmio_sz + SZ_64K - 1 > MMIO_UPPER_LIMIT) { + ret = -E2BIG; + goto end_register; + } + new_range->size = SZ_64K; + pr_warn("Requested IO range too big, new size set to 64K\n"); + } + new_range->io_start = mmio_sz; + } else if (new_range->flags == LOGIC_PIO_INDIRECT) { + if (iio_sz + new_range->size - 1 > IO_SPACE_LIMIT) { + ret = -E2BIG; + goto end_register; + } + new_range->io_start = iio_sz; + } else { + /* invalid flag */ + ret = -EINVAL; + goto end_register; + } + + list_add_tail_rcu(&new_range->list, &io_range_list); + +end_register: + mutex_unlock(&io_range_mutex); + return ret; +} + +/** + * find_io_range_by_fwnode - find logical PIO range for given FW node + * @fwnode: FW node handle associated with logical PIO range + * + * Returns pointer to node on success, NULL otherwise. + * + * Traverse the io_range_list to find the registered node for @fwnode. + */ +struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->fwnode == fwnode) + return range; + } + return NULL; +} + +/* Return a registered range given an input PIO token */ +static struct logic_pio_hwaddr *find_io_range(unsigned long pio) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (in_range(pio, range->io_start, range->size)) + return range; + } + pr_err("PIO entry token %lx invalid\n", pio); + return NULL; +} + +/** + * logic_pio_to_hwaddr - translate logical PIO to HW address + * @pio: logical PIO value + * + * Returns HW address if valid, ~0 otherwise. + * + * Translate the input logical PIO to the corresponding hardware address. + * The input PIO should be unique in the whole logical PIO space. + */ +resource_size_t logic_pio_to_hwaddr(unsigned long pio) +{ + struct logic_pio_hwaddr *range; + + range = find_io_range(pio); + if (range) + return range->hw_start + pio - range->io_start; + + return (resource_size_t)~0; +} + +/** + * logic_pio_trans_hwaddr - translate HW address to logical PIO + * @fwnode: FW node reference for the host + * @addr: Host-relative HW address + * @size: size to translate + * + * Returns Logical PIO value if successful, ~0UL otherwise + */ +unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, + resource_size_t addr, resource_size_t size) +{ + struct logic_pio_hwaddr *range; + + range = find_io_range_by_fwnode(fwnode); + if (!range || range->flags == LOGIC_PIO_CPU_MMIO) { + pr_err("IO range not found or invalid\n"); + return ~0UL; + } + if (range->size < size) { + pr_err("resource size %pa cannot fit in IO range size %pa\n", + &size, &range->size); + return ~0UL; + } + return addr - range->hw_start + range->io_start; +} + +unsigned long logic_pio_trans_cpuaddr(resource_size_t addr) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->flags != LOGIC_PIO_CPU_MMIO) + continue; + if (in_range(addr, range->hw_start, range->size)) + return addr - range->hw_start + range->io_start; + } + pr_err("addr %llx not registered in io_range_list\n", + (unsigned long long) addr); + return ~0UL; +} + +#if defined(CONFIG_INDIRECT_PIO) && defined(PCI_IOBASE) +#define BUILD_LOGIC_IO(bw, type) \ +type logic_in##bw(unsigned long addr) \ +{ \ + type ret = (type)~0; \ + \ + if (addr < MMIO_UPPER_LIMIT) { \ + ret = read##bw(PCI_IOBASE + addr); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + ret = entry->ops->in(entry->hostdata, \ + addr, sizeof(type)); \ + else \ + WARN_ON_ONCE(1); \ + } \ + return ret; \ +} \ + \ +void logic_out##bw(type value, unsigned long addr) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + write##bw(value, PCI_IOBASE + addr); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->out(entry->hostdata, \ + addr, value, sizeof(type)); \ + else \ + WARN_ON_ONCE(1); \ + } \ +} \ + \ +void logic_ins##bw(unsigned long addr, void *buffer, \ + unsigned int count) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + reads##bw(PCI_IOBASE + addr, buffer, count); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->ins(entry->hostdata, \ + addr, buffer, sizeof(type), count); \ + else \ + WARN_ON_ONCE(1); \ + } \ + \ +} \ + \ +void logic_outs##bw(unsigned long addr, const void *buffer, \ + unsigned int count) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + writes##bw(PCI_IOBASE + addr, buffer, count); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->outs(entry->hostdata, \ + addr, buffer, sizeof(type), count); \ + else \ + WARN_ON_ONCE(1); \ + } \ +} + +BUILD_LOGIC_IO(b, u8) +EXPORT_SYMBOL(logic_inb); +EXPORT_SYMBOL(logic_insb); +EXPORT_SYMBOL(logic_outb); +EXPORT_SYMBOL(logic_outsb); + +BUILD_LOGIC_IO(w, u16) +EXPORT_SYMBOL(logic_inw); +EXPORT_SYMBOL(logic_insw); +EXPORT_SYMBOL(logic_outw); +EXPORT_SYMBOL(logic_outsw); + +BUILD_LOGIC_IO(l, u32) +EXPORT_SYMBOL(logic_inl); +EXPORT_SYMBOL(logic_insl); +EXPORT_SYMBOL(logic_outl); +EXPORT_SYMBOL(logic_outsl); + +#endif /* CONFIG_INDIRECT_PIO && PCI_IOBASE */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e00138d593f..a9e41aed6de4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -146,7 +146,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { - return root->gfp_mask & __GFP_BITS_MASK; + return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK); } static inline void tag_set(struct radix_tree_node *node, unsigned int tag, @@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter, static void __rcu **skip_siblings(struct radix_tree_node **nodep, void __rcu **slot, struct radix_tree_iter *iter) { - void *sib = node_to_entry(slot - 1); - while (iter->index < iter->next_index) { *nodep = rcu_dereference_raw(*slot); - if (*nodep && *nodep != sib) + if (*nodep && !is_sibling_entry(iter->node, *nodep)) return slot; slot++; iter->index = __radix_tree_iter_add(iter, 1); @@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *node = rcu_dereference_raw(*slot); + struct radix_tree_node *node; slot = skip_siblings(&node, slot, iter); @@ -2036,10 +2034,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node = NULL; - void __rcu **slot; + void __rcu **slot = NULL; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); + if (!slot) + return NULL; if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, get_slot_offset(node, slot)))) return NULL; @@ -2285,6 +2285,7 @@ void __init radix_tree_init(void) int ret; BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); + BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index f01b1cb04f91..3de0d8921286 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -4,3 +4,4 @@ int*.c tables.c neon?.c s390vx?.c +vpermxor*.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 44d6b46df051..2f8b61dfd9b0 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o -raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o @@ -90,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_vpermxor1.o += $(altivec_flags) +targets += vpermxor1.c +$(obj)/vpermxor1.c: UNROLL := 1 +$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor2.o += $(altivec_flags) +targets += vpermxor2.c +$(obj)/vpermxor2.c: UNROLL := 2 +$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor4.o += $(altivec_flags) +targets += vpermxor4.c +$(obj)/vpermxor4.c: UNROLL := 4 +$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor8.o += $(altivec_flags) +targets += vpermxor8.c +$(obj)/vpermxor8.c: UNROLL := 8 +$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + CFLAGS_neon1.o += $(NEON_FLAGS) targets += neon1.c $(obj)/neon1.c: UNROLL := 1 diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index c65aa80d67ed..5065b1e7e327 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec2, &raid6_altivec4, &raid6_altivec8, + &raid6_vpermxor1, + &raid6_vpermxor2, + &raid6_vpermxor4, + &raid6_vpermxor8, #endif #if defined(CONFIG_S390) &raid6_s390vx8, diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 682aae8a1fef..d20ed0d11411 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -24,10 +24,13 @@ #include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + #include <altivec.h> #ifdef __KERNEL__ # include <asm/cputable.h> # include <asm/switch_to.h> +#endif /* __KERNEL__ */ /* * This is the C data type to use. We use a vector of diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index fabc477b1417..5d73f5cb4d8a 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes) CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ - gcc -c -x c - >&/dev/null && \ - rm ./-.o && echo yes) + gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o + CFLAGS += -I../../../arch/powerpc/include + CFLAGS += -DCONFIG_ALTIVEC + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o endif endif @@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk altivec8.c: altivec.uc ../unroll.awk $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ +vpermxor1.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@ + +vpermxor2.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@ + +vpermxor4.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@ + +vpermxor8.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@ + int1.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=1 < int.uc > $@ @@ -117,7 +131,7 @@ tables.c: mktables ./mktables > tables.c clean: - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test spotless: clean rm -f *~ diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc new file mode 100644 index 000000000000..10475dc423c1 --- /dev/null +++ b/lib/raid6/vpermxor.uc @@ -0,0 +1,105 @@ +/* + * Copyright 2017, Matt Brown, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * vpermxor$#.c + * + * Based on H. Peter Anvin's paper - The mathematics of RAID-6 + * + * $#-way unrolled portable integer math RAID-6 instruction set + * This file is postprocessed using unroll.awk + * + * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q + * syndrome calculations. + * This can be run on systems which have both Altivec and vpermxor instruction. + * + * This instruction was introduced in POWER8 - ISA v2.07. + */ + +#include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +#include <asm/cputable.h> +#include <asm/ppc-opcode.h> +#include <asm/switch_to.h> +#endif + +typedef vector unsigned char unative_t; +#define NSIZE sizeof(unative_t) + +static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, + 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, + 0x06, 0x04, 0x02,0x00}; +static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d, + 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80, + 0x60, 0x40, 0x20, 0x00}; + +static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes, + void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + unative_t wp$$, wq$$, wd$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for (d = 0; d < bytes; d += NSIZE*$#) { + wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + + for (z = z0-1; z>=0; z--) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + /* P syndrome */ + wp$$ = vec_xor(wp$$, wd$$); + + /* Q syndrome */ + asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$)); + wq$$ = vec_xor(wq$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs); + + disable_kernel_altivec(); + preempt_enable(); +} + +int raid6_have_altivec_vpermxor(void); +#if $# == 1 +int raid6_have_altivec_vpermxor(void) +{ + /* Check if arch has both altivec and the vpermxor instructions */ +# ifdef __KERNEL__ + return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) && + cpu_has_feature(CPU_FTR_ARCH_207S)); +# else + return 1; +#endif + +} +#endif + +const struct raid6_calls raid6_vpermxor$# = { + raid6_vpermxor$#_gen_syndrome, + NULL, + raid6_have_altivec_vpermxor, + "vpermxor$#", + 0 +}; +#endif diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c index 0ec3f257ffdf..1db74eb098d0 100644 --- a/lib/reed_solomon/decode_rs.c +++ b/lib/reed_solomon/decode_rs.c @@ -1,22 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * lib/reed_solomon/decode_rs.c - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright 2002, Phil Karn, KA9Q * May be used under the terms of the GNU General Public License (GPL) * * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) * - * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $ - * - */ - -/* Generic data width independent code which is included by the - * wrappers. + * Generic data width independent code which is included by the wrappers. */ { + struct rs_codec *rs = rsc->codec; int deg_lambda, el, deg_omega; int i, j, r, k, pad; int nn = rs->nn; @@ -27,16 +21,22 @@ uint16_t *alpha_to = rs->alpha_to; uint16_t *index_of = rs->index_of; uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error; - /* Err+Eras Locator poly and syndrome poly The maximum value - * of nroots is 8. So the necessary stack size will be about - * 220 bytes max. - */ - uint16_t lambda[nroots + 1], syn[nroots]; - uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1]; - uint16_t root[nroots], reg[nroots + 1], loc[nroots]; int count = 0; uint16_t msk = (uint16_t) rs->nn; + /* + * The decoder buffers are in the rs control struct. They are + * arrays sized [nroots + 1] + */ + uint16_t *lambda = rsc->buffers + RS_DECODE_LAMBDA * (nroots + 1); + uint16_t *syn = rsc->buffers + RS_DECODE_SYN * (nroots + 1); + uint16_t *b = rsc->buffers + RS_DECODE_B * (nroots + 1); + uint16_t *t = rsc->buffers + RS_DECODE_T * (nroots + 1); + uint16_t *omega = rsc->buffers + RS_DECODE_OMEGA * (nroots + 1); + uint16_t *root = rsc->buffers + RS_DECODE_ROOT * (nroots + 1); + uint16_t *reg = rsc->buffers + RS_DECODE_REG * (nroots + 1); + uint16_t *loc = rsc->buffers + RS_DECODE_LOC * (nroots + 1); + /* Check length parameter for validity */ pad = nn - nroots - len; BUG_ON(pad < 0 || pad >= nn); diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c index 0b5b1a6728ec..9112d46e869e 100644 --- a/lib/reed_solomon/encode_rs.c +++ b/lib/reed_solomon/encode_rs.c @@ -1,23 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * lib/reed_solomon/encode_rs.c - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright 2002, Phil Karn, KA9Q * May be used under the terms of the GNU General Public License (GPL) * * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) * - * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $ - * - */ - -/* Generic data width independent code which is included by the - * wrappers. - * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par) + * Generic data width independent code which is included by the wrappers. */ { + struct rs_codec *rs = rsc->codec; int i, j, pad; int nn = rs->nn; int nroots = rs->nroots; diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index 06d04cfa9339..dfcf54242fb9 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -1,43 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * lib/reed_solomon/reed_solomon.c - * - * Overview: - * Generic Reed Solomon encoder / decoder library + * Generic Reed Solomon encoder / decoder library * * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) * * Reed Solomon code lifted from reed solomon library written by Phil Karn * Copyright 2002 Phil Karn, KA9Q * - * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Description: * * The generic Reed Solomon library provides runtime configurable * encoding / decoding of RS codes. - * Each user must call init_rs to get a pointer to a rs_control - * structure for the given rs parameters. This structure is either - * generated or a already available matching control structure is used. - * If a structure is generated then the polynomial arrays for - * fast encoding / decoding are built. This can take some time so - * make sure not to call this function from a time critical path. - * Usually a module / driver should initialize the necessary - * rs_control structure on module / driver init and release it - * on exit. - * The encoding puts the calculated syndrome into a given syndrome - * buffer. - * The decoding is a two step process. The first step calculates - * the syndrome over the received (data + syndrome) and calls the - * second stage, which does the decoding / error correction itself. - * Many hw encoders provide a syndrome calculation over the received - * data + syndrome and can call the second stage directly. * + * Each user must call init_rs to get a pointer to a rs_control structure + * for the given rs parameters. The control struct is unique per instance. + * It points to a codec which can be shared by multiple control structures. + * If a codec is newly allocated then the polynomial arrays for fast + * encoding / decoding are built. This can take some time so make sure not + * to call this function from a time critical path. Usually a module / + * driver should initialize the necessary rs_control structure on module / + * driver init and release it on exit. + * + * The encoding puts the calculated syndrome into a given syndrome buffer. + * + * The decoding is a two step process. The first step calculates the + * syndrome over the received (data + syndrome) and calls the second stage, + * which does the decoding / error correction itself. Many hw encoders + * provide a syndrome calculation over the received data + syndrome and can + * call the second stage directly. */ - #include <linux/errno.h> #include <linux/kernel.h> #include <linux/init.h> @@ -46,32 +37,44 @@ #include <linux/slab.h> #include <linux/mutex.h> -/* This list holds all currently allocated rs control structures */ -static LIST_HEAD (rslist); +enum { + RS_DECODE_LAMBDA, + RS_DECODE_SYN, + RS_DECODE_B, + RS_DECODE_T, + RS_DECODE_OMEGA, + RS_DECODE_ROOT, + RS_DECODE_REG, + RS_DECODE_LOC, + RS_DECODE_NUM_BUFFERS +}; + +/* This list holds all currently allocated rs codec structures */ +static LIST_HEAD(codec_list); /* Protection for the list */ static DEFINE_MUTEX(rslistlock); /** - * rs_init - Initialize a Reed-Solomon codec + * codec_init - Initialize a Reed-Solomon codec * @symsize: symbol size, bits (1-8) * @gfpoly: Field generator polynomial coefficients * @gffunc: Field generator function * @fcr: first root of RS code generator polynomial, index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) + * @gfp: GFP_ flags for allocations * - * Allocate a control structure and the polynom arrays for faster + * Allocate a codec structure and the polynom arrays for faster * en/decoding. Fill the arrays according to the given parameters. */ -static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), - int fcr, int prim, int nroots) +static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int), + int fcr, int prim, int nroots, gfp_t gfp) { - struct rs_control *rs; int i, j, sr, root, iprim; + struct rs_codec *rs; - /* Allocate the control structure */ - rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL); - if (rs == NULL) + rs = kzalloc(sizeof(*rs), gfp); + if (!rs) return NULL; INIT_LIST_HEAD(&rs->list); @@ -85,17 +88,17 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), rs->gffunc = gffunc; /* Allocate the arrays */ - rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL); + rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp); if (rs->alpha_to == NULL) - goto errrs; + goto err; - rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL); + rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp); if (rs->index_of == NULL) - goto erralp; + goto err; - rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL); + rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), gfp); if(rs->genpoly == NULL) - goto erridx; + goto err; /* Generate Galois field lookup tables */ rs->index_of[0] = rs->nn; /* log(zero) = -inf */ @@ -120,7 +123,7 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), } /* If it's not primitive, exit */ if(sr != rs->alpha_to[0]) - goto errpol; + goto err; /* Find prim-th root of 1, used in decoding */ for(iprim = 1; (iprim % prim) != 0; iprim += rs->nn); @@ -148,42 +151,52 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), /* convert rs->genpoly[] to index form for quicker encoding */ for (i = 0; i <= nroots; i++) rs->genpoly[i] = rs->index_of[rs->genpoly[i]]; + + rs->users = 1; + list_add(&rs->list, &codec_list); return rs; - /* Error exit */ -errpol: +err: kfree(rs->genpoly); -erridx: kfree(rs->index_of); -erralp: kfree(rs->alpha_to); -errrs: kfree(rs); return NULL; } /** - * free_rs - Free the rs control structure, if it is no longer used - * @rs: the control structure which is not longer used by the + * free_rs - Free the rs control structure + * @rs: The control structure which is not longer used by the * caller + * + * Free the control structure. If @rs is the last user of the associated + * codec, free the codec as well. */ void free_rs(struct rs_control *rs) { + struct rs_codec *cd; + + if (!rs) + return; + + cd = rs->codec; mutex_lock(&rslistlock); - rs->users--; - if(!rs->users) { - list_del(&rs->list); - kfree(rs->alpha_to); - kfree(rs->index_of); - kfree(rs->genpoly); - kfree(rs); + cd->users--; + if(!cd->users) { + list_del(&cd->list); + kfree(cd->alpha_to); + kfree(cd->index_of); + kfree(cd->genpoly); + kfree(cd); } mutex_unlock(&rslistlock); + kfree(rs); } +EXPORT_SYMBOL_GPL(free_rs); /** - * init_rs_internal - Find a matching or allocate a new rs control structure + * init_rs_internal - Allocate rs control, find a matching codec or allocate a new one * @symsize: the symbol size (number of bits) * @gfpoly: the extended Galois field generator polynomial coefficients, * with the 0th coefficient in the low order bit. The polynomial @@ -191,55 +204,69 @@ void free_rs(struct rs_control *rs) * @gffunc: pointer to function to generate the next field element, * or the multiplicative identity element if given 0. Used * instead of gfpoly if gfpoly is 0 - * @fcr: the first consecutive root of the rs code generator polynomial + * @fcr: the first consecutive root of the rs code generator polynomial * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) + * @gfp: GFP_ flags for allocations */ static struct rs_control *init_rs_internal(int symsize, int gfpoly, - int (*gffunc)(int), int fcr, - int prim, int nroots) + int (*gffunc)(int), int fcr, + int prim, int nroots, gfp_t gfp) { - struct list_head *tmp; - struct rs_control *rs; + struct list_head *tmp; + struct rs_control *rs; + unsigned int bsize; /* Sanity checks */ if (symsize < 1) return NULL; if (fcr < 0 || fcr >= (1<<symsize)) - return NULL; + return NULL; if (prim <= 0 || prim >= (1<<symsize)) - return NULL; + return NULL; if (nroots < 0 || nroots >= (1<<symsize)) return NULL; + /* + * The decoder needs buffers in each control struct instance to + * avoid variable size or large fixed size allocations on + * stack. Size the buffers to arrays of [nroots + 1]. + */ + bsize = sizeof(uint16_t) * RS_DECODE_NUM_BUFFERS * (nroots + 1); + rs = kzalloc(sizeof(*rs) + bsize, gfp); + if (!rs) + return NULL; + mutex_lock(&rslistlock); /* Walk through the list and look for a matching entry */ - list_for_each(tmp, &rslist) { - rs = list_entry(tmp, struct rs_control, list); - if (symsize != rs->mm) + list_for_each(tmp, &codec_list) { + struct rs_codec *cd = list_entry(tmp, struct rs_codec, list); + + if (symsize != cd->mm) continue; - if (gfpoly != rs->gfpoly) + if (gfpoly != cd->gfpoly) continue; - if (gffunc != rs->gffunc) + if (gffunc != cd->gffunc) continue; - if (fcr != rs->fcr) + if (fcr != cd->fcr) continue; - if (prim != rs->prim) + if (prim != cd->prim) continue; - if (nroots != rs->nroots) + if (nroots != cd->nroots) continue; /* We have a matching one already */ - rs->users++; + cd->users++; + rs->codec = cd; goto out; } /* Create a new one */ - rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots); - if (rs) { - rs->users = 1; - list_add(&rs->list, &rslist); + rs->codec = codec_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp); + if (!rs->codec) { + kfree(rs); + rs = NULL; } out: mutex_unlock(&rslistlock); @@ -247,45 +274,48 @@ out: } /** - * init_rs - Find a matching or allocate a new rs control structure + * init_rs_gfp - Create a RS control struct and initialize it * @symsize: the symbol size (number of bits) * @gfpoly: the extended Galois field generator polynomial coefficients, * with the 0th coefficient in the low order bit. The polynomial * must be primitive; - * @fcr: the first consecutive root of the rs code generator polynomial + * @fcr: the first consecutive root of the rs code generator polynomial * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) + * @gfp: GFP_ flags for allocations */ -struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim, - int nroots) +struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim, + int nroots, gfp_t gfp) { - return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots); + return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots, gfp); } +EXPORT_SYMBOL_GPL(init_rs_gfp); /** - * init_rs_non_canonical - Find a matching or allocate a new rs control - * structure, for fields with non-canonical - * representation + * init_rs_non_canonical - Allocate rs control struct for fields with + * non-canonical representation * @symsize: the symbol size (number of bits) * @gffunc: pointer to function to generate the next field element, * or the multiplicative identity element if given 0. Used * instead of gfpoly if gfpoly is 0 - * @fcr: the first consecutive root of the rs code generator polynomial + * @fcr: the first consecutive root of the rs code generator polynomial * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) */ struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int), - int fcr, int prim, int nroots) + int fcr, int prim, int nroots) { - return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots); + return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots, + GFP_KERNEL); } +EXPORT_SYMBOL_GPL(init_rs_non_canonical); #ifdef CONFIG_REED_SOLOMON_ENC8 /** * encode_rs8 - Calculate the parity for data values (8bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @len: data length * @par: parity data, must be initialized by caller (usually all 0) @@ -295,7 +325,7 @@ struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int), * symbol size > 8. The calling code must take care of encoding of the * syndrome result for storage itself. */ -int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par, +int encode_rs8(struct rs_control *rsc, uint8_t *data, int len, uint16_t *par, uint16_t invmsk) { #include "encode_rs.c" @@ -306,7 +336,7 @@ EXPORT_SYMBOL_GPL(encode_rs8); #ifdef CONFIG_REED_SOLOMON_DEC8 /** * decode_rs8 - Decode codeword (8bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @par: received parity data field * @len: data length @@ -319,9 +349,14 @@ EXPORT_SYMBOL_GPL(encode_rs8); * The syndrome and parity uses a uint16_t data type to enable * symbol size > 8. The calling code must take care of decoding of the * syndrome result and the received parity before calling this code. + * + * Note: The rs_control struct @rsc contains buffers which are used for + * decoding, so the caller has to ensure that decoder invocations are + * serialized. + * * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. */ -int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len, +int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, uint16_t *corr) { @@ -333,7 +368,7 @@ EXPORT_SYMBOL_GPL(decode_rs8); #ifdef CONFIG_REED_SOLOMON_ENC16 /** * encode_rs16 - Calculate the parity for data values (16bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @len: data length * @par: parity data, must be initialized by caller (usually all 0) @@ -341,7 +376,7 @@ EXPORT_SYMBOL_GPL(decode_rs8); * * Each field in the data array contains up to symbol size bits of valid data. */ -int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par, +int encode_rs16(struct rs_control *rsc, uint16_t *data, int len, uint16_t *par, uint16_t invmsk) { #include "encode_rs.c" @@ -352,7 +387,7 @@ EXPORT_SYMBOL_GPL(encode_rs16); #ifdef CONFIG_REED_SOLOMON_DEC16 /** * decode_rs16 - Decode codeword (16bit data width) - * @rs: the rs control structure + * @rsc: the rs control structure * @data: data field of a given type * @par: received parity data field * @len: data length @@ -363,9 +398,14 @@ EXPORT_SYMBOL_GPL(encode_rs16); * @corr: buffer to store correction bitmask on eras_pos * * Each field in the data array contains up to symbol size bits of valid data. + * + * Note: The rc_control struct @rsc contains buffers which are used for + * decoding, so the caller has to ensure that decoder invocations are + * serialized. + * * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. */ -int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, +int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, uint16_t *corr) { @@ -374,10 +414,6 @@ int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, EXPORT_SYMBOL_GPL(decode_rs16); #endif -EXPORT_SYMBOL_GPL(init_rs); -EXPORT_SYMBOL_GPL(init_rs_non_canonical); -EXPORT_SYMBOL_GPL(free_rs); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Reed Solomon encoder/decoder"); MODULE_AUTHOR("Phil Karn, Thomas Gleixner"); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index e6a9c06ec70c..6fdc6267f4a8 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) } EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); -static unsigned int sbq_calc_wake_batch(unsigned int depth) +static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { unsigned int wake_batch; + unsigned int shallow_depth; /* * For each batch, we wake up one queue. We need to make sure that our - * batch size is small enough that the full depth of the bitmap is - * enough to wake up all of the queues. + * batch size is small enough that the full depth of the bitmap, + * potentially limited by a shallow depth, is enough to wake up all of + * the queues. + * + * Each full word of the bitmap has bits_per_word bits, and there might + * be a partial word. There are depth / bits_per_word full words and + * depth % bits_per_word bits left over. In bitwise arithmetic: + * + * bits_per_word = 1 << shift + * depth / bits_per_word = depth >> shift + * depth % bits_per_word = depth & ((1 << shift) - 1) + * + * Each word can be limited to sbq->min_shallow_depth bits. */ - wake_batch = SBQ_WAKE_BATCH; - if (wake_batch > depth / SBQ_WAIT_QUEUES) - wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); + shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); + depth = ((depth >> sbq->sb.shift) * shallow_depth + + min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); + wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, + SBQ_WAKE_BATCH); return wake_batch; } @@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; } - sbq->wake_batch = sbq_calc_wake_batch(depth); + sbq->min_shallow_depth = UINT_MAX; + sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); @@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { - unsigned int wake_batch = sbq_calc_wake_batch(depth); + unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { WRITE_ONCE(sbq->wake_batch, wake_batch); /* - * Pairs with the memory barrier in sbq_wake_up() to ensure that - * the batch size is updated before the wait counts. + * Pairs with the memory barrier in sbitmap_queue_wake_up() + * to ensure that the batch size is updated before the wait + * counts. */ smp_mb__before_atomic(); for (i = 0; i < SBQ_WAIT_QUEUES; i++) atomic_set(&sbq->ws[i].wait_cnt, 1); } +} + +void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) +{ + sbitmap_queue_update_wake_batch(sbq, depth); sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); @@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int hint, depth; int nr; + WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); + hint = this_cpu_read(*sbq->alloc_hint); depth = READ_ONCE(sbq->sb.depth); if (unlikely(hint >= depth)) { @@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, } EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); +void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, + unsigned int min_shallow_depth) +{ + sbq->min_shallow_depth = min_shallow_depth; + sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); + static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; @@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) return NULL; } -static void sbq_wake_up(struct sbitmap_queue *sbq) +static bool __sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; unsigned int wake_batch; int wait_cnt; - /* - * Pairs with the memory barrier in set_current_state() to ensure the - * proper ordering of clear_bit()/waitqueue_active() in the waker and - * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the - * waiter. See the comment on waitqueue_active(). This is __after_atomic - * because we just did clear_bit_unlock() in the caller. - */ - smp_mb__after_atomic(); - ws = sbq_wake_ptr(sbq); if (!ws) - return; + return false; wait_cnt = atomic_dec_return(&ws->wait_cnt); if (wait_cnt <= 0) { + int ret; + wake_batch = READ_ONCE(sbq->wake_batch); + /* * Pairs with the memory barrier in sbitmap_queue_resize() to * ensure that we see the batch size update before the wait * count is reset. */ smp_mb__before_atomic(); + /* - * If there are concurrent callers to sbq_wake_up(), the last - * one to decrement the wait count below zero will bump it back - * up. If there is a concurrent resize, the count reset will - * either cause the cmpxchg to fail or overwrite after the - * cmpxchg. + * For concurrent callers of this, the one that failed the + * atomic_cmpxhcg() race should call this function again + * to wakeup a new batch on a different 'ws'. */ - atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); - sbq_index_atomic_inc(&sbq->wake_index); - wake_up_nr(&ws->wait, wake_batch); + ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); + if (ret == wait_cnt) { + sbq_index_atomic_inc(&sbq->wake_index); + wake_up_nr(&ws->wait, wake_batch); + return false; + } + + return true; } + + return false; +} + +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) +{ + while (__sbq_wake_up(sbq)) + ; } +EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { sbitmap_clear_bit_unlock(&sbq->sb, nr); - sbq_wake_up(sbq); + /* + * Pairs with the memory barrier in set_current_state() to ensure the + * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker + * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the + * waiter. See the comment on waitqueue_active(). + */ + smp_mb__after_atomic(); + sbitmap_queue_wake_up(sbq); + if (likely(!sbq->round_robin && nr < sbq->sb.depth)) *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; } @@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) /* * Pairs with the memory barrier in set_current_state() like in - * sbq_wake_up(). + * sbitmap_queue_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); @@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_puts(m, "}\n"); seq_printf(m, "round_robin=%d\n", sbq->round_robin); + seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_show); diff --git a/lib/sha256.c b/lib/sha256.c new file mode 100644 index 000000000000..4400c832e2aa --- /dev/null +++ b/lib/sha256.c @@ -0,0 +1,283 @@ +/* + * SHA-256, as specified in + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + * + * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>. + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2014 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/bitops.h> +#include <linux/sha256.h> +#include <linux/string.h> +#include <asm/byteorder.h> + +static inline u32 Ch(u32 x, u32 y, u32 z) +{ + return z ^ (x & (y ^ z)); +} + +static inline u32 Maj(u32 x, u32 y, u32 z) +{ + return (x & y) | (z & (x | y)); +} + +#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) +#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) +#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) +#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) + +static inline void LOAD_OP(int I, u32 *W, const u8 *input) +{ + W[I] = __be32_to_cpu(((__be32 *)(input))[I]); +} + +static inline void BLEND_OP(int I, u32 *W) +{ + W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; +} + +static void sha256_transform(u32 *state, const u8 *input) +{ + u32 a, b, c, d, e, f, g, h, t1, t2; + u32 W[64]; + int i; + + /* load the input */ + for (i = 0; i < 16; i++) + LOAD_OP(i, W, input); + + /* now blend */ + for (i = 16; i < 64; i++) + BLEND_OP(i, W); + + /* load the state into our registers */ + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + /* now iterate */ + t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; + + /* clear any sensitive info... */ + a = b = c = d = e = f = g = h = t1 = t2 = 0; + memset(W, 0, 64 * sizeof(u32)); +} + +int sha256_init(struct sha256_state *sctx) +{ + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) +{ + unsigned int partial, done; + const u8 *src; + + partial = sctx->count & 0x3f; + sctx->count += len; + done = 0; + src = data; + + if ((partial + len) > 63) { + if (partial) { + done = -partial; + memcpy(sctx->buf + partial, data, done + 64); + src = sctx->buf; + } + + do { + sha256_transform(sctx->state, src); + done += 64; + src = data + done; + } while (done + 63 < len); + + partial = 0; + } + memcpy(sctx->buf + partial, src, len - done); + + return 0; +} + +int sha256_final(struct sha256_state *sctx, u8 *out) +{ + __be32 *dst = (__be32 *)out; + __be64 bits; + unsigned int index, pad_len; + int i; + static const u8 padding[64] = { 0x80, }; + + /* Save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64. */ + index = sctx->count & 0x3f; + pad_len = (index < 56) ? (56 - index) : ((64+56) - index); + sha256_update(sctx, padding, pad_len); + + /* Append length (before padding) */ + sha256_update(sctx, (const u8 *)&bits, sizeof(bits)); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Zeroize sensitive information. */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 47aeb04c1997..04b68d9dffac 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -593,9 +593,8 @@ found: } /* - * Allocates bounce buffer and returns its kernel virtual address. + * Allocates bounce buffer and returns its physical address. */ - static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -614,7 +613,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, } /* - * dma_addr is the kernel virtual address of the bounce buffer to unmap. + * tlb_addr is the physical address of the bounce buffer to unmap. */ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, @@ -692,7 +691,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } } -#ifdef CONFIG_DMA_DIRECT_OPS static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, size_t size) { @@ -714,12 +712,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, phys_addr = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, 0); + 0, size, DMA_FROM_DEVICE, attrs); if (phys_addr == SWIOTLB_MAP_ERROR) goto out_warn; *dma_handle = __phys_to_dma(dev, phys_addr); - if (dma_coherent_ok(dev, *dma_handle, size)) + if (!dma_coherent_ok(dev, *dma_handle, size)) goto out_unmap; memset(phys_to_virt(phys_addr), 0, size); @@ -727,7 +725,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, out_unmap: dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)(dev ? dev->coherent_dma_mask : 0), + (unsigned long long)dev->coherent_dma_mask, (unsigned long long)*dma_handle); /* @@ -737,7 +735,7 @@ out_unmap: swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); out_warn: - if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { dev_warn(dev, "swiotlb: coherent allocation failed, size=%zu\n", size); @@ -764,7 +762,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, DMA_ATTR_SKIP_CPU_SYNC); return true; } -#endif static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, @@ -1045,7 +1042,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } -#ifdef CONFIG_DMA_DIRECT_OPS void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -1087,6 +1083,5 @@ const struct dma_map_ops swiotlb_dma_ops = { .unmap_sg = swiotlb_unmap_sg_attrs, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, - .dma_supported = swiotlb_dma_supported, + .dma_supported = dma_direct_supported, }; -#endif /* CONFIG_DMA_DIRECT_OPS */ diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index b3f235baa05d..6cd7d0740005 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -255,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {-EINVAL, "-1", NULL, 8, 0}, {-EINVAL, "-0", NULL, 8, 0}, {-EINVAL, "10-1", NULL, 8, 0}, + {-EINVAL, "0-31:", NULL, 8, 0}, + {-EINVAL, "0-31:0", NULL, 8, 0}, + {-EINVAL, "0-31:0/0", NULL, 8, 0}, + {-EINVAL, "0-31:1/0", NULL, 8, 0}, {-EINVAL, "0-31:10/1", NULL, 8, 0}, }; @@ -292,15 +296,17 @@ static void __init test_bitmap_parselist(void) } } +#define EXP_BYTES (sizeof(exp) * 8) + static void __init test_bitmap_arr32(void) { - unsigned int nbits, next_bit, len = sizeof(exp) * 8; + unsigned int nbits, next_bit; u32 arr[sizeof(exp) / 4]; - DECLARE_BITMAP(bmap2, len); + DECLARE_BITMAP(bmap2, EXP_BYTES); memset(arr, 0xa5, sizeof(arr)); - for (nbits = 0; nbits < len; ++nbits) { + for (nbits = 0; nbits < EXP_BYTES; ++nbits) { bitmap_to_arr32(arr, exp, nbits); bitmap_from_arr32(bmap2, arr, nbits); expect_eq_bitmap(bmap2, exp, nbits); @@ -312,7 +318,7 @@ static void __init test_bitmap_arr32(void) " tail is not safely cleared: %d\n", nbits, next_bit); - if (nbits < len - 32) + if (nbits < EXP_BYTES - 32) expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], 0xa5a5a5a5); } @@ -325,23 +331,32 @@ static void noinline __init test_mem_optimisations(void) unsigned int start, nbits; for (start = 0; start < 1024; start += 8) { - memset(bmap1, 0x5a, sizeof(bmap1)); - memset(bmap2, 0x5a, sizeof(bmap2)); for (nbits = 0; nbits < 1024 - start; nbits += 8) { + memset(bmap1, 0x5a, sizeof(bmap1)); + memset(bmap2, 0x5a, sizeof(bmap2)); + bitmap_set(bmap1, start, nbits); __bitmap_set(bmap2, start, nbits); - if (!bitmap_equal(bmap1, bmap2, 1024)) + if (!bitmap_equal(bmap1, bmap2, 1024)) { printk("set not equal %d %d\n", start, nbits); - if (!__bitmap_equal(bmap1, bmap2, 1024)) + failed_tests++; + } + if (!__bitmap_equal(bmap1, bmap2, 1024)) { printk("set not __equal %d %d\n", start, nbits); + failed_tests++; + } bitmap_clear(bmap1, start, nbits); __bitmap_clear(bmap2, start, nbits); - if (!bitmap_equal(bmap1, bmap2, 1024)) + if (!bitmap_equal(bmap1, bmap2, 1024)) { printk("clear not equal %d %d\n", start, nbits); - if (!__bitmap_equal(bmap1, bmap2, 1024)) + failed_tests++; + } + if (!__bitmap_equal(bmap1, bmap2, 1024)) { printk("clear not __equal %d %d\n", start, nbits); + failed_tests++; + } } } } diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 078a61480573..cee000ac54d8 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -21,6 +21,7 @@ #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/vmalloc.h> #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 98854a64b014..ec657105edbf 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -567,7 +567,15 @@ static noinline void __init kmem_cache_invalid_free(void) return; } + /* Trigger invalid free, the object doesn't get freed */ kmem_cache_free(cache, p + 1); + + /* + * Properly free the object to prevent the "Objects remaining in + * test_cache on __kmem_cache_shutdown" BUG failure. + */ + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); } diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c new file mode 100644 index 000000000000..280f4979d00e --- /dev/null +++ b/lib/test_ubsan.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +typedef void(*test_ubsan_fp)(void); + +static void test_ubsan_add_overflow(void) +{ + volatile int val = INT_MAX; + + val += 2; +} + +static void test_ubsan_sub_overflow(void) +{ + volatile int val = INT_MIN; + volatile int val2 = 2; + + val -= val2; +} + +static void test_ubsan_mul_overflow(void) +{ + volatile int val = INT_MAX / 2; + + val *= 3; +} + +static void test_ubsan_negate_overflow(void) +{ + volatile int val = INT_MIN; + + val = -val; +} + +static void test_ubsan_divrem_overflow(void) +{ + volatile int val = 16; + volatile int val2 = 0; + + val /= val2; +} + +static void test_ubsan_vla_bound_not_positive(void) +{ + volatile int size = -1; + char buf[size]; + + (void)buf; +} + +static void test_ubsan_shift_out_of_bounds(void) +{ + volatile int val = -1; + int val2 = 10; + + val2 <<= val; +} + +static void test_ubsan_out_of_bounds(void) +{ + volatile int i = 4, j = 5; + volatile int arr[i]; + + arr[j] = i; +} + +static void test_ubsan_load_invalid_value(void) +{ + volatile char *dst, *src; + bool val, val2, *ptr; + char c = 4; + + dst = (char *)&val; + src = &c; + *dst = *src; + + ptr = &val2; + val2 = val; +} + +static void test_ubsan_null_ptr_deref(void) +{ + volatile int *ptr = NULL; + int val; + + val = *ptr; +} + +static void test_ubsan_misaligned_access(void) +{ + volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5}; + volatile int *ptr, val = 6; + + ptr = (int *)(arr + 1); + *ptr = val; +} + +static void test_ubsan_object_size_mismatch(void) +{ + /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */ + volatile int val __aligned(8) = 4; + volatile long long *ptr, val2; + + ptr = (long long *)&val; + val2 = *ptr; +} + +static const test_ubsan_fp test_ubsan_array[] = { + test_ubsan_add_overflow, + test_ubsan_sub_overflow, + test_ubsan_mul_overflow, + test_ubsan_negate_overflow, + test_ubsan_divrem_overflow, + test_ubsan_vla_bound_not_positive, + test_ubsan_shift_out_of_bounds, + test_ubsan_out_of_bounds, + test_ubsan_load_invalid_value, + //test_ubsan_null_ptr_deref, /* exclude it because there is a crash */ + test_ubsan_misaligned_access, + test_ubsan_object_size_mismatch, +}; + +static int __init test_ubsan_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++) + test_ubsan_array[i](); + + (void)test_ubsan_null_ptr_deref; /* to avoid unsed-function warning */ + return 0; +} +module_init(test_ubsan_init); + +static void __exit test_ubsan_exit(void) +{ + /* do nothing */ +} +module_exit(test_ubsan_exit); + +MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/lib/textsearch.c b/lib/textsearch.c index 0b79908dfe89..5939549c0e7b 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -10,7 +10,10 @@ * Pablo Neira Ayuso <pablo@netfilter.org> * * ========================================================================== - * + */ + +/** + * DOC: ts_intro * INTRODUCTION * * The textsearch infrastructure provides text searching facilities for @@ -19,7 +22,9 @@ * * ARCHITECTURE * - * User + * .. code-block:: none + * + * User * +----------------+ * | finish()|<--------------(6)-----------------+ * |get_next_block()|<--------------(5)---------------+ | @@ -33,21 +38,21 @@ * | (3)|----->| find()/next() |-----------+ | * | (7)|----->| destroy() |----------------------+ * +----------------+ +---------------+ - * - * (1) User configures a search by calling _prepare() specifying the - * search parameters such as the pattern and algorithm name. + * + * (1) User configures a search by calling textsearch_prepare() specifying + * the search parameters such as the pattern and algorithm name. * (2) Core requests the algorithm to allocate and initialize a search * configuration according to the specified parameters. - * (3) User starts the search(es) by calling _find() or _next() to - * fetch subsequent occurrences. A state variable is provided - * to the algorithm to store persistent variables. + * (3) User starts the search(es) by calling textsearch_find() or + * textsearch_next() to fetch subsequent occurrences. A state variable + * is provided to the algorithm to store persistent variables. * (4) Core eventually resets the search offset and forwards the find() * request to the algorithm. * (5) Algorithm calls get_next_block() provided by the user continuously * to fetch the data to be searched in block by block. * (6) Algorithm invokes finish() after the last call to get_next_block * to clean up any leftovers from get_next_block. (Optional) - * (7) User destroys the configuration by calling _destroy(). + * (7) User destroys the configuration by calling textsearch_destroy(). * (8) Core notifies the algorithm to destroy algorithm specific * allocations. (Optional) * @@ -62,9 +67,10 @@ * amount of times and even in parallel as long as a separate struct * ts_state variable is provided to every instance. * - * The actual search is performed by either calling textsearch_find_- - * continuous() for linear data or by providing an own get_next_block() - * implementation and calling textsearch_find(). Both functions return + * The actual search is performed by either calling + * textsearch_find_continuous() for linear data or by providing + * an own get_next_block() implementation and + * calling textsearch_find(). Both functions return * the position of the first occurrence of the pattern or UINT_MAX if * no match was found. Subsequent occurrences can be found by calling * textsearch_next() regardless of the linearity of the data. @@ -72,7 +78,7 @@ * Once you're done using a configuration it must be given back via * textsearch_destroy. * - * EXAMPLE + * EXAMPLE:: * * int pos; * struct ts_config *conf; @@ -87,13 +93,13 @@ * goto errout; * } * - * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); + * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); * if (pos != UINT_MAX) - * panic("Oh my god, dancing chickens at %d\n", pos); + * panic("Oh my god, dancing chickens at \%d\n", pos); * * textsearch_destroy(conf); - * ========================================================================== */ +/* ========================================================================== */ #include <linux/module.h> #include <linux/types.h> @@ -225,7 +231,7 @@ static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, * * Returns the position of first occurrence of the pattern or * %UINT_MAX if no occurrence was found. - */ + */ unsigned int textsearch_find_continuous(struct ts_config *conf, struct ts_state *state, const void *data, unsigned int len) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8999202ad43b..a48aaa79d352 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -336,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n) * * If speed is not important, use snprintf(). It's easy to read the code. */ -int num_to_str(char *buf, int size, unsigned long long num) +int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) { /* put_dec requires 2-byte alignment of the buffer. */ char tmp[sizeof(num) * 3] __aligned(2); @@ -350,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num) len = put_dec(tmp, num) - tmp; } - if (len > size) + if (len > size || width > size) return 0; + + if (width > len) { + width = width - len; + for (idx = 0; idx < width; idx++) + buf[idx] = ' '; + } else { + width = 0; + } + for (idx = 0; idx < len; ++idx) - buf[idx] = tmp[len - idx - 1]; - return len; + buf[idx + width] = tmp[len - idx - 1]; + + return len + width; } #define SIGN 1 /* unsigned/signed, must be 1 */ @@ -1641,19 +1651,22 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } -static bool have_filled_random_ptr_key __read_mostly; +static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); static siphash_key_t ptr_key __read_mostly; -static void fill_random_ptr_key(struct random_ready_callback *unused) +static void enable_ptr_key_workfn(struct work_struct *work) { get_random_bytes(&ptr_key, sizeof(ptr_key)); - /* - * have_filled_random_ptr_key==true is dependent on get_random_bytes(). - * ptr_to_id() needs to see have_filled_random_ptr_key==true - * after get_random_bytes() returns. - */ - smp_mb(); - WRITE_ONCE(have_filled_random_ptr_key, true); + /* Needs to run from preemptible context */ + static_branch_disable(¬_filled_random_ptr_key); +} + +static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); + +static void fill_random_ptr_key(struct random_ready_callback *unused) +{ + /* This may be in an interrupt handler. */ + queue_work(system_unbound_wq, &enable_ptr_key_work); } static struct random_ready_callback random_ready = { @@ -1667,7 +1680,8 @@ static int __init initialize_ptr_random(void) if (!ret) { return 0; } else if (ret == -EALREADY) { - fill_random_ptr_key(&random_ready); + /* This is in preemptible context */ + enable_ptr_key_workfn(&enable_ptr_key_work); return 0; } @@ -1681,7 +1695,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)"; unsigned long hashval; - if (unlikely(!have_filled_random_ptr_key)) { + if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ return string(buf, end, str, spec); @@ -2562,6 +2576,8 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) case 's': case 'F': case 'f': + case 'x': + case 'K': save_arg(void *); break; default: @@ -2736,6 +2752,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) case 's': case 'F': case 'f': + case 'x': + case 'K': process = true; break; default: |