summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2024-10-30 20:13:46 +0100
committerAlexei Starovoitov <ast@kernel.org>2024-10-30 20:13:55 +0100
commit053b212b3a356e47fe7772fbf19e07721393ba72 (patch)
treefbf780dd9c3ca0a1197f6304c2a5a838c3f4ee40
parentbpf: disallow 40-bytes extra stack for bpf_fastcall patterns (diff)
parentselftests/bpf: Add three test cases for bits_iter (diff)
downloadlinux-053b212b3a356e47fe7772fbf19e07721393ba72.tar.xz
linux-053b212b3a356e47fe7772fbf19e07721393ba72.zip
Merge branch 'fixes-for-bits-iterator'
Hou Tao says: ==================== The patch set fixes several issues in bits iterator. Patch #1 fixes the kmemleak problem of bits iterator. Patch #2~#3 fix the overflow problem of nr_bits. Patch #4 fixes the potential stack corruption when bits iterator is used on 32-bit host. Patch #5 adds more test cases for bits iterator. Please see the individual patches for more details. And comments are always welcome. --- v4: * patch #1: add ack from Yafang * patch #3: revert code-churn like changes: (1) compute nr_bytes and nr_bits before the check of nr_words. (2) use nr_bits == 64 to check for single u64, preventing build warning on 32-bit hosts. * patch #4: use "BITS_PER_LONG == 32" instead of "!defined(CONFIG_64BIT)" v3: https://lore.kernel.org/bpf/20241025013233.804027-1-houtao@huaweicloud.com/T/#t * split the bits-iterator related patches from "Misc fixes for bpf" patch set * patch #1: use "!nr_bits || bits >= nr_bits" to stop the iteration * patch #2: add a new helper for the overflow problem * patch #3: decrease the limitation from 512 to 511 and check whether nr_bytes is too large for bpf memory allocator explicitly * patch #5: add two more test cases for bit iterator v2: http://lore.kernel.org/bpf/d49fa2f4-f743-c763-7579-c3cab4dd88cb@huaweicloud.com ==================== Link: https://lore.kernel.org/r/20241030100516.3633640-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/bpf_mem_alloc.h3
-rw-r--r--kernel/bpf/helpers.c54
-rw-r--r--kernel/bpf/memalloc.c14
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bits_iter.c61
4 files changed, 118 insertions, 14 deletions
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index aaf004d94322..e45162ef59bb 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+/* Check the allocation size for kmalloc equivalent allocator */
+int bpf_mem_alloc_check_size(bool percpu, size_t size);
+
/* kmalloc/kfree equivalent: */
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index ca3f0a2e5ed5..3d45ebe8afb4 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2851,21 +2851,47 @@ struct bpf_iter_bits {
__u64 __opaque[2];
} __aligned(8);
+#define BITS_ITER_NR_WORDS_MAX 511
+
struct bpf_iter_bits_kern {
union {
- unsigned long *bits;
- unsigned long bits_copy;
+ __u64 *bits;
+ __u64 bits_copy;
};
- u32 nr_bits;
+ int nr_bits;
int bit;
} __aligned(8);
+/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
+ * a u64 pointer and an unsigned long pointer to find_next_bit() will
+ * return the same result, as both point to the same 8-byte area.
+ *
+ * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
+ * pointer also makes no difference. This is because the first iterated
+ * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
+ * long is composed of bits 32-63 of the u64.
+ *
+ * However, for 32-bit big-endian hosts, this is not the case. The first
+ * iterated unsigned long will be bits 32-63 of the u64, so swap these two
+ * ulong values within the u64.
+ */
+static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
+{
+#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
+ unsigned int i;
+
+ for (i = 0; i < nr; i++)
+ bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
+#endif
+}
+
/**
* bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
* @it: The new bpf_iter_bits to be created
* @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
* @nr_words: The size of the specified memory area, measured in 8-byte units.
- * Due to the limitation of memalloc, it can't be greater than 512.
+ * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
+ * further reduced by the BPF memory allocator implementation.
*
* This function initializes a new bpf_iter_bits structure for iterating over
* a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
@@ -2892,6 +2918,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
if (!unsafe_ptr__ign || !nr_words)
return -EINVAL;
+ if (nr_words > BITS_ITER_NR_WORDS_MAX)
+ return -E2BIG;
/* Optimization for u64 mask */
if (nr_bits == 64) {
@@ -2899,10 +2927,15 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
if (err)
return -EFAULT;
+ swap_ulong_in_u64(&kit->bits_copy, nr_words);
+
kit->nr_bits = nr_bits;
return 0;
}
+ if (bpf_mem_alloc_check_size(false, nr_bytes))
+ return -E2BIG;
+
/* Fallback to memalloc */
kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
if (!kit->bits)
@@ -2914,6 +2947,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
return err;
}
+ swap_ulong_in_u64(kit->bits, nr_words);
+
kit->nr_bits = nr_bits;
return 0;
}
@@ -2930,17 +2965,16 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
{
struct bpf_iter_bits_kern *kit = (void *)it;
- u32 nr_bits = kit->nr_bits;
- const unsigned long *bits;
- int bit;
+ int bit = kit->bit, nr_bits = kit->nr_bits;
+ const void *bits;
- if (nr_bits == 0)
+ if (!nr_bits || bit >= nr_bits)
return NULL;
bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
- bit = find_next_bit(bits, nr_bits, kit->bit + 1);
+ bit = find_next_bit(bits, nr_bits, bit + 1);
if (bit >= nr_bits) {
- kit->nr_bits = 0;
+ kit->bit = bit;
return NULL;
}
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index b3858a76e0b3..146f5b57cfb1 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -35,6 +35,8 @@
*/
#define LLIST_NODE_SZ sizeof(struct llist_node)
+#define BPF_MEM_ALLOC_SIZE_MAX 4096
+
/* similar to kmalloc, but sizeof == 8 bucket is gone */
static u8 size_index[24] __ro_after_init = {
3, /* 8 */
@@ -65,7 +67,7 @@ static u8 size_index[24] __ro_after_init = {
static int bpf_mem_cache_idx(size_t size)
{
- if (!size || size > 4096)
+ if (!size || size > BPF_MEM_ALLOC_SIZE_MAX)
return -1;
if (size <= 192)
@@ -1005,3 +1007,13 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
return !ret ? NULL : ret + LLIST_NODE_SZ;
}
+
+int bpf_mem_alloc_check_size(bool percpu, size_t size)
+{
+ /* The size of percpu allocation doesn't have LLIST_NODE_SZ overhead */
+ if ((percpu && size > BPF_MEM_ALLOC_SIZE_MAX) ||
+ (!percpu && size > BPF_MEM_ALLOC_SIZE_MAX - LLIST_NODE_SZ))
+ return -E2BIG;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
index f4da4d508ddb..156cc278e2fc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
@@ -15,6 +15,8 @@ int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign,
int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
+u64 bits_array[511] = {};
+
SEC("iter.s/cgroup")
__description("bits iter without destroy")
__failure __msg("Unreleased reference")
@@ -110,16 +112,16 @@ int bit_index(void)
}
SEC("syscall")
-__description("bits nomem")
+__description("bits too big")
__success __retval(0)
-int bits_nomem(void)
+int bits_too_big(void)
{
u64 data[4];
int nr = 0;
int *bit;
__builtin_memset(&data, 0xff, sizeof(data));
- bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */
+ bpf_for_each(bits, bit, &data[0], 512) /* Be greater than 511 */
nr++;
return nr;
}
@@ -151,3 +153,56 @@ int zero_words(void)
nr++;
return nr;
}
+
+SEC("syscall")
+__description("huge words")
+__success __retval(0)
+int huge_words(void)
+{
+ u64 data[8] = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1};
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data[0], 67108865)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("max words")
+__success __retval(4)
+int max_words(void)
+{
+ volatile int nr = 0;
+ int *bit;
+
+ bits_array[0] = (1ULL << 63) | 1U;
+ bits_array[510] = (1ULL << 33) | (1ULL << 32);
+
+ bpf_for_each(bits, bit, bits_array, 511) {
+ if (nr == 0 && *bit != 0)
+ break;
+ if (nr == 2 && *bit != 32672)
+ break;
+ nr++;
+ }
+ return nr;
+}
+
+SEC("syscall")
+__description("bad words")
+__success __retval(0)
+int bad_words(void)
+{
+ void *bad_addr = (void *)(3UL << 30);
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, bad_addr, 1)
+ nr++;
+
+ bpf_for_each(bits, bit, bad_addr, 4)
+ nr++;
+
+ return nr;
+}