summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig7
-rw-r--r--lib/Kconfig.debug33
-rw-r--r--lib/Makefile3
-rw-r--r--lib/asn1_decoder.c19
-rw-r--r--lib/iov_iter.c123
-rw-r--r--lib/mpi/mpicoder.c122
-rw-r--r--lib/nlattr.c103
-rw-r--r--lib/proportions.c407
-rw-r--r--lib/rhashtable.c6
-rw-r--r--lib/sg_pool.c172
-rw-r--r--lib/test_bpf.c5
-rw-r--r--lib/test_rhashtable.c2
12 files changed, 436 insertions, 566 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 3cca1222578e..61d55bd0ed89 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -523,6 +523,13 @@ config SG_SPLIT
a scatterlist. This should be selected by a driver or an API which
whishes to split a scatterlist amongst multiple DMA channels.
+config SG_POOL
+ def_bool n
+ help
+ Provides a helper to allocate chained scatterlists. This should be
+ selected by a driver or an API which wishes to allocate chained
+ scatterlists.
+
#
# sg chaining option
#
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1e9a607534ca..f4b797a690ba 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1289,6 +1289,39 @@ config TORTURE_TEST
tristate
default n
+config RCU_PERF_TEST
+ tristate "performance tests for RCU"
+ depends on DEBUG_KERNEL
+ select TORTURE_TEST
+ select SRCU
+ select TASKS_RCU
+ default n
+ help
+ This option provides a kernel module that runs performance
+ tests on the RCU infrastructure. The kernel module may be built
+ after the fact on the running kernel to be tested, if desired.
+
+ Say Y here if you want RCU performance tests to be built into
+ the kernel.
+ Say M if you want the RCU performance tests to build as a module.
+ Say N if you are unsure.
+
+config RCU_PERF_TEST_RUNNABLE
+ bool "performance tests for RCU runnable by default"
+ depends on RCU_PERF_TEST = y
+ default n
+ help
+ This option provides a way to build the RCU performance tests
+ directly into the kernel without them starting up at boot time.
+ You can use /sys/module to manually override this setting.
+ This /sys/module parameter is available only when the RCU performance
+ tests have been built into the kernel.
+
+ Say Y here if you want the RCU performance tests to start during
+ boot (you probably don't).
+ Say N here if you want the RCU performance tests to start only
+ after being manually enabled via /sys/module.
+
config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd436c97..931396ada5eb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
idr.o int_sqrt.o extable.o \
sha1.o md5.o irq_regs.o argv_split.o \
- proportions.o flex_proportions.o ratelimit.o show_mem.o \
+ flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o nmi_backtrace.o
@@ -178,6 +178,7 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
obj-$(CONFIG_SG_SPLIT) += sg_split.o
+obj-$(CONFIG_SG_POOL) += sg_pool.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
obj-$(CONFIG_IRQ_POLL) += irq_poll.o
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 2b3f46c049d4..0bd8a611eb83 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/module.h>
#include <linux/asn1_decoder.h>
#include <linux/asn1_ber_bytecode.h>
@@ -74,7 +75,7 @@ next_tag:
/* Extract a tag from the data */
tag = data[dp++];
- if (tag == 0) {
+ if (tag == ASN1_EOC) {
/* It appears to be an EOC. */
if (data[dp++] != 0)
goto invalid_eoc;
@@ -96,10 +97,8 @@ next_tag:
/* Extract the length */
len = data[dp++];
- if (len <= 0x7f) {
- dp += len;
- goto next_tag;
- }
+ if (len <= 0x7f)
+ goto check_length;
if (unlikely(len == ASN1_INDEFINITE_LENGTH)) {
/* Indefinite length */
@@ -110,14 +109,18 @@ next_tag:
}
n = len - 0x80;
- if (unlikely(n > sizeof(size_t) - 1))
+ if (unlikely(n > sizeof(len) - 1))
goto length_too_long;
if (unlikely(n > datalen - dp))
goto data_overrun_error;
- for (len = 0; n > 0; n--) {
+ len = 0;
+ for (; n > 0; n--) {
len <<= 8;
len |= data[dp++];
}
+check_length:
+ if (len > datalen - dp)
+ goto data_overrun_error;
dp += len;
goto next_tag;
@@ -504,3 +507,5 @@ error:
return -EBADMSG;
}
EXPORT_SYMBOL_GPL(asn1_ber_decoder);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5fecddc32b1b..28cb4315fe57 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -99,40 +99,44 @@
}
#define iterate_and_advance(i, n, v, I, B, K) { \
- size_t skip = i->iov_offset; \
- if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
- struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
- if (skip == bvec->bv_len) { \
- bvec++; \
- skip = 0; \
- } \
- i->nr_segs -= bvec - i->bvec; \
- i->bvec = bvec; \
- } else if (unlikely(i->type & ITER_KVEC)) { \
- const struct kvec *kvec; \
- struct kvec v; \
- iterate_kvec(i, n, v, kvec, skip, (K)) \
- if (skip == kvec->iov_len) { \
- kvec++; \
- skip = 0; \
- } \
- i->nr_segs -= kvec - i->kvec; \
- i->kvec = kvec; \
- } else { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
- if (skip == iov->iov_len) { \
- iov++; \
- skip = 0; \
+ if (unlikely(i->count < n)) \
+ n = i->count; \
+ if (n) { \
+ size_t skip = i->iov_offset; \
+ if (unlikely(i->type & ITER_BVEC)) { \
+ const struct bio_vec *bvec; \
+ struct bio_vec v; \
+ iterate_bvec(i, n, v, bvec, skip, (B)) \
+ if (skip == bvec->bv_len) { \
+ bvec++; \
+ skip = 0; \
+ } \
+ i->nr_segs -= bvec - i->bvec; \
+ i->bvec = bvec; \
+ } else if (unlikely(i->type & ITER_KVEC)) { \
+ const struct kvec *kvec; \
+ struct kvec v; \
+ iterate_kvec(i, n, v, kvec, skip, (K)) \
+ if (skip == kvec->iov_len) { \
+ kvec++; \
+ skip = 0; \
+ } \
+ i->nr_segs -= kvec - i->kvec; \
+ i->kvec = kvec; \
+ } else { \
+ const struct iovec *iov; \
+ struct iovec v; \
+ iterate_iovec(i, n, v, iov, skip, (I)) \
+ if (skip == iov->iov_len) { \
+ iov++; \
+ skip = 0; \
+ } \
+ i->nr_segs -= iov - i->iov; \
+ i->iov = iov; \
} \
- i->nr_segs -= iov - i->iov; \
- i->iov = iov; \
+ i->count -= n; \
+ i->iov_offset = skip; \
} \
- i->count -= n; \
- i->iov_offset = skip; \
}
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
@@ -386,12 +390,6 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len),
@@ -407,12 +405,6 @@ EXPORT_SYMBOL(copy_to_iter);
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
v.iov_len),
@@ -428,12 +420,6 @@ EXPORT_SYMBOL(copy_from_iter);
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
v.iov_base, v.iov_len),
@@ -474,12 +460,6 @@ EXPORT_SYMBOL(copy_page_from_iter);
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__clear_user(v.iov_base, v.iov_len),
memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -569,6 +549,25 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
}
EXPORT_SYMBOL(iov_iter_alignment);
+unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
+{
+ unsigned long res = 0;
+ size_t size = i->count;
+ if (!size)
+ return 0;
+
+ iterate_all_kinds(i, size, v,
+ (res |= (!res ? 0 : (unsigned long)v.iov_base) |
+ (size != v.iov_len ? size : 0), 0),
+ (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
+ (size != v.bv_len ? size : 0)),
+ (res |= (!res ? 0 : (unsigned long)v.iov_base) |
+ (size != v.iov_len ? size : 0))
+ );
+ return res;
+}
+EXPORT_SYMBOL(iov_iter_gap_alignment);
+
ssize_t iov_iter_get_pages(struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
@@ -666,12 +665,6 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
char *to = addr;
__wsum sum, next;
size_t off = 0;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
sum = *csum;
iterate_and_advance(i, bytes, v, ({
int err = 0;
@@ -710,12 +703,6 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
const char *from = addr;
__wsum sum, next;
size_t off = 0;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
sum = *csum;
iterate_and_advance(i, bytes, v, ({
int err = 0;
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index eb15e7dc7b65..747606f9e4a3 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -20,6 +20,8 @@
#include <linux/bitops.h>
#include <linux/count_zeros.h>
+#include <linux/byteorder/generic.h>
+#include <linux/string.h>
#include "mpi-internal.h"
#define MAX_EXTERN_MPI_BITS 16384
@@ -163,7 +165,13 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
int *sign)
{
uint8_t *p;
- mpi_limb_t alimb;
+#if BYTES_PER_MPI_LIMB == 4
+ __be32 alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+ __be64 alimb;
+#else
+#error please implement for this limb size.
+#endif
unsigned int n = mpi_get_size(a);
int i, lzeros;
@@ -183,38 +191,19 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
p = buf;
*nbytes = n - lzeros;
- for (i = a->nlimbs - 1; i >= 0; i--) {
- alimb = a->d[i];
+ for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
+ lzeros %= BYTES_PER_MPI_LIMB;
+ i >= 0; i--) {
#if BYTES_PER_MPI_LIMB == 4
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be32(a->d[i]);
#elif BYTES_PER_MPI_LIMB == 8
- *p++ = alimb >> 56;
- *p++ = alimb >> 48;
- *p++ = alimb >> 40;
- *p++ = alimb >> 32;
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be64(a->d[i]);
#else
#error please implement for this limb size.
#endif
-
- if (lzeros > 0) {
- if (lzeros >= sizeof(alimb)) {
- p -= sizeof(alimb);
- } else {
- mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
- mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
- + lzeros;
- *limb1 = *limb2;
- p -= lzeros;
- }
- lzeros -= sizeof(alimb);
- }
+ memcpy(p, (u8 *)&alimb + lzeros, BYTES_PER_MPI_LIMB - lzeros);
+ p += BYTES_PER_MPI_LIMB - lzeros;
+ lzeros = 0;
}
return 0;
}
@@ -359,7 +348,13 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
int *sign)
{
u8 *p, *p2;
- mpi_limb_t alimb, alimb2;
+#if BYTES_PER_MPI_LIMB == 4
+ __be32 alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+ __be64 alimb;
+#else
+#error please implement for this limb size.
+#endif
unsigned int n = mpi_get_size(a);
int i, x, y = 0, lzeros, buf_len;
@@ -380,42 +375,22 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
buf_len = sgl->length;
p2 = sg_virt(sgl);
- for (i = a->nlimbs - 1; i >= 0; i--) {
- alimb = a->d[i];
- p = (u8 *)&alimb2;
+ for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
+ lzeros %= BYTES_PER_MPI_LIMB;
+ i >= 0; i--) {
#if BYTES_PER_MPI_LIMB == 4
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be32(a->d[i]);
#elif BYTES_PER_MPI_LIMB == 8
- *p++ = alimb >> 56;
- *p++ = alimb >> 48;
- *p++ = alimb >> 40;
- *p++ = alimb >> 32;
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be64(a->d[i]);
#else
#error please implement for this limb size.
#endif
- if (lzeros > 0) {
- if (lzeros >= sizeof(alimb)) {
- p -= sizeof(alimb);
- continue;
- } else {
- mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
- mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
- + lzeros;
- *limb1 = *limb2;
- p -= lzeros;
- y = lzeros;
- }
- lzeros -= sizeof(alimb);
+ if (lzeros) {
+ y = lzeros;
+ lzeros = 0;
}
- p = p - (sizeof(alimb) - y);
+ p = (u8 *)&alimb + y;
for (x = 0; x < sizeof(alimb) - y; x++) {
if (!buf_len) {
@@ -443,15 +418,15 @@ EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
* a new MPI and reads the content of the sgl to the MPI.
*
* @sgl: scatterlist to read from
- * @len: number of bytes to read
+ * @nbytes: number of bytes to read
*
* Return: Pointer to a new MPI or NULL on error
*/
-MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len)
+MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
{
struct scatterlist *sg;
int x, i, j, z, lzeros, ents;
- unsigned int nbits, nlimbs, nbytes;
+ unsigned int nbits, nlimbs;
mpi_limb_t a;
MPI val = NULL;
@@ -472,16 +447,12 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len)
break;
ents--;
+ nbytes -= lzeros;
lzeros = 0;
}
sgl = sg;
-
- if (!ents)
- nbytes = 0;
- else
- nbytes = len - lzeros;
-
+ nbytes -= lzeros;
nbits = nbytes * 8;
if (nbits > MAX_EXTERN_MPI_BITS) {
pr_info("MPI: mpi too large (%u bits)\n", nbits);
@@ -489,9 +460,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len)
}
if (nbytes > 0)
- nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros));
- else
- nbits = 0;
+ nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) -
+ (BITS_PER_LONG - 8);
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
@@ -507,19 +477,14 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len)
j = nlimbs - 1;
a = 0;
- z = 0;
- x = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
- x %= BYTES_PER_MPI_LIMB;
+ z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+ z %= BYTES_PER_MPI_LIMB;
for_each_sg(sgl, sg, ents, i) {
const u8 *buffer = sg_virt(sg) + lzeros;
int len = sg->length - lzeros;
- int buf_shift = x;
-
- if (sg_is_last(sg) && (len % BYTES_PER_MPI_LIMB))
- len += BYTES_PER_MPI_LIMB - (len % BYTES_PER_MPI_LIMB);
- for (; x < len + buf_shift; x++) {
+ for (x = 0; x < len; x++) {
a <<= 8;
a |= *buffer++;
if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) {
@@ -528,7 +493,6 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len)
}
}
z += x;
- x = 0;
lzeros = 0;
}
return val;
diff --git a/lib/nlattr.c b/lib/nlattr.c
index f5907d23272d..fce1e9afc6d9 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -355,6 +355,30 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
EXPORT_SYMBOL(__nla_reserve);
/**
+ * __nla_reserve_64bit - reserve room for attribute on the skb and align it
+ * @skb: socket buffer to reserve room on
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @padattr: attribute type for the padding
+ *
+ * Adds a netlink attribute header to a socket buffer and reserves
+ * room for the payload but does not copy it. It also ensures that this
+ * attribute will have a 64-bit aligned nla_data() area.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute header and payload.
+ */
+struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype,
+ int attrlen, int padattr)
+{
+ if (nla_need_padding_for_64bit(skb))
+ nla_align_64bit(skb, padattr);
+
+ return __nla_reserve(skb, attrtype, attrlen);
+}
+EXPORT_SYMBOL(__nla_reserve_64bit);
+
+/**
* __nla_reserve_nohdr - reserve room for attribute without header
* @skb: socket buffer to reserve room on
* @attrlen: length of attribute payload
@@ -397,6 +421,36 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
EXPORT_SYMBOL(nla_reserve);
/**
+ * nla_reserve_64bit - reserve room for attribute on the skb and align it
+ * @skb: socket buffer to reserve room on
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @padattr: attribute type for the padding
+ *
+ * Adds a netlink attribute header to a socket buffer and reserves
+ * room for the payload but does not copy it. It also ensures that this
+ * attribute will have a 64-bit aligned nla_data() area.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute header and payload.
+ */
+struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen,
+ int padattr)
+{
+ size_t len;
+
+ if (nla_need_padding_for_64bit(skb))
+ len = nla_total_size_64bit(attrlen);
+ else
+ len = nla_total_size(attrlen);
+ if (unlikely(skb_tailroom(skb) < len))
+ return NULL;
+
+ return __nla_reserve_64bit(skb, attrtype, attrlen, padattr);
+}
+EXPORT_SYMBOL(nla_reserve_64bit);
+
+/**
* nla_reserve_nohdr - reserve room for attribute without header
* @skb: socket buffer to reserve room on
* @attrlen: length of attribute payload
@@ -436,6 +490,27 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
EXPORT_SYMBOL(__nla_put);
/**
+ * __nla_put_64bit - Add a netlink attribute to a socket buffer and align it
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ * @padattr: attribute type for the padding
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute header and payload.
+ */
+void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen,
+ const void *data, int padattr)
+{
+ struct nlattr *nla;
+
+ nla = __nla_reserve_64bit(skb, attrtype, attrlen, padattr);
+ memcpy(nla_data(nla), data, attrlen);
+}
+EXPORT_SYMBOL(__nla_put_64bit);
+
+/**
* __nla_put_nohdr - Add a netlink attribute without header
* @skb: socket buffer to add attribute to
* @attrlen: length of attribute payload
@@ -474,6 +549,34 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
EXPORT_SYMBOL(nla_put);
/**
+ * nla_put_64bit - Add a netlink attribute to a socket buffer and align it
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ * @padattr: attribute type for the padding
+ *
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
+ * the attribute header and payload.
+ */
+int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen,
+ const void *data, int padattr)
+{
+ size_t len;
+
+ if (nla_need_padding_for_64bit(skb))
+ len = nla_total_size_64bit(attrlen);
+ else
+ len = nla_total_size(attrlen);
+ if (unlikely(skb_tailroom(skb) < len))
+ return -EMSGSIZE;
+
+ __nla_put_64bit(skb, attrtype, attrlen, data, padattr);
+ return 0;
+}
+EXPORT_SYMBOL(nla_put_64bit);
+
+/**
* nla_put_nohdr - Add a netlink attribute without header
* @skb: socket buffer to add attribute to
* @attrlen: length of attribute payload
diff --git a/lib/proportions.c b/lib/proportions.c
deleted file mode 100644
index efa54f259ea9..000000000000
--- a/lib/proportions.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Floating proportions
- *
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Description:
- *
- * The floating proportion is a time derivative with an exponentially decaying
- * history:
- *
- * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
- *
- * Where j is an element from {prop_local}, x_{j} is j's number of events,
- * and i the time period over which the differential is taken. So d/dt_{-i} is
- * the differential over the i-th last period.
- *
- * The decaying history gives smooth transitions. The time differential carries
- * the notion of speed.
- *
- * The denominator is 2^(1+i) because we want the series to be normalised, ie.
- *
- * \Sum_{i=0} 1/2^(1+i) = 1
- *
- * Further more, if we measure time (t) in the same events as x; so that:
- *
- * t = \Sum_{j} x_{j}
- *
- * we get that:
- *
- * \Sum_{j} p_{j} = 1
- *
- * Writing this in an iterative fashion we get (dropping the 'd's):
- *
- * if (++x_{j}, ++t > period)
- * t /= 2;
- * for_each (j)
- * x_{j} /= 2;
- *
- * so that:
- *
- * p_{j} = x_{j} / t;
- *
- * We optimize away the '/= 2' for the global time delta by noting that:
- *
- * if (++t > period) t /= 2:
- *
- * Can be approximated by:
- *
- * period/2 + (++t % period/2)
- *
- * [ Furthermore, when we choose period to be 2^n it can be written in terms of
- * binary operations and wraparound artefacts disappear. ]
- *
- * Also note that this yields a natural counter of the elapsed periods:
- *
- * c = t / (period/2)
- *
- * [ Its monotonic increasing property can be applied to mitigate the wrap-
- * around issue. ]
- *
- * This allows us to do away with the loop over all prop_locals on each period
- * expiration. By remembering the period count under which it was last accessed
- * as c_{j}, we can obtain the number of 'missed' cycles from:
- *
- * c - c_{j}
- *
- * We can then lazily catch up to the global period count every time we are
- * going to use x_{j}, by doing:
- *
- * x_{j} /= 2^(c - c_{j}), c_{j} = c
- */
-
-#include <linux/proportions.h>
-#include <linux/rcupdate.h>
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
-{
- int err;
-
- if (shift > PROP_MAX_SHIFT)
- shift = PROP_MAX_SHIFT;
-
- pd->index = 0;
- pd->pg[0].shift = shift;
- mutex_init(&pd->mutex);
- err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
- if (err)
- goto out;
-
- err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
- if (err)
- percpu_counter_destroy(&pd->pg[0].events);
-
-out:
- return err;
-}
-
-/*
- * We have two copies, and flip between them to make it seem like an atomic
- * update. The update is not really atomic wrt the events counter, but
- * it is internally consistent with the bit layout depending on shift.
- *
- * We copy the events count, move the bits around and flip the index.
- */
-void prop_change_shift(struct prop_descriptor *pd, int shift)
-{
- int index;
- int offset;
- u64 events;
- unsigned long flags;
-
- if (shift > PROP_MAX_SHIFT)
- shift = PROP_MAX_SHIFT;
-
- mutex_lock(&pd->mutex);
-
- index = pd->index ^ 1;
- offset = pd->pg[pd->index].shift - shift;
- if (!offset)
- goto out;
-
- pd->pg[index].shift = shift;
-
- local_irq_save(flags);
- events = percpu_counter_sum(&pd->pg[pd->index].events);
- if (offset < 0)
- events <<= -offset;
- else
- events >>= offset;
- percpu_counter_set(&pd->pg[index].events, events);
-
- /*
- * ensure the new pg is fully written before the switch
- */
- smp_wmb();
- pd->index = index;
- local_irq_restore(flags);
-
- synchronize_rcu();
-
-out:
- mutex_unlock(&pd->mutex);
-}
-
-/*
- * wrap the access to the data in an rcu_read_lock() section;
- * this is used to track the active references.
- */
-static struct prop_global *prop_get_global(struct prop_descriptor *pd)
-__acquires(RCU)
-{
- int index;
-
- rcu_read_lock();
- index = pd->index;
- /*
- * match the wmb from vcd_flip()
- */
- smp_rmb();
- return &pd->pg[index];
-}
-
-static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
-__releases(RCU)
-{
- rcu_read_unlock();
-}
-
-static void
-prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
-{
- int offset = *pl_shift - new_shift;
-
- if (!offset)
- return;
-
- if (offset < 0)
- *pl_period <<= -offset;
- else
- *pl_period >>= offset;
-
- *pl_shift = new_shift;
-}
-
-/*
- * PERCPU
- */
-
-#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
-{
- raw_spin_lock_init(&pl->lock);
- pl->shift = 0;
- pl->period = 0;
- return percpu_counter_init(&pl->events, 0, gfp);
-}
-
-void prop_local_destroy_percpu(struct prop_local_percpu *pl)
-{
- percpu_counter_destroy(&pl->events);
-}
-
-/*
- * Catch up with missed period expirations.
- *
- * until (c_{j} == c)
- * x_{j} -= x_{j}/2;
- * c_{j}++;
- */
-static
-void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
-{
- unsigned long period = 1UL << (pg->shift - 1);
- unsigned long period_mask = ~(period - 1);
- unsigned long global_period;
- unsigned long flags;
-
- global_period = percpu_counter_read(&pg->events);
- global_period &= period_mask;
-
- /*
- * Fast path - check if the local and global period count still match
- * outside of the lock.
- */
- if (pl->period == global_period)
- return;
-
- raw_spin_lock_irqsave(&pl->lock, flags);
- prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-
- /*
- * For each missed period, we half the local counter.
- * basically:
- * pl->events >> (global_period - pl->period);
- */
- period = (global_period - pl->period) >> (pg->shift - 1);
- if (period < BITS_PER_LONG) {
- s64 val = percpu_counter_read(&pl->events);
-
- if (val < (nr_cpu_ids * PROP_BATCH))
- val = percpu_counter_sum(&pl->events);
-
- __percpu_counter_add(&pl->events, -val + (val >> period),
- PROP_BATCH);
- } else
- percpu_counter_set(&pl->events, 0);
-
- pl->period = global_period;
- raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- * ++x_{j}, ++t
- */
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
- struct prop_global *pg = prop_get_global(pd);
-
- prop_norm_percpu(pg, pl);
- __percpu_counter_add(&pl->events, 1, PROP_BATCH);
- percpu_counter_add(&pg->events, 1);
- prop_put_global(pd, pg);
-}
-
-/*
- * identical to __prop_inc_percpu, except that it limits this pl's fraction to
- * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
- */
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
- struct prop_local_percpu *pl, long frac)
-{
- struct prop_global *pg = prop_get_global(pd);
-
- prop_norm_percpu(pg, pl);
-
- if (unlikely(frac != PROP_FRAC_BASE)) {
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
- long numerator, denominator;
-
- numerator = percpu_counter_read_positive(&pl->events);
- global_count = percpu_counter_read(&pg->events);
- denominator = period_2 + (global_count & counter_mask);
-
- if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
- goto out_put;
- }
-
- percpu_counter_add(&pl->events, 1);
- percpu_counter_add(&pg->events, 1);
-
-out_put:
- prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- * p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_percpu(struct prop_descriptor *pd,
- struct prop_local_percpu *pl,
- long *numerator, long *denominator)
-{
- struct prop_global *pg = prop_get_global(pd);
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
-
- prop_norm_percpu(pg, pl);
- *numerator = percpu_counter_read_positive(&pl->events);
-
- global_count = percpu_counter_read(&pg->events);
- *denominator = period_2 + (global_count & counter_mask);
-
- prop_put_global(pd, pg);
-}
-
-/*
- * SINGLE
- */
-
-int prop_local_init_single(struct prop_local_single *pl)
-{
- raw_spin_lock_init(&pl->lock);
- pl->shift = 0;
- pl->period = 0;
- pl->events = 0;
- return 0;
-}
-
-void prop_local_destroy_single(struct prop_local_single *pl)
-{
-}
-
-/*
- * Catch up with missed period expirations.
- */
-static
-void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
-{
- unsigned long period = 1UL << (pg->shift - 1);
- unsigned long period_mask = ~(period - 1);
- unsigned long global_period;
- unsigned long flags;
-
- global_period = percpu_counter_read(&pg->events);
- global_period &= period_mask;
-
- /*
- * Fast path - check if the local and global period count still match
- * outside of the lock.
- */
- if (pl->period == global_period)
- return;
-
- raw_spin_lock_irqsave(&pl->lock, flags);
- prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
- /*
- * For each missed period, we half the local counter.
- */
- period = (global_period - pl->period) >> (pg->shift - 1);
- if (likely(period < BITS_PER_LONG))
- pl->events >>= period;
- else
- pl->events = 0;
- pl->period = global_period;
- raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- * ++x_{j}, ++t
- */
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
- struct prop_global *pg = prop_get_global(pd);
-
- prop_norm_single(pg, pl);
- pl->events++;
- percpu_counter_add(&pg->events, 1);
- prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- * p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_single(struct prop_descriptor *pd,
- struct prop_local_single *pl,
- long *numerator, long *denominator)
-{
- struct prop_global *pg = prop_get_global(pd);
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
-
- prop_norm_single(pg, pl);
- *numerator = pl->events;
-
- global_count = percpu_counter_read(&pg->events);
- *denominator = period_2 + (global_count & counter_mask);
-
- prop_put_global(pd, pg);
-}
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index cc808707d1cf..5d845ffd7982 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -487,6 +487,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
* rhashtable_walk_init - Initialise an iterator
* @ht: Table to walk over
* @iter: Hash table Iterator
+ * @gfp: GFP flags for allocations
*
* This function prepares a hash table walk.
*
@@ -504,14 +505,15 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
* You must call rhashtable_walk_exit if this function returns
* successfully.
*/
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
+int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
+ gfp_t gfp)
{
iter->ht = ht;
iter->p = NULL;
iter->slot = 0;
iter->skip = 0;
- iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL);
+ iter->walker = kmalloc(sizeof(*iter->walker), gfp);
if (!iter->walker)
return -ENOMEM;
diff --git a/lib/sg_pool.c b/lib/sg_pool.c
new file mode 100644
index 000000000000..6dd30615a201
--- /dev/null
+++ b/lib/sg_pool.c
@@ -0,0 +1,172 @@
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools)
+#define SG_MEMPOOL_SIZE 2
+
+struct sg_pool {
+ size_t size;
+ char *name;
+ struct kmem_cache *slab;
+ mempool_t *pool;
+};
+
+#define SP(x) { .size = x, "sgpool-" __stringify(x) }
+#if (SG_CHUNK_SIZE < 32)
+#error SG_CHUNK_SIZE is too small (must be 32 or greater)
+#endif
+static struct sg_pool sg_pools[] = {
+ SP(8),
+ SP(16),
+#if (SG_CHUNK_SIZE > 32)
+ SP(32),
+#if (SG_CHUNK_SIZE > 64)
+ SP(64),
+#if (SG_CHUNK_SIZE > 128)
+ SP(128),
+#if (SG_CHUNK_SIZE > 256)
+#error SG_CHUNK_SIZE is too large (256 MAX)
+#endif
+#endif
+#endif
+#endif
+ SP(SG_CHUNK_SIZE)
+};
+#undef SP
+
+static inline unsigned int sg_pool_index(unsigned short nents)
+{
+ unsigned int index;
+
+ BUG_ON(nents > SG_CHUNK_SIZE);
+
+ if (nents <= 8)
+ index = 0;
+ else
+ index = get_count_order(nents) - 3;
+
+ return index;
+}
+
+static void sg_pool_free(struct scatterlist *sgl, unsigned int nents)
+{
+ struct sg_pool *sgp;
+
+ sgp = sg_pools + sg_pool_index(nents);
+ mempool_free(sgl, sgp->pool);
+}
+
+static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask)
+{
+ struct sg_pool *sgp;
+
+ sgp = sg_pools + sg_pool_index(nents);
+ return mempool_alloc(sgp->pool, gfp_mask);
+}
+
+/**
+ * sg_free_table_chained - Free a previously mapped sg table
+ * @table: The sg table header to use
+ * @first_chunk: was first_chunk not NULL in sg_alloc_table_chained?
+ *
+ * Description:
+ * Free an sg table previously allocated and setup with
+ * sg_alloc_table_chained().
+ *
+ **/
+void sg_free_table_chained(struct sg_table *table, bool first_chunk)
+{
+ if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE)
+ return;
+ __sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free);
+}
+EXPORT_SYMBOL_GPL(sg_free_table_chained);
+
+/**
+ * sg_alloc_table_chained - Allocate and chain SGLs in an sg table
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @first_chunk: first SGL
+ *
+ * Description:
+ * Allocate and chain SGLs in an sg table. If @nents is larger than
+ * SG_CHUNK_SIZE a chained sg table will be setup.
+ *
+ **/
+int sg_alloc_table_chained(struct sg_table *table, int nents,
+ struct scatterlist *first_chunk)
+{
+ int ret;
+
+ BUG_ON(!nents);
+
+ if (first_chunk) {
+ if (nents <= SG_CHUNK_SIZE) {
+ table->nents = table->orig_nents = nents;
+ sg_init_table(table->sgl, nents);
+ return 0;
+ }
+ }
+
+ ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE,
+ first_chunk, GFP_ATOMIC, sg_pool_alloc);
+ if (unlikely(ret))
+ sg_free_table_chained(table, (bool)first_chunk);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sg_alloc_table_chained);
+
+static __init int sg_pool_init(void)
+{
+ int i;
+
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_pool *sgp = sg_pools + i;
+ int size = sgp->size * sizeof(struct scatterlist);
+
+ sgp->slab = kmem_cache_create(sgp->name, size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!sgp->slab) {
+ printk(KERN_ERR "SG_POOL: can't init sg slab %s\n",
+ sgp->name);
+ goto cleanup_sdb;
+ }
+
+ sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
+ sgp->slab);
+ if (!sgp->pool) {
+ printk(KERN_ERR "SG_POOL: can't init sg mempool %s\n",
+ sgp->name);
+ goto cleanup_sdb;
+ }
+ }
+
+ return 0;
+
+cleanup_sdb:
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_pool *sgp = sg_pools + i;
+ if (sgp->pool)
+ mempool_destroy(sgp->pool);
+ if (sgp->slab)
+ kmem_cache_destroy(sgp->slab);
+ }
+
+ return -ENOMEM;
+}
+
+static __exit void sg_pool_exit(void)
+{
+ int i;
+
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_pool *sgp = sg_pools + i;
+ mempool_destroy(sgp->pool);
+ kmem_cache_destroy(sgp->slab);
+ }
+}
+
+module_init(sg_pool_init);
+module_exit(sg_pool_exit);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 8f22fbedc3a6..93f45011a59d 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5621,7 +5621,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn));
- bpf_prog_select_runtime(fp);
+ /* We cannot error here as we don't need type compatibility
+ * checks.
+ */
+ fp = bpf_prog_select_runtime(fp, err);
break;
}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 270bf7289b1e..297fdb5e74bd 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -143,7 +143,7 @@ static void test_bucket_stats(struct rhashtable *ht)
struct rhashtable_iter hti;
struct rhash_head *pos;
- err = rhashtable_walk_init(ht, &hti);
+ err = rhashtable_walk_init(ht, &hti, GFP_KERNEL);
if (err) {
pr_warn("Test failed: allocation error");
return;