summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/crypto/crypt_s390.h122
-rw-r--r--arch/s390/crypto/prng.c850
-rw-r--r--arch/s390/hypfs/inode.c18
-rw-r--r--arch/s390/include/asm/dma-mapping.h2
-rw-r--r--arch/s390/include/asm/kexec.h3
-rw-r--r--arch/s390/include/asm/mmu.h4
-rw-r--r--arch/s390/include/asm/mmu_context.h3
-rw-r--r--arch/s390/include/asm/pci.h10
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/pgtable.h169
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/mm/hugetlbpage.c66
-rw-r--r--arch/s390/mm/pgtable.c142
-rw-r--r--arch/s390/net/bpf_jit.S197
-rw-r--r--arch/s390/net/bpf_jit.h58
-rw-r--r--arch/s390/net/bpf_jit_comp.c1781
-rw-r--r--arch/s390/pci/pci.c6
-rw-r--r--arch/s390/pci/pci_debug.c33
-rw-r--r--arch/s390/pci/pci_dma.c8
20 files changed, 2316 insertions, 1161 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index de2726a487b0..b06dc3839268 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -115,7 +115,7 @@ config S390
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select HAVE_BPF_JIT if PACK_STACK
+ select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index ba3b2aefddf5..d9c4c313fbc6 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -3,9 +3,10 @@
*
* Support for s390 cryptographic instructions.
*
- * Copyright IBM Corp. 2003, 2007
+ * Copyright IBM Corp. 2003, 2015
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
+ * Harald Freudenberger (freude@de.ibm.com)
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -28,15 +29,17 @@
#define CRYPT_S390_MSA 0x1
#define CRYPT_S390_MSA3 0x2
#define CRYPT_S390_MSA4 0x4
+#define CRYPT_S390_MSA5 0x8
/* s390 cryptographic operations */
enum crypt_s390_operations {
- CRYPT_S390_KM = 0x0100,
- CRYPT_S390_KMC = 0x0200,
- CRYPT_S390_KIMD = 0x0300,
- CRYPT_S390_KLMD = 0x0400,
- CRYPT_S390_KMAC = 0x0500,
- CRYPT_S390_KMCTR = 0x0600
+ CRYPT_S390_KM = 0x0100,
+ CRYPT_S390_KMC = 0x0200,
+ CRYPT_S390_KIMD = 0x0300,
+ CRYPT_S390_KLMD = 0x0400,
+ CRYPT_S390_KMAC = 0x0500,
+ CRYPT_S390_KMCTR = 0x0600,
+ CRYPT_S390_PPNO = 0x0700
};
/*
@@ -138,6 +141,16 @@ enum crypt_s390_kmac_func {
KMAC_TDEA_192 = CRYPT_S390_KMAC | 3
};
+/*
+ * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER
+ * OPERATION) instruction
+ */
+enum crypt_s390_ppno_func {
+ PPNO_QUERY = CRYPT_S390_PPNO | 0,
+ PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3,
+ PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83
+};
+
/**
* crypt_s390_km:
* @func: the function code passed to KM; see crypt_s390_km_func
@@ -162,11 +175,11 @@ static inline int crypt_s390_km(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -198,11 +211,11 @@ static inline int crypt_s390_kmc(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -233,11 +246,11 @@ static inline int crypt_s390_kimd(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -267,11 +280,11 @@ static inline int crypt_s390_klmd(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -302,11 +315,11 @@ static inline int crypt_s390_kmac(long func, void *param,
int ret;
asm volatile(
- "0: .insn rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "=d" (ret), "+a" (__src), "+d" (__src_len)
: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
if (ret < 0)
@@ -340,11 +353,11 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
int ret = -1;
asm volatile(
- "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
"+a" (__ctr)
: "d" (__func), "a" (__param) : "cc", "memory");
@@ -354,6 +367,47 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
}
/**
+ * crypt_s390_ppno:
+ * @func: the function code passed to PPNO; see crypt_s390_ppno_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ *
+ * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of random
+ * bytes stored in dest buffer for generate function
+ */
+static inline int crypt_s390_ppno(long func, void *param,
+ u8 *dest, long dest_len,
+ const u8 *seed, long seed_len)
+{
+ register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+ register void *__param asm("1") = param; /* param block (240 bytes) */
+ register u8 *__dest asm("2") = dest; /* buf for recv random bytes */
+ register long __dest_len asm("3") = dest_len; /* requested random bytes */
+ register const u8 *__seed asm("4") = seed; /* buf with seed data */
+ register long __seed_len asm("5") = seed_len; /* bytes in seed buf */
+ int ret = -1;
+
+ asm volatile (
+ "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
+ " la %0,0\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (ret), "+a"(__dest), "+d"(__dest_len)
+ : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len)
+ : "cc", "memory");
+ if (ret < 0)
+ return ret;
+ return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0;
+}
+
+/**
* crypt_s390_func_available:
* @func: the function code of the specific function; 0 if op in general
*
@@ -373,6 +427,9 @@ static inline int crypt_s390_func_available(int func,
return 0;
if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
return 0;
+ if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57))
+ return 0;
+
switch (func & CRYPT_S390_OP_MASK) {
case CRYPT_S390_KM:
ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
@@ -390,8 +447,12 @@ static inline int crypt_s390_func_available(int func,
ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
break;
case CRYPT_S390_KMCTR:
- ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
- NULL);
+ ret = crypt_s390_kmctr(KMCTR_QUERY, &status,
+ NULL, NULL, 0, NULL);
+ break;
+ case CRYPT_S390_PPNO:
+ ret = crypt_s390_ppno(PPNO_QUERY, &status,
+ NULL, 0, NULL, 0);
break;
default:
return 0;
@@ -419,15 +480,14 @@ static inline int crypt_s390_pcc(long func, void *param)
int ret = -1;
asm volatile(
- "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */
- "1: brc 1,0b \n" /* handle partial completion */
+ "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */
+ "1: brc 1,0b\n" /* handle partial completion */
" la %0,0\n"
"2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
: "+d" (ret)
: "d" (__func), "a" (__param) : "cc", "memory");
return ret;
}
-
#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 94a35a4c1b48..9d5192c94963 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -1,106 +1,529 @@
/*
- * Copyright IBM Corp. 2006, 2007
+ * Copyright IBM Corp. 2006, 2015
* Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ * Harald Freudenberger <freude@de.ibm.com>
* Driver for the s390 pseudo random number generator
*/
+
+#define KMSG_COMPONENT "prng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/fs.h>
+#include <linux/fips.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <asm/debug.h>
#include <asm/uaccess.h>
+#include <asm/timex.h>
#include "crypt_s390.h"
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>");
+MODULE_AUTHOR("IBM Corporation");
MODULE_DESCRIPTION("s390 PRNG interface");
-static int prng_chunk_size = 256;
-module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH);
+
+#define PRNG_MODE_AUTO 0
+#define PRNG_MODE_TDES 1
+#define PRNG_MODE_SHA512 2
+
+static unsigned int prng_mode = PRNG_MODE_AUTO;
+module_param_named(mode, prng_mode, int, 0);
+MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512");
+
+
+#define PRNG_CHUNKSIZE_TDES_MIN 8
+#define PRNG_CHUNKSIZE_TDES_MAX (64*1024)
+#define PRNG_CHUNKSIZE_SHA512_MIN 64
+#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024)
+
+static unsigned int prng_chunk_size = 256;
+module_param_named(chunksize, prng_chunk_size, int, 0);
MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes");
-static int prng_entropy_limit = 4096;
-module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR);
-MODULE_PARM_DESC(prng_entropy_limit,
- "PRNG add entropy after that much bytes were produced");
+
+#define PRNG_RESEED_LIMIT_TDES 4096
+#define PRNG_RESEED_LIMIT_TDES_LOWER 4096
+#define PRNG_RESEED_LIMIT_SHA512 100000
+#define PRNG_RESEED_LIMIT_SHA512_LOWER 10000
+
+static unsigned int prng_reseed_limit;
+module_param_named(reseed_limit, prng_reseed_limit, int, 0);
+MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+
/*
* Any one who considers arithmetical methods of producing random digits is,
* of course, in a state of sin. -- John von Neumann
*/
-struct s390_prng_data {
- unsigned long count; /* how many bytes were produced */
- char *buf;
+static int prng_errorflag;
+
+#define PRNG_GEN_ENTROPY_FAILED 1
+#define PRNG_SELFTEST_FAILED 2
+#define PRNG_INSTANTIATE_FAILED 3
+#define PRNG_SEED_FAILED 4
+#define PRNG_RESEED_FAILED 5
+#define PRNG_GEN_FAILED 6
+
+struct prng_ws_s {
+ u8 parm_block[32];
+ u32 reseed_counter;
+ u64 byte_counter;
};
-static struct s390_prng_data *p;
+struct ppno_ws_s {
+ u32 res;
+ u32 reseed_counter;
+ u64 stream_bytes;
+ u8 V[112];
+ u8 C[112];
+};
-/* copied from libica, use a non-zero initial parameter block */
-static unsigned char parm_block[32] = {
-0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4,
-0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0,
+struct prng_data_s {
+ struct mutex mutex;
+ union {
+ struct prng_ws_s prngws;
+ struct ppno_ws_s ppnows;
+ };
+ u8 *buf;
+ u32 rest;
+ u8 *prev;
};
-static int prng_open(struct inode *inode, struct file *file)
+static struct prng_data_s *prng_data;
+
+/* initial parameter block for tdes mode, copied from libica */
+static const u8 initial_parm_block[32] __initconst = {
+ 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+ 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+ 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+ 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 };
+
+
+/*** helper functions ***/
+
+static int generate_entropy(u8 *ebuf, size_t nbytes)
{
- return nonseekable_open(inode, file);
+ int n, ret = 0;
+ u8 *pg, *h, hash[32];
+
+ pg = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!pg) {
+ prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+ return -ENOMEM;
+ }
+
+ while (nbytes) {
+ /* fill page with urandom bytes */
+ get_random_bytes(pg, PAGE_SIZE);
+ /* exor page with stckf values */
+ for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
+ u64 *p = ((u64 *)pg) + n;
+ *p ^= get_tod_clock_fast();
+ }
+ n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
+ if (n < sizeof(hash))
+ h = hash;
+ else
+ h = ebuf;
+ /* generate sha256 from this page */
+ if (crypt_s390_kimd(KIMD_SHA_256, h,
+ pg, PAGE_SIZE) != PAGE_SIZE) {
+ prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+ ret = -EIO;
+ goto out;
+ }
+ if (n < sizeof(hash))
+ memcpy(ebuf, hash, n);
+ ret += n;
+ ebuf += n;
+ nbytes -= n;
+ }
+
+out:
+ free_page((unsigned long)pg);
+ return ret;
}
-static void prng_add_entropy(void)
+
+/*** tdes functions ***/
+
+static void prng_tdes_add_entropy(void)
{
__u64 entropy[4];
unsigned int i;
int ret;
for (i = 0; i < 16; i++) {
- ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy,
- (char *)entropy, sizeof(entropy));
+ ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+ (char *)entropy, (char *)entropy,
+ sizeof(entropy));
BUG_ON(ret < 0 || ret != sizeof(entropy));
- memcpy(parm_block, entropy, sizeof(entropy));
+ memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
}
}
-static void prng_seed(int nbytes)
+
+static void prng_tdes_seed(int nbytes)
{
char buf[16];
int i = 0;
- BUG_ON(nbytes > 16);
+ BUG_ON(nbytes > sizeof(buf));
+
get_random_bytes(buf, nbytes);
/* Add the entropy */
while (nbytes >= 8) {
- *((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
- prng_add_entropy();
+ *((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i));
+ prng_tdes_add_entropy();
i += 8;
nbytes -= 8;
}
- prng_add_entropy();
+ prng_tdes_add_entropy();
+ prng_data->prngws.reseed_counter = 0;
+}
+
+
+static int __init prng_tdes_instantiate(void)
+{
+ int datalen;
+
+ pr_debug("prng runs in TDES mode with "
+ "chunksize=%d and reseed_limit=%u\n",
+ prng_chunk_size, prng_reseed_limit);
+
+ /* memory allocation, prng_data struct init, mutex init */
+ datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+ prng_data = kzalloc(datalen, GFP_KERNEL);
+ if (!prng_data) {
+ prng_errorflag = PRNG_INSTANTIATE_FAILED;
+ return -ENOMEM;
+ }
+ mutex_init(&prng_data->mutex);
+ prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+ memcpy(prng_data->prngws.parm_block, initial_parm_block, 32);
+
+ /* initialize the PRNG, add 128 bits of entropy */
+ prng_tdes_seed(16);
+
+ return 0;
}
-static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
- loff_t *ppos)
+
+static void prng_tdes_deinstantiate(void)
+{
+ pr_debug("The prng module stopped "
+ "after running in triple DES mode\n");
+ kzfree(prng_data);
+}
+
+
+/*** sha512 functions ***/
+
+static int __init prng_sha512_selftest(void)
{
- int chunk, n;
+ /* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */
+ static const u8 seed[] __initconst = {
+ 0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a,
+ 0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22,
+ 0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b,
+ 0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c,
+ 0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa,
+ 0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 };
+ static const u8 V0[] __initconst = {
+ 0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76,
+ 0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22,
+ 0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60,
+ 0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1,
+ 0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95,
+ 0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b,
+ 0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79,
+ 0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03,
+ 0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd,
+ 0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36,
+ 0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0,
+ 0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e,
+ 0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea,
+ 0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 };
+ static const u8 C0[] __initconst = {
+ 0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95,
+ 0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e,
+ 0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94,
+ 0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86,
+ 0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50,
+ 0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f,
+ 0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9,
+ 0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43,
+ 0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee,
+ 0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a,
+ 0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9,
+ 0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09,
+ 0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc,
+ 0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e };
+ static const u8 random[] __initconst = {
+ 0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57,
+ 0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6,
+ 0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d,
+ 0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf,
+ 0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26,
+ 0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64,
+ 0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55,
+ 0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a,
+ 0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78,
+ 0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e,
+ 0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb,
+ 0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30,
+ 0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19,
+ 0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f,
+ 0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d,
+ 0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a,
+ 0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2,
+ 0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd,
+ 0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd,
+ 0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2,
+ 0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b,
+ 0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1,
+ 0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99,
+ 0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e,
+ 0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3,
+ 0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67,
+ 0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3,
+ 0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d,
+ 0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b,
+ 0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13,
+ 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
+ 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
+
int ret = 0;
- int tmp;
+ u8 buf[sizeof(random)];
+ struct ppno_ws_s ws;
+
+ memset(&ws, 0, sizeof(ws));
+
+ /* initial seed */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+ &ws, NULL, 0,
+ seed, sizeof(seed));
+ if (ret < 0) {
+ pr_err("The prng self test seed operation for the "
+ "SHA-512 mode failed with rc=%d\n", ret);
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* check working states V and C */
+ if (memcmp(ws.V, V0, sizeof(V0)) != 0
+ || memcmp(ws.C, C0, sizeof(C0)) != 0) {
+ pr_err("The prng self test state test "
+ "for the SHA-512 mode failed\n");
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* generate random bytes */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf),
+ NULL, 0);
+ if (ret < 0) {
+ pr_err("The prng self test generate operation for "
+ "the SHA-512 mode failed with rc=%d\n", ret);
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf),
+ NULL, 0);
+ if (ret < 0) {
+ pr_err("The prng self test generate operation for "
+ "the SHA-512 mode failed with rc=%d\n", ret);
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* check against expected data */
+ if (memcmp(buf, random, sizeof(random)) != 0) {
+ pr_err("The prng self test data test "
+ "for the SHA-512 mode failed\n");
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static int __init prng_sha512_instantiate(void)
+{
+ int ret, datalen;
+ u8 seed[64];
+
+ pr_debug("prng runs in SHA-512 mode "
+ "with chunksize=%d and reseed_limit=%u\n",
+ prng_chunk_size, prng_reseed_limit);
+
+ /* memory allocation, prng_data struct init, mutex init */
+ datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+ if (fips_enabled)
+ datalen += prng_chunk_size;
+ prng_data = kzalloc(datalen, GFP_KERNEL);
+ if (!prng_data) {
+ prng_errorflag = PRNG_INSTANTIATE_FAILED;
+ return -ENOMEM;
+ }
+ mutex_init(&prng_data->mutex);
+ prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+
+ /* selftest */
+ ret = prng_sha512_selftest();
+ if (ret)
+ goto outfree;
+
+ /* generate initial seed bytestring, first 48 bytes of entropy */
+ ret = generate_entropy(seed, 48);
+ if (ret != 48)
+ goto outfree;
+ /* followed by 16 bytes of unique nonce */
+ get_tod_clock_ext(seed + 48);
+
+ /* initial seed of the ppno drng */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+ &prng_data->ppnows, NULL, 0,
+ seed, sizeof(seed));
+ if (ret < 0) {
+ prng_errorflag = PRNG_SEED_FAILED;
+ ret = -EIO;
+ goto outfree;
+ }
+
+ /* if fips mode is enabled, generate a first block of random
+ bytes for the FIPS 140-2 Conditional Self Test */
+ if (fips_enabled) {
+ prng_data->prev = prng_data->buf + prng_chunk_size;
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &prng_data->ppnows,
+ prng_data->prev,
+ prng_chunk_size,
+ NULL, 0);
+ if (ret < 0 || ret != prng_chunk_size) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ ret = -EIO;
+ goto outfree;
+ }
+ }
+
+ return 0;
+
+outfree:
+ kfree(prng_data);
+ return ret;
+}
+
+
+static void prng_sha512_deinstantiate(void)
+{
+ pr_debug("The prng module stopped after running in SHA-512 mode\n");
+ kzfree(prng_data);
+}
+
+
+static int prng_sha512_reseed(void)
+{
+ int ret;
+ u8 seed[32];
+
+ /* generate 32 bytes of fresh entropy */
+ ret = generate_entropy(seed, sizeof(seed));
+ if (ret != sizeof(seed))
+ return ret;
+
+ /* do a reseed of the ppno drng with this bytestring */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+ &prng_data->ppnows, NULL, 0,
+ seed, sizeof(seed));
+ if (ret) {
+ prng_errorflag = PRNG_RESEED_FAILED;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static int prng_sha512_generate(u8 *buf, size_t nbytes)
+{
+ int ret;
+
+ /* reseed needed ? */
+ if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+ ret = prng_sha512_reseed();
+ if (ret)
+ return ret;
+ }
+
+ /* PPNO generate */
+ ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+ &prng_data->ppnows, buf, nbytes,
+ NULL, 0);
+ if (ret < 0 || ret != nbytes) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ return -EIO;
+ }
+
+ /* FIPS 140-2 Conditional Self Test */
+ if (fips_enabled) {
+ if (!memcmp(prng_data->prev, buf, nbytes)) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ return -EILSEQ;
+ }
+ memcpy(prng_data->prev, buf, nbytes);
+ }
+
+ return ret;
+}
+
+
+/*** file io functions ***/
+
+static int prng_open(struct inode *inode, struct file *file)
+{
+ return nonseekable_open(inode, file);
+}
+
+
+static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ int chunk, n, tmp, ret = 0;
+
+ /* lock prng_data struct */
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
- /* nbytes can be arbitrary length, we split it into chunks */
while (nbytes) {
- /* same as in extract_entropy_user in random.c */
if (need_resched()) {
if (signal_pending(current)) {
if (ret == 0)
ret = -ERESTARTSYS;
break;
}
+ /* give mutex free before calling schedule() */
+ mutex_unlock(&prng_data->mutex);
schedule();
+ /* occopy mutex again */
+ if (mutex_lock_interruptible(&prng_data->mutex)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ return ret;
+ }
}
/*
@@ -112,12 +535,11 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
/* PRNG only likes multiples of 8 bytes */
n = (chunk + 7) & -8;
- if (p->count > prng_entropy_limit)
- prng_seed(8);
+ if (prng_data->prngws.reseed_counter > prng_reseed_limit)
+ prng_tdes_seed(8);
/* if the CPU supports PRNG stckf is present too */
- asm volatile(".insn s,0xb27c0000,%0"
- : "=m" (*((unsigned long long *)p->buf)) : : "cc");
+ *((unsigned long long *)prng_data->buf) = get_tod_clock_fast();
/*
* Beside the STCKF the input for the TDES-EDE is the output
@@ -132,35 +554,259 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
* Note: you can still get strict X9.17 conformity by setting
* prng_chunk_size to 8 bytes.
*/
- tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n);
- BUG_ON((tmp < 0) || (tmp != n));
+ tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+ prng_data->buf, prng_data->buf, n);
+ if (tmp < 0 || tmp != n) {
+ ret = -EIO;
+ break;
+ }
- p->count += n;
+ prng_data->prngws.byte_counter += n;
+ prng_data->prngws.reseed_counter += n;
- if (copy_to_user(ubuf, p->buf, chunk))
+ if (copy_to_user(ubuf, prng_data->buf, chunk))
return -EFAULT;
nbytes -= chunk;
ret += chunk;
ubuf += chunk;
}
+
+ /* unlock prng_data struct */
+ mutex_unlock(&prng_data->mutex);
+
return ret;
}
-static const struct file_operations prng_fops = {
+
+static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ int n, ret = 0;
+ u8 *p;
+
+ /* if errorflag is set do nothing and return 'broken pipe' */
+ if (prng_errorflag)
+ return -EPIPE;
+
+ /* lock prng_data struct */
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+
+ while (nbytes) {
+ if (need_resched()) {
+ if (signal_pending(current)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ break;
+ }
+ /* give mutex free before calling schedule() */
+ mutex_unlock(&prng_data->mutex);
+ schedule();
+ /* occopy mutex again */
+ if (mutex_lock_interruptible(&prng_data->mutex)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ return ret;
+ }
+ }
+ if (prng_data->rest) {
+ /* push left over random bytes from the previous read */
+ p = prng_data->buf + prng_chunk_size - prng_data->rest;
+ n = (nbytes < prng_data->rest) ?
+ nbytes : prng_data->rest;
+ prng_data->rest -= n;
+ } else {
+ /* generate one chunk of random bytes into read buf */
+ p = prng_data->buf;
+ n = prng_sha512_generate(p, prng_chunk_size);
+ if (n < 0) {
+ ret = n;
+ break;
+ }
+ if (nbytes < prng_chunk_size) {
+ n = nbytes;
+ prng_data->rest = prng_chunk_size - n;
+ } else {
+ n = prng_chunk_size;
+ prng_data->rest = 0;
+ }
+ }
+ if (copy_to_user(ubuf, p, n)) {
+ ret = -EFAULT;
+ break;
+ }
+ ubuf += n;
+ nbytes -= n;
+ ret += n;
+ }
+
+ /* unlock prng_data struct */
+ mutex_unlock(&prng_data->mutex);
+
+ return ret;
+}
+
+
+/*** sysfs stuff ***/
+
+static const struct file_operations prng_sha512_fops = {
+ .owner = THIS_MODULE,
+ .open = &prng_open,
+ .release = NULL,
+ .read = &prng_sha512_read,
+ .llseek = noop_llseek,
+};
+static const struct file_operations prng_tdes_fops = {
.owner = THIS_MODULE,
.open = &prng_open,
.release = NULL,
- .read = &prng_read,
+ .read = &prng_tdes_read,
.llseek = noop_llseek,
};
-static struct miscdevice prng_dev = {
+static struct miscdevice prng_sha512_dev = {
+ .name = "prandom",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &prng_sha512_fops,
+};
+static struct miscdevice prng_tdes_dev = {
.name = "prandom",
.minor = MISC_DYNAMIC_MINOR,
- .fops = &prng_fops,
+ .fops = &prng_tdes_fops,
};
+
+/* chunksize attribute (ro) */
+static ssize_t prng_chunksize_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+}
+static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
+
+/* counter attribute (ro) */
+static ssize_t prng_counter_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u64 counter;
+
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+ if (prng_mode == PRNG_MODE_SHA512)
+ counter = prng_data->ppnows.stream_bytes;
+ else
+ counter = prng_data->prngws.byte_counter;
+ mutex_unlock(&prng_data->mutex);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+}
+static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
+
+/* errorflag attribute (ro) */
+static ssize_t prng_errorflag_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+}
+static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
+
+/* mode attribute (ro) */
+static ssize_t prng_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (prng_mode == PRNG_MODE_TDES)
+ return snprintf(buf, PAGE_SIZE, "TDES\n");
+ else
+ return snprintf(buf, PAGE_SIZE, "SHA512\n");
+}
+static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
+
+/* reseed attribute (w) */
+static ssize_t prng_reseed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+ prng_sha512_reseed();
+ mutex_unlock(&prng_data->mutex);
+
+ return count;
+}
+static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store);
+
+/* reseed limit attribute (rw) */
+static ssize_t prng_reseed_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+}
+static ssize_t prng_reseed_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned limit;
+
+ if (sscanf(buf, "%u\n", &limit) != 1)
+ return -EINVAL;
+
+ if (prng_mode == PRNG_MODE_SHA512) {
+ if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+ return -EINVAL;
+ } else {
+ if (limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+ return -EINVAL;
+ }
+
+ prng_reseed_limit = limit;
+
+ return count;
+}
+static DEVICE_ATTR(reseed_limit, 0644,
+ prng_reseed_limit_show, prng_reseed_limit_store);
+
+/* strength attribute (ro) */
+static ssize_t prng_strength_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "256\n");
+}
+static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
+
+static struct attribute *prng_sha512_dev_attrs[] = {
+ &dev_attr_errorflag.attr,
+ &dev_attr_chunksize.attr,
+ &dev_attr_byte_counter.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_reseed.attr,
+ &dev_attr_reseed_limit.attr,
+ &dev_attr_strength.attr,
+ NULL
+};
+static struct attribute *prng_tdes_dev_attrs[] = {
+ &dev_attr_chunksize.attr,
+ &dev_attr_byte_counter.attr,
+ &dev_attr_mode.attr,
+ NULL
+};
+
+static struct attribute_group prng_sha512_dev_attr_group = {
+ .attrs = prng_sha512_dev_attrs
+};
+static struct attribute_group prng_tdes_dev_attr_group = {
+ .attrs = prng_tdes_dev_attrs
+};
+
+
+/*** module init and exit ***/
+
static int __init prng_init(void)
{
int ret;
@@ -169,43 +815,105 @@ static int __init prng_init(void)
if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
return -EOPNOTSUPP;
- if (prng_chunk_size < 8)
- return -EINVAL;
+ /* choose prng mode */
+ if (prng_mode != PRNG_MODE_TDES) {
+ /* check for MSA5 support for PPNO operations */
+ if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN,
+ CRYPT_S390_MSA5)) {
+ if (prng_mode == PRNG_MODE_SHA512) {
+ pr_err("The prng module cannot "
+ "start in SHA-512 mode\n");
+ return -EOPNOTSUPP;
+ }
+ prng_mode = PRNG_MODE_TDES;
+ } else
+ prng_mode = PRNG_MODE_SHA512;
+ }
- p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
- p->count = 0;
+ if (prng_mode == PRNG_MODE_SHA512) {
- p->buf = kmalloc(prng_chunk_size, GFP_KERNEL);
- if (!p->buf) {
- ret = -ENOMEM;
- goto out_free;
- }
+ /* SHA512 mode */
- /* initialize the PRNG, add 128 bits of entropy */
- prng_seed(16);
+ if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN
+ || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX)
+ return -EINVAL;
+ prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f;
- ret = misc_register(&prng_dev);
- if (ret)
- goto out_buf;
- return 0;
+ if (prng_reseed_limit == 0)
+ prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512;
+ else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+ return -EINVAL;
+
+ ret = prng_sha512_instantiate();
+ if (ret)
+ goto out;
+
+ ret = misc_register(&prng_sha512_dev);
+ if (ret) {
+ prng_sha512_deinstantiate();
+ goto out;
+ }
+ ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj,
+ &prng_sha512_dev_attr_group);
+ if (ret) {
+ misc_deregister(&prng_sha512_dev);
+ prng_sha512_deinstantiate();
+ goto out;
+ }
-out_buf:
- kfree(p->buf);
-out_free:
- kfree(p);
+ } else {
+
+ /* TDES mode */
+
+ if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN
+ || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX)
+ return -EINVAL;
+ prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07;
+
+ if (prng_reseed_limit == 0)
+ prng_reseed_limit = PRNG_RESEED_LIMIT_TDES;
+ else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+ return -EINVAL;
+
+ ret = prng_tdes_instantiate();
+ if (ret)
+ goto out;
+
+ ret = misc_register(&prng_tdes_dev);
+ if (ret) {
+ prng_tdes_deinstantiate();
+ goto out;
+ }
+ ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj,
+ &prng_tdes_dev_attr_group);
+ if (ret) {
+ misc_deregister(&prng_tdes_dev);
+ prng_tdes_deinstantiate();
+ goto out;
+ }
+
+ }
+
+out:
return ret;
}
+
static void __exit prng_exit(void)
{
- /* wipe me */
- kzfree(p->buf);
- kfree(p);
-
- misc_deregister(&prng_dev);
+ if (prng_mode == PRNG_MODE_SHA512) {
+ sysfs_remove_group(&prng_sha512_dev.this_device->kobj,
+ &prng_sha512_dev_attr_group);
+ misc_deregister(&prng_sha512_dev);
+ prng_sha512_deinstantiate();
+ } else {
+ sysfs_remove_group(&prng_tdes_dev.this_device->kobj,
+ &prng_tdes_dev_attr_group);
+ misc_deregister(&prng_tdes_dev);
+ prng_tdes_deinstantiate();
+ }
}
+
module_init(prng_init);
module_exit(prng_exit);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 3f5c799b7fb5..d3f896a35b98 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -48,7 +48,7 @@ static struct dentry *hypfs_last_dentry;
static void hypfs_update_update(struct super_block *sb)
{
struct hypfs_sb_info *sb_info = sb->s_fs_info;
- struct inode *inode = sb_info->update_file->d_inode;
+ struct inode *inode = d_inode(sb_info->update_file);
sb_info->last_update = get_seconds();
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -64,7 +64,7 @@ static void hypfs_add_dentry(struct dentry *dentry)
static inline int hypfs_positive(struct dentry *dentry)
{
- return dentry->d_inode && !d_unhashed(dentry);
+ return d_really_is_positive(dentry) && !d_unhashed(dentry);
}
static void hypfs_remove(struct dentry *dentry)
@@ -72,16 +72,16 @@ static void hypfs_remove(struct dentry *dentry)
struct dentry *parent;
parent = dentry->d_parent;
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&d_inode(parent)->i_mutex);
if (hypfs_positive(dentry)) {
if (d_is_dir(dentry))
- simple_rmdir(parent->d_inode, dentry);
+ simple_rmdir(d_inode(parent), dentry);
else
- simple_unlink(parent->d_inode, dentry);
+ simple_unlink(d_inode(parent), dentry);
}
d_delete(dentry);
dput(dentry);
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&d_inode(parent)->i_mutex);
}
static void hypfs_delete_tree(struct dentry *root)
@@ -336,7 +336,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&d_inode(parent)->i_mutex);
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
@@ -357,14 +357,14 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
} else if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
- inc_nlink(parent->d_inode);
+ inc_nlink(d_inode(parent));
} else
BUG();
inode->i_private = data;
d_instantiate(dentry, inode);
dget(dentry);
fail:
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&d_inode(parent)->i_mutex);
return dentry;
}
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 709955ddaa4d..9d395961e713 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -42,7 +42,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
- return 0;
+ return false;
return addr + size - 1 <= *dev->dma_mask;
}
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 694bcd6bd927..2f924bc30e35 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -26,6 +26,9 @@
/* Not more than 2GB */
#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
+/* Allocate control page with GFP_DMA */
+#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA
+
/* Maximum address we can use for the crash control pages */
#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index a5e656260a70..d29ad9545b41 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -14,7 +14,9 @@ typedef struct {
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
- /* The mmu context has extended page tables. */
+ /* The mmu context allocates 4K page tables. */
+ unsigned int alloc_pgste:1;
+ /* The mmu context uses extended page tables. */
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
unsigned int use_skey:1;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index d25d9ff10ba8..fb1b93ea3e3f 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -20,8 +20,11 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.flush_mm = 0;
mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
+#ifdef CONFIG_PGSTE
+ mm->context.alloc_pgste = page_table_allocate_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
+#endif
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index ef803c202d42..a648338c434a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -7,6 +7,7 @@
#define PCI_BAR_COUNT 6
#include <linux/pci.h>
+#include <linux/mutex.h>
#include <asm-generic/pci.h>
#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
@@ -44,10 +45,6 @@ struct zpci_fmb {
u64 rpcit_ops;
u64 dma_rbytes;
u64 dma_wbytes;
- /* software counters */
- atomic64_t allocated_pages;
- atomic64_t mapped_pages;
- atomic64_t unmapped_pages;
} __packed __aligned(16);
enum zpci_state {
@@ -80,6 +77,7 @@ struct zpci_dev {
u8 pft; /* pci function type */
u16 domain;
+ struct mutex lock;
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
@@ -111,6 +109,10 @@ struct zpci_dev {
/* Function measurement block */
struct zpci_fmb *fmb;
u16 fmb_update; /* update interval */
+ /* software counters */
+ atomic64_t allocated_pages;
+ atomic64_t mapped_pages;
+ atomic64_t unmapped_pages;
enum pci_bus_speed max_bus_speed;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 51e7fb634ebc..7b7858f158b4 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -21,6 +21,7 @@ void crst_table_free(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+extern int page_table_allocate_pgste;
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 989cfae9e202..ef24a212eeb7 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -12,12 +12,9 @@
#define _ASM_S390_PGTABLE_H
/*
- * The Linux memory management assumes a three-level page table setup. For
- * s390 31 bit we "fold" the mid level into the top-level page table, so
- * that we physically have the same two-level page table as the s390 mmu
- * expects in 31 bit mode. For s390 64 bit we use three of the five levels
- * the hardware provides (region first and region second tables are not
- * used).
+ * The Linux memory management assumes a three-level page table setup.
+ * For s390 64 bit we use up to four of the five levels the hardware
+ * provides (region first tables are not used).
*
* The "pgd_xxx()" functions are trivial for a folded two-level
* setup: the pgd is never bad, and a pmd always exists (as it's folded
@@ -101,8 +98,8 @@ extern unsigned long zero_page_mask;
#ifndef __ASSEMBLY__
/*
- * The vmalloc and module area will always be on the topmost area of the kernel
- * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules.
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
* On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
* modules will reside. That makes sure that inter module branches always
* happen without trampolines and in addition the placement within a 2GB frame
@@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr)
}
/*
- * A 31 bit pagetable entry of S390 has following format:
- * | PFRA | | OS |
- * 0 0IP0
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * I Page-Invalid Bit: Page is not available for address-translation
- * P Page-Protection Bit: Store access not possible for page
- *
- * A 31 bit segmenttable entry of S390 has following format:
- * | P-table origin | |PTL
- * 0 IC
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * I Segment-Invalid Bit: Segment is not available for address-translation
- * C Common-Segment Bit: Segment is not private (PoP 3-30)
- * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256)
- *
- * The 31 bit segmenttable origin of S390 has following format:
- *
- * |S-table origin | | STL |
- * X **GPS
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * X Space-Switch event:
- * G Segment-Invalid Bit: *
- * P Private-Space Bit: Segment is not private (PoP 3-30)
- * S Storage-Alteration:
- * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048)
- *
* A 64 bit pagetable entry of S390 has following format:
* | PFRA |0IPC| OS |
* 0000000000111111111122222222223333333333444444444455555555556666
@@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr)
/* Software bits in the page table entry */
#define _PAGE_PRESENT 0x001 /* SW pte present bit */
-#define _PAGE_TYPE 0x002 /* SW pte type bit */
#define _PAGE_YOUNG 0x004 /* SW pte young bit */
#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
#define _PAGE_READ 0x010 /* SW pte read bit */
@@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr)
* table lock held.
*
* The following table gives the different possible bit combinations for
- * the pte hardware and software bits in the last 12 bits of a pte:
+ * the pte hardware and software bits in the last 12 bits of a pte
+ * (. unassigned bit, x don't care, t swap type):
*
* 842100000000
* 000084210000
* 000000008421
- * .IR...wrdytp
- * empty .10...000000
- * swap .10...xxxx10
- * file .11...xxxxx0
- * prot-none, clean, old .11...000001
- * prot-none, clean, young .11...000101
- * prot-none, dirty, old .10...001001
- * prot-none, dirty, young .10...001101
- * read-only, clean, old .11...010001
- * read-only, clean, young .01...010101
- * read-only, dirty, old .11...011001
- * read-only, dirty, young .01...011101
- * read-write, clean, old .11...110001
- * read-write, clean, young .01...110101
- * read-write, dirty, old .10...111001
- * read-write, dirty, young .00...111101
+ * .IR.uswrdy.p
+ * empty .10.00000000
+ * swap .11..ttttt.0
+ * prot-none, clean, old .11.xx0000.1
+ * prot-none, clean, young .11.xx0001.1
+ * prot-none, dirty, old .10.xx0010.1
+ * prot-none, dirty, young .10.xx0011.1
+ * read-only, clean, old .11.xx0100.1
+ * read-only, clean, young .01.xx0101.1
+ * read-only, dirty, old .11.xx0110.1
+ * read-only, dirty, young .01.xx0111.1
+ * read-write, clean, old .11.xx1100.1
+ * read-write, clean, young .01.xx1101.1
+ * read-write, dirty, old .10.xx1110.1
+ * read-write, dirty, young .00.xx1111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
*
- * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
- * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
- * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
+ * pte_none is true for the bit pattern .10.00000000, pte == 0x400
+ * pte_swap is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200
+ * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
*/
/* Bits in the segment/region table address-space-control-element */
@@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr)
* read-write, dirty, young 11..0...0...11
* The segment table origin is used to distinguish empty (origin==0) from
* read-write, old segment table entries (origin!=0)
+ * HW-bits: R read-only, I invalid
+ * SW-bits: y young, d dirty, r read, w write
*/
#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
@@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+static inline int mm_alloc_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(mm->context.alloc_pgste))
+ return 1;
+#endif
+ return 0;
+}
+
/*
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
@@ -516,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
}
-static inline int pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
{
unsigned long origin_mask;
@@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte)
static inline int pte_swap(pte_t pte)
{
- /* Bit pattern: (pte & 0x603) == 0x402 */
- return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
- _PAGE_TYPE | _PAGE_PRESENT))
- == (_PAGE_INVALID | _PAGE_TYPE);
+ /* Bit pattern: (pte & 0x201) == 0x200 */
+ return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
+ == _PAGE_PROTECT;
}
static inline int pte_special(pte_t pte)
@@ -1586,51 +1563,51 @@ static inline int has_transparent_hugepage(void)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * 31 bit swap entry format:
- * A page-table entry has some bits we have to treat in a special way.
- * Bits 0, 20 and bit 23 have to be zero, otherwise an specification
- * exception will occur instead of a page translation exception. The
- * specifiation exception has the bad habit not to store necessary
- * information in the lowcore.
- * Bits 21, 22, 30 and 31 are used to indicate the page type.
- * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
- * This leaves the bits 1-19 and bits 24-29 to store type and offset.
- * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
- * plus 24 for the offset.
- * 0| offset |0110|o|type |00|
- * 0 0000000001111111111 2222 2 22222 33
- * 0 1234567890123456789 0123 4 56789 01
- *
* 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
* Bits 52 and bit 55 have to be zero, otherwise an specification
* exception will occur instead of a page translation exception. The
* specifiation exception has the bad habit not to store necessary
* information in the lowcore.
- * Bits 53, 54, 62 and 63 are used to indicate the page type.
- * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
- * This leaves the bits 0-51 and bits 56-61 to store type and offset.
- * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
- * plus 56 for the offset.
- * | offset |0110|o|type |00|
- * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66
- * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23
+ * Bits 54 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
+ * This leaves the bits 0-51 and bits 56-62 to store type and offset.
+ * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
+ * for the offset.
+ * | offset |01100|type |00|
+ * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
+ * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
*/
-#define __SWP_OFFSET_MASK (~0UL >> 11)
+#define __SWP_OFFSET_MASK ((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT 12
+#define __SWP_TYPE_MASK ((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT 2
static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
pte_t pte;
- offset &= __SWP_OFFSET_MASK;
- pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
- ((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
+
+ pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
+ pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
+ pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
return pte;
}
-#define __swp_type(entry) (((entry).val >> 2) & 0x1f)
-#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1))
-#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+ return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
+}
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index afa2bd750ffc..8cd8e7b288c5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -110,7 +110,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
- 0x205c800000000000UL,
+ 0x005c800000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 210ffede0153..e617e74b7be2 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
/*
* Convert encoding pte bits pmd bits
- * .IR...wrdytp dy..R...I...wr
- * empty .10...000000 -> 00..0...1...00
- * prot-none, clean, old .11...000001 -> 00..1...1...00
- * prot-none, clean, young .11...000101 -> 01..1...1...00
- * prot-none, dirty, old .10...001001 -> 10..1...1...00
- * prot-none, dirty, young .10...001101 -> 11..1...1...00
- * read-only, clean, old .11...010001 -> 00..1...1...01
- * read-only, clean, young .01...010101 -> 01..1...0...01
- * read-only, dirty, old .11...011001 -> 10..1...1...01
- * read-only, dirty, young .01...011101 -> 11..1...0...01
- * read-write, clean, old .11...110001 -> 00..0...1...11
- * read-write, clean, young .01...110101 -> 01..0...0...11
- * read-write, dirty, old .10...111001 -> 10..0...1...11
- * read-write, dirty, young .00...111101 -> 11..0...0...11
+ * lIR.uswrdy.p dy..R...I...wr
+ * empty 010.000000.0 -> 00..0...1...00
+ * prot-none, clean, old 111.000000.1 -> 00..1...1...00
+ * prot-none, clean, young 111.000001.1 -> 01..1...1...00
+ * prot-none, dirty, old 111.000010.1 -> 10..1...1...00
+ * prot-none, dirty, young 111.000011.1 -> 11..1...1...00
+ * read-only, clean, old 111.000100.1 -> 00..1...1...01
+ * read-only, clean, young 101.000101.1 -> 01..1...0...01
+ * read-only, dirty, old 111.000110.1 -> 10..1...1...01
+ * read-only, dirty, young 101.000111.1 -> 11..1...0...01
+ * read-write, clean, old 111.001100.1 -> 00..1...1...11
+ * read-write, clean, young 101.001101.1 -> 01..1...0...11
+ * read-write, dirty, old 110.001110.1 -> 10..0...1...11
+ * read-write, dirty, young 100.001111.1 -> 11..0...0...11
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
*/
if (pte_present(pte)) {
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
@@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
/*
* Convert encoding pmd bits pte bits
- * dy..R...I...wr .IR...wrdytp
- * empty 00..0...1...00 -> .10...001100
- * prot-none, clean, old 00..0...1...00 -> .10...000001
- * prot-none, clean, young 01..0...1...00 -> .10...000101
- * prot-none, dirty, old 10..0...1...00 -> .10...001001
- * prot-none, dirty, young 11..0...1...00 -> .10...001101
- * read-only, clean, old 00..1...1...01 -> .11...010001
- * read-only, clean, young 01..1...1...01 -> .11...010101
- * read-only, dirty, old 10..1...1...01 -> .11...011001
- * read-only, dirty, young 11..1...1...01 -> .11...011101
- * read-write, clean, old 00..0...1...11 -> .10...110001
- * read-write, clean, young 01..0...1...11 -> .10...110101
- * read-write, dirty, old 10..0...1...11 -> .10...111001
- * read-write, dirty, young 11..0...1...11 -> .10...111101
+ * dy..R...I...wr lIR.uswrdy.p
+ * empty 00..0...1...00 -> 010.000000.0
+ * prot-none, clean, old 00..1...1...00 -> 111.000000.1
+ * prot-none, clean, young 01..1...1...00 -> 111.000001.1
+ * prot-none, dirty, old 10..1...1...00 -> 111.000010.1
+ * prot-none, dirty, young 11..1...1...00 -> 111.000011.1
+ * read-only, clean, old 00..1...1...01 -> 111.000100.1
+ * read-only, clean, young 01..1...0...01 -> 101.000101.1
+ * read-only, dirty, old 10..1...1...01 -> 111.000110.1
+ * read-only, dirty, young 11..1...0...01 -> 101.000111.1
+ * read-write, clean, old 00..1...1...11 -> 111.001100.1
+ * read-write, clean, young 01..1...0...11 -> 101.001101.1
+ * read-write, dirty, old 10..0...1...11 -> 110.001110.1
+ * read-write, dirty, young 11..0...0...11 -> 100.001111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
*/
if (pmd_present(pmd)) {
pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
@@ -70,8 +76,8 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
- pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
- pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 33f589459113..b33f66110ca9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -18,6 +18,7 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
+#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>
@@ -920,6 +921,40 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
}
EXPORT_SYMBOL(get_guest_storage_key);
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+ {
+ .procname = "allocate_pgste",
+ .data = &page_table_allocate_pgste,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ .extra1 = &page_table_allocate_pgste_min,
+ .extra2 = &page_table_allocate_pgste_max,
+ },
+ { }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = page_table_sysctl,
+ },
+ { }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+ return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
#else /* CONFIG_PGSTE */
static inline int page_table_with_pgste(struct page *page)
@@ -963,7 +998,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
struct page *uninitialized_var(page);
unsigned int mask, bit;
- if (mm_has_pgste(mm))
+ if (mm_alloc_pgste(mm))
return page_table_alloc_pgste(mm);
/* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
@@ -1165,116 +1200,25 @@ static inline void thp_split_mm(struct mm_struct *mm)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
- struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end)
-{
- unsigned long next, *table, *new;
- struct page *page;
- spinlock_t *ptl;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
-again:
- if (pmd_none_or_clear_bad(pmd))
- continue;
- table = (unsigned long *) pmd_deref(*pmd);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- continue;
- /* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm);
- if (!new)
- return -ENOMEM;
-
- ptl = pmd_lock(mm, pmd);
- if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
- /* Nuke pmd entry pointing to the "short" page table */
- pmdp_flush_lazy(mm, addr, pmd);
- pmd_clear(pmd);
- /* Copy ptes from old table to new table */
- memcpy(new, table, PAGE_SIZE/2);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- /* Establish new table */
- pmd_populate(mm, pmd, (pte_t *) new);
- /* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table, addr);
- new = NULL;
- }
- spin_unlock(ptl);
- if (new) {
- page_table_free_pgste(new);
- goto again;
- }
- } while (pmd++, addr = next, addr != end);
-
- return addr;
-}
-
-static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
- struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pud_t *pud;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
- if (unlikely(IS_ERR_VALUE(next)))
- return next;
- } while (pud++, addr = next, addr != end);
-
- return addr;
-}
-
-static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pgd_t *pgd;
-
- pgd = pgd_offset(mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
- if (unlikely(IS_ERR_VALUE(next)))
- return next;
- } while (pgd++, addr = next, addr != end);
-
- return 0;
-}
-
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- struct mmu_gather tlb;
+ struct mm_struct *mm = current->mm;
/* Do we have pgstes? if yes, we are done */
- if (mm_has_pgste(tsk->mm))
+ if (mm_has_pgste(mm))
return 0;
-
+ /* Fail if the page tables are 2K */
+ if (!mm_alloc_pgste(mm))
+ return -EINVAL;
down_write(&mm->mmap_sem);
+ mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- /* Reallocate the page tables with pgstes */
- tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
- if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
- mm->context.has_pgste = 1;
- tlb_finish_mmu(&tlb, 0, TASK_SIZE);
up_write(&mm->mmap_sem);
- return mm->context.has_pgste ? 0 : -ENOMEM;
+ return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
index ba44c9f55346..a1c917d881ec 100644
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -1,134 +1,115 @@
/*
* BPF Jit compiler for s390, help functions.
*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012,2015
*
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
+
#include <linux/linkage.h>
+#include "bpf_jit.h"
/*
* Calling convention:
- * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved
- * %r2: skb pointer
- * %r3: offset parameter
- * %r5: BPF A accumulator
- * %r8: return address
- * %r9: save register for skb pointer
- * %r10: skb->data
- * %r11: skb->len - skb->data_len (headlen)
- * %r12: BPF X accumulator
+ * registers %r7-%r10, %r11,%r13, and %r15 are call saved
+ *
+ * Input (64 bit):
+ * %r3 (%b2) = offset into skb data
+ * %r6 (%b5) = return address
+ * %r7 (%b6) = skb pointer
+ * %r12 = skb data pointer
+ *
+ * Output:
+ * %r14= %b0 = return value (read skb value)
+ *
+ * Work registers: %r2,%r4,%r5,%r14
*
* skb_copy_bits takes 4 parameters:
* %r2 = skb pointer
* %r3 = offset into skb data
* %r4 = pointer to temp buffer
* %r5 = length to copy
+ * Return value in %r2: 0 = ok
+ *
+ * bpf_internal_load_pointer_neg_helper takes 3 parameters:
+ * %r2 = skb pointer
+ * %r3 = offset into data
+ * %r4 = length to copy
+ * Return value in %r2: Pointer to data
*/
-#define SKBDATA %r8
-
- /* A = *(u32 *) (skb->data+K+X) */
-ENTRY(sk_load_word_ind)
- ar %r3,%r12 # offset += X
- bmr %r8 # < 0 -> return with cc
-
- /* A = *(u32 *) (skb->data+K) */
-ENTRY(sk_load_word)
- llgfr %r1,%r3 # extend offset
- ahi %r3,4 # offset + 4
- clr %r11,%r3 # hlen <= offset + 4 ?
- jl sk_load_word_slow
- l %r5,0(%r1,%r10) # get word from skb
- xr %r1,%r1 # set cc to zero
- br %r8
-sk_load_word_slow:
- lgr %r9,%r2 # save %r2
- lgr %r3,%r1 # offset
- la %r4,160(%r15) # pointer to temp buffer
- lghi %r5,4 # 4 bytes
- brasl %r14,skb_copy_bits # get data from skb
- l %r5,160(%r15) # load result from temp buffer
- ltgr %r2,%r2 # set cc to (%r2 != 0)
- lgr %r2,%r9 # restore %r2
- br %r8
+#define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */
- /* A = *(u16 *) (skb->data+K+X) */
-ENTRY(sk_load_half_ind)
- ar %r3,%r12 # offset += X
- bmr %r8 # < 0 -> return with cc
-
- /* A = *(u16 *) (skb->data+K) */
-ENTRY(sk_load_half)
- llgfr %r1,%r3 # extend offset
- ahi %r3,2 # offset + 2
- clr %r11,%r3 # hlen <= offset + 2 ?
- jl sk_load_half_slow
- llgh %r5,0(%r1,%r10) # get half from skb
- xr %r1,%r1 # set cc to zero
- br %r8
-
-sk_load_half_slow:
- lgr %r9,%r2 # save %r2
- lgr %r3,%r1 # offset
- la %r4,162(%r15) # pointer to temp buffer
- lghi %r5,2 # 2 bytes
- brasl %r14,skb_copy_bits # get data from skb
- xc 160(2,%r15),160(%r15)
- l %r5,160(%r15) # load result from temp buffer
- ltgr %r2,%r2 # set cc to (%r2 != 0)
- lgr %r2,%r9 # restore %r2
- br %r8
+/*
+ * Load SIZE bytes from SKB
+ */
+#define sk_load_common(NAME, SIZE, LOAD) \
+ENTRY(sk_load_##NAME); \
+ ltgr %r3,%r3; /* Is offset negative? */ \
+ jl sk_load_##NAME##_slow_neg; \
+ENTRY(sk_load_##NAME##_pos); \
+ aghi %r3,SIZE; /* Offset + SIZE */ \
+ clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \
+ jh sk_load_##NAME##_slow; \
+ LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \
+ b OFF_OK(%r6); /* Return */ \
+ \
+sk_load_##NAME##_slow:; \
+ lgr %r2,%r7; /* Arg1 = skb pointer */ \
+ aghi %r3,-SIZE; /* Arg2 = offset */ \
+ la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \
+ lghi %r5,SIZE; /* Arg4 = size */ \
+ brasl %r14,skb_copy_bits; /* Get data from skb */ \
+ LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \
+ ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \
+ br %r6; /* Return */
- /* A = *(u8 *) (skb->data+K+X) */
-ENTRY(sk_load_byte_ind)
- ar %r3,%r12 # offset += X
- bmr %r8 # < 0 -> return with cc
+sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */
+sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */
- /* A = *(u8 *) (skb->data+K) */
+/*
+ * Load 1 byte from SKB (optimized version)
+ */
+ /* r14 = *(u8 *) (skb->data+offset) */
ENTRY(sk_load_byte)
- llgfr %r1,%r3 # extend offset
- clr %r11,%r3 # hlen < offset ?
- jle sk_load_byte_slow
- lhi %r5,0
- ic %r5,0(%r1,%r10) # get byte from skb
- xr %r1,%r1 # set cc to zero
- br %r8
+ ltgr %r3,%r3 # Is offset negative?
+ jl sk_load_byte_slow_neg
+ENTRY(sk_load_byte_pos)
+ clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen?
+ jnl sk_load_byte_slow
+ llgc %r14,0(%r3,%r12) # Get byte from skb
+ b OFF_OK(%r6) # Return OK
sk_load_byte_slow:
- lgr %r9,%r2 # save %r2
- lgr %r3,%r1 # offset
- la %r4,163(%r15) # pointer to temp buffer
- lghi %r5,1 # 1 byte
- brasl %r14,skb_copy_bits # get data from skb
- xc 160(3,%r15),160(%r15)
- l %r5,160(%r15) # load result from temp buffer
- ltgr %r2,%r2 # set cc to (%r2 != 0)
- lgr %r2,%r9 # restore %r2
- br %r8
+ lgr %r2,%r7 # Arg1 = skb pointer
+ # Arg2 = offset
+ la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer
+ lghi %r5,1 # Arg4 = size (1 byte)
+ brasl %r14,skb_copy_bits # Get data from skb
+ llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer
+ ltgr %r2,%r2 # Set cc to (%r2 != 0)
+ br %r6 # Return cc
+
+#define sk_negative_common(NAME, SIZE, LOAD) \
+sk_load_##NAME##_slow_neg:; \
+ cgfi %r3,SKF_MAX_NEG_OFF; \
+ jl bpf_error; \
+ lgr %r2,%r7; /* Arg1 = skb pointer */ \
+ /* Arg2 = offset */ \
+ lghi %r4,SIZE; /* Arg3 = size */ \
+ brasl %r14,bpf_internal_load_pointer_neg_helper; \
+ ltgr %r2,%r2; \
+ jz bpf_error; \
+ LOAD %r14,0(%r2); /* Get data from pointer */ \
+ xr %r3,%r3; /* Set cc to zero */ \
+ br %r6; /* Return cc */
- /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
-ENTRY(sk_load_byte_msh)
- llgfr %r1,%r3 # extend offset
- clr %r11,%r3 # hlen < offset ?
- jle sk_load_byte_msh_slow
- lhi %r12,0
- ic %r12,0(%r1,%r10) # get byte from skb
- nill %r12,0x0f
- sll %r12,2
- xr %r1,%r1 # set cc to zero
- br %r8
+sk_negative_common(word, 4, llgf)
+sk_negative_common(half, 2, llgh)
+sk_negative_common(byte, 1, llgc)
-sk_load_byte_msh_slow:
- lgr %r9,%r2 # save %r2
- lgr %r3,%r1 # offset
- la %r4,163(%r15) # pointer to temp buffer
- lghi %r5,1 # 1 byte
- brasl %r14,skb_copy_bits # get data from skb
- xc 160(3,%r15),160(%r15)
- l %r12,160(%r15) # load result from temp buffer
- nill %r12,0x0f
- sll %r12,2
- ltgr %r2,%r2 # set cc to (%r2 != 0)
- lgr %r2,%r9 # restore %r2
- br %r8
+bpf_error:
+# force a return 0 from jit handler
+ ltgr %r15,%r15 # Set condition code
+ br %r6
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
new file mode 100644
index 000000000000..ba8593a515ba
--- /dev/null
+++ b/arch/s390/net/bpf_jit.h
@@ -0,0 +1,58 @@
+/*
+ * BPF Jit compiler defines
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef __ARCH_S390_NET_BPF_JIT_H
+#define __ARCH_S390_NET_BPF_JIT_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Stackframe layout (packed stack):
+ *
+ * ^ high
+ * +---------------+ |
+ * | old backchain | |
+ * +---------------+ |
+ * | r15 - r6 | |
+ * BFP -> +===============+ |
+ * | | |
+ * | BPF stack | |
+ * | | |
+ * +---------------+ |
+ * | 8 byte hlen | |
+ * R15+168 -> +---------------+ |
+ * | 4 byte align | |
+ * +---------------+ |
+ * | 4 byte temp | |
+ * | for bpf_jit.S | |
+ * R15+160 -> +---------------+ |
+ * | new backchain | |
+ * R15+152 -> +---------------+ |
+ * | + 152 byte SA | |
+ * R15 -> +---------------+ + low
+ *
+ * We get 160 bytes stack space from calling function, but only use
+ * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
+ */
+#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
+#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
+
+/* Offset to skip condition code check */
+#define OFF_OK 4
+
+#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bbd1981cc150..20c146d1251a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1,817 +1,1210 @@
/*
* BPF Jit compiler for s390.
*
- * Copyright IBM Corp. 2012
+ * Minimum build requirements:
+ *
+ * - HAVE_MARCH_Z196_FEATURES: laal, laalg
+ * - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
+ * - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
+ * - PACK_STACK
+ * - 64BIT
+ *
+ * Copyright IBM Corp. 2012,2015
*
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
+
+#define KMSG_COMPONENT "bpf_jit"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/netdevice.h>
-#include <linux/if_vlan.h>
#include <linux/filter.h>
#include <linux/init.h>
#include <asm/cacheflush.h>
-#include <asm/facility.h>
#include <asm/dis.h>
+#include "bpf_jit.h"
-/*
- * Conventions:
- * %r2 = skb pointer
- * %r3 = offset parameter
- * %r4 = scratch register / length parameter
- * %r5 = BPF A accumulator
- * %r8 = return address
- * %r9 = save register for skb pointer
- * %r10 = skb->data
- * %r11 = skb->len - skb->data_len (headlen)
- * %r12 = BPF X accumulator
- * %r13 = literal pool pointer
- * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS
- */
int bpf_jit_enable __read_mostly;
+struct bpf_jit {
+ u32 seen; /* Flags to remember seen eBPF instructions */
+ u32 seen_reg[16]; /* Array to remember which registers are used */
+ u32 *addrs; /* Array with relative instruction addresses */
+ u8 *prg_buf; /* Start of program */
+ int size; /* Size of program and literal pool */
+ int size_prg; /* Size of program */
+ int prg; /* Current position in program */
+ int lit_start; /* Start of literal pool */
+ int lit; /* Current position in literal pool */
+ int base_ip; /* Base address for literal pool */
+ int ret0_ip; /* Address of return 0 */
+ int exit_ip; /* Address of exit */
+};
+
+#define BPF_SIZE_MAX 4096 /* Max size for program */
+
+#define SEEN_SKB 1 /* skb access */
+#define SEEN_MEM 2 /* use mem[] for temporary storage */
+#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL 8 /* code uses literals */
+#define SEEN_FUNC 16 /* calls C functions */
+#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+
/*
- * assembly code in arch/x86/net/bpf_jit.S
+ * s390 registers
*/
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+#define REG_W0 (__MAX_BPF_REG+0) /* Work register 1 (even) */
+#define REG_W1 (__MAX_BPF_REG+1) /* Work register 2 (odd) */
+#define REG_SKB_DATA (__MAX_BPF_REG+2) /* SKB data register */
+#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */
+#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */
+#define REG_0 REG_W0 /* Register 0 */
+#define REG_2 BPF_REG_1 /* Register 2 */
+#define REG_14 BPF_REG_0 /* Register 14 */
-struct bpf_jit {
- unsigned int seen;
- u8 *start;
- u8 *prg;
- u8 *mid;
- u8 *lit;
- u8 *end;
- u8 *base_ip;
- u8 *ret0_ip;
- u8 *exit_ip;
- unsigned int off_load_word;
- unsigned int off_load_half;
- unsigned int off_load_byte;
- unsigned int off_load_bmsh;
- unsigned int off_load_iword;
- unsigned int off_load_ihalf;
- unsigned int off_load_ibyte;
+/*
+ * Mapping of BPF registers to s390 registers
+ */
+static const int reg2hex[] = {
+ /* Return code */
+ [BPF_REG_0] = 14,
+ /* Function parameters */
+ [BPF_REG_1] = 2,
+ [BPF_REG_2] = 3,
+ [BPF_REG_3] = 4,
+ [BPF_REG_4] = 5,
+ [BPF_REG_5] = 6,
+ /* Call saved registers */
+ [BPF_REG_6] = 7,
+ [BPF_REG_7] = 8,
+ [BPF_REG_8] = 9,
+ [BPF_REG_9] = 10,
+ /* BPF stack pointer */
+ [BPF_REG_FP] = 13,
+ /* SKB data pointer */
+ [REG_SKB_DATA] = 12,
+ /* Work registers for s390x backend */
+ [REG_W0] = 0,
+ [REG_W1] = 1,
+ [REG_L] = 11,
+ [REG_15] = 15,
};
-#define BPF_SIZE_MAX 4096 /* Max size for program */
+static inline u32 reg(u32 dst_reg, u32 src_reg)
+{
+ return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
+}
+
+static inline u32 reg_high(u32 reg)
+{
+ return reg2hex[reg] << 4;
+}
+
+static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
+{
+ u32 r1 = reg2hex[b1];
+
+ if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15)
+ jit->seen_reg[r1] = 1;
+}
+
+#define REG_SET_SEEN(b1) \
+({ \
+ reg_set_seen(jit, b1); \
+})
+
+#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
+
+/*
+ * EMIT macros for code generation
+ */
+
+#define _EMIT2(op) \
+({ \
+ if (jit->prg_buf) \
+ *(u16 *) (jit->prg_buf + jit->prg) = op; \
+ jit->prg += 2; \
+})
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG 2 /* ebx is used */
-#define SEEN_MEM 4 /* use mem[] for temporary storage */
-#define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */
-#define SEEN_LITERAL 16 /* code uses literals */
-#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */
-#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */
-#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */
-#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */
-#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */
-#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */
-#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */
-
-#define EMIT2(op) \
-({ \
- if (jit->prg + 2 <= jit->mid) \
- *(u16 *) jit->prg = op; \
- jit->prg += 2; \
+#define EMIT2(op, b1, b2) \
+({ \
+ _EMIT2(op | reg(b1, b2)); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
})
-#define EMIT4(op) \
-({ \
- if (jit->prg + 4 <= jit->mid) \
- *(u32 *) jit->prg = op; \
- jit->prg += 4; \
+#define _EMIT4(op) \
+({ \
+ if (jit->prg_buf) \
+ *(u32 *) (jit->prg_buf + jit->prg) = op; \
+ jit->prg += 4; \
})
-#define EMIT4_DISP(op, disp) \
-({ \
- unsigned int __disp = (disp) & 0xfff; \
- EMIT4(op | __disp); \
+#define EMIT4(op, b1, b2) \
+({ \
+ _EMIT4(op | reg(b1, b2)); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
})
-#define EMIT4_IMM(op, imm) \
-({ \
- unsigned int __imm = (imm) & 0xffff; \
- EMIT4(op | __imm); \
+#define EMIT4_RRF(op, b1, b2, b3) \
+({ \
+ _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+ REG_SET_SEEN(b3); \
})
-#define EMIT4_PCREL(op, pcrel) \
-({ \
- long __pcrel = ((pcrel) >> 1) & 0xffff; \
- EMIT4(op | __pcrel); \
+#define _EMIT4_DISP(op, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ _EMIT4(op | __disp); \
})
-#define EMIT6(op1, op2) \
-({ \
- if (jit->prg + 6 <= jit->mid) { \
- *(u32 *) jit->prg = op1; \
- *(u16 *) (jit->prg + 4) = op2; \
- } \
- jit->prg += 6; \
+#define EMIT4_DISP(op, b1, b2, disp) \
+({ \
+ _EMIT4_DISP(op | reg_high(b1) << 16 | \
+ reg_high(b2) << 8, disp); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
})
-#define EMIT6_DISP(op1, op2, disp) \
-({ \
- unsigned int __disp = (disp) & 0xfff; \
- EMIT6(op1 | __disp, op2); \
+#define EMIT4_IMM(op, b1, imm) \
+({ \
+ unsigned int __imm = (imm) & 0xffff; \
+ _EMIT4(op | reg_high(b1) << 16 | __imm); \
+ REG_SET_SEEN(b1); \
})
-#define EMIT6_IMM(op, imm) \
-({ \
- unsigned int __imm = (imm); \
- EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+#define EMIT4_PCREL(op, pcrel) \
+({ \
+ long __pcrel = ((pcrel) >> 1) & 0xffff; \
+ _EMIT4(op | __pcrel); \
})
-#define EMIT_CONST(val) \
-({ \
- unsigned int ret; \
- ret = (unsigned int) (jit->lit - jit->base_ip); \
- jit->seen |= SEEN_LITERAL; \
- if (jit->lit + 4 <= jit->end) \
- *(u32 *) jit->lit = val; \
- jit->lit += 4; \
- ret; \
+#define _EMIT6(op1, op2) \
+({ \
+ if (jit->prg_buf) { \
+ *(u32 *) (jit->prg_buf + jit->prg) = op1; \
+ *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
+ } \
+ jit->prg += 6; \
})
-#define EMIT_FN_CONST(bit, fn) \
-({ \
- unsigned int ret; \
- ret = (unsigned int) (jit->lit - jit->base_ip); \
- if (jit->seen & bit) { \
- jit->seen |= SEEN_LITERAL; \
- if (jit->lit + 8 <= jit->end) \
- *(void **) jit->lit = fn; \
- jit->lit += 8; \
- } \
- ret; \
+#define _EMIT6_DISP(op1, op2, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ _EMIT6(op1 | __disp, op2); \
})
-static void bpf_jit_fill_hole(void *area, unsigned int size)
+#define EMIT6_DISP(op1, op2, b1, b2, b3, disp) \
+({ \
+ _EMIT6_DISP(op1 | reg(b1, b2) << 16 | \
+ reg_high(b3) << 8, op2, disp); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+ REG_SET_SEEN(b3); \
+})
+
+#define _EMIT6_DISP_LH(op1, op2, disp) \
+({ \
+ unsigned int __disp_h = ((u32)disp) & 0xff000; \
+ unsigned int __disp_l = ((u32)disp) & 0x00fff; \
+ _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
+})
+
+#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
+({ \
+ _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
+ reg_high(b3) << 8, op2, disp); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+ REG_SET_SEEN(b3); \
+})
+
+#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
+({ \
+ /* Branch instruction needs 6 bytes */ \
+ int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
+ _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define _EMIT6_IMM(op, imm) \
+({ \
+ unsigned int __imm = (imm); \
+ _EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+})
+
+#define EMIT6_IMM(op, b1, imm) \
+({ \
+ _EMIT6_IMM(op | reg_high(b1) << 16, imm); \
+ REG_SET_SEEN(b1); \
+})
+
+#define EMIT_CONST_U32(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit - jit->base_ip; \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->prg_buf) \
+ *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
+ jit->lit += 4; \
+ ret; \
+})
+
+#define EMIT_CONST_U64(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit - jit->base_ip; \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->prg_buf) \
+ *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
+ jit->lit += 8; \
+ ret; \
+})
+
+#define EMIT_ZERO(b1) \
+({ \
+ /* llgfr %dst,%dst (zero extend to 64 bit) */ \
+ EMIT4(0xb9160000, b1, b1); \
+ REG_SET_SEEN(b1); \
+})
+
+/*
+ * Fill whole space with illegal instructions
+ */
+static void jit_fill_hole(void *area, unsigned int size)
{
- /* Fill whole space with illegal instructions */
memset(area, 0, size);
}
-static void bpf_jit_prologue(struct bpf_jit *jit)
+/*
+ * Save registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+ u32 off = 72 + (rs - 6) * 8;
+
+ if (rs == re)
+ /* stg %rs,off(%r15) */
+ _EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
+ else
+ /* stmg %rs,%re,off(%r15) */
+ _EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
+}
+
+/*
+ * Restore registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+ u32 off = 72 + (rs - 6) * 8;
+
+ if (jit->seen & SEEN_STACK)
+ off += STK_OFF;
+
+ if (rs == re)
+ /* lg %rs,off(%r15) */
+ _EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
+ else
+ /* lmg %rs,%re,off(%r15) */
+ _EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
+}
+
+/*
+ * Return first seen register (from start)
+ */
+static int get_start(struct bpf_jit *jit, int start)
{
- /* Save registers and create stack frame if necessary */
- if (jit->seen & SEEN_DATAREF) {
- /* stmg %r8,%r15,88(%r15) */
- EMIT6(0xeb8ff058, 0x0024);
- /* lgr %r14,%r15 */
- EMIT4(0xb90400ef);
- /* aghi %r15,<offset> */
- EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80);
- /* stg %r14,152(%r15) */
- EMIT6(0xe3e0f098, 0x0024);
- } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
- /* stmg %r12,%r13,120(%r15) */
- EMIT6(0xebcdf078, 0x0024);
- else if (jit->seen & SEEN_XREG)
- /* stg %r12,120(%r15) */
- EMIT6(0xe3c0f078, 0x0024);
- else if (jit->seen & SEEN_LITERAL)
- /* stg %r13,128(%r15) */
- EMIT6(0xe3d0f080, 0x0024);
+ int i;
+ for (i = start; i <= 15; i++) {
+ if (jit->seen_reg[i])
+ return i;
+ }
+ return 0;
+}
+
+/*
+ * Return last seen register (from start) (gap >= 2)
+ */
+static int get_end(struct bpf_jit *jit, int start)
+{
+ int i;
+
+ for (i = start; i < 15; i++) {
+ if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+ return i - 1;
+ }
+ return jit->seen_reg[15] ? 15 : 14;
+}
+
+#define REGS_SAVE 1
+#define REGS_RESTORE 0
+/*
+ * Save and restore clobbered registers (6-15) on stack.
+ * We save/restore registers in chunks with gap >= 2 registers.
+ */
+static void save_restore_regs(struct bpf_jit *jit, int op)
+{
+
+ int re = 6, rs;
+
+ do {
+ rs = get_start(jit, re);
+ if (!rs)
+ break;
+ re = get_end(jit, rs + 1);
+ if (op == REGS_SAVE)
+ save_regs(jit, rs, re);
+ else
+ restore_regs(jit, rs, re);
+ re++;
+ } while (re <= 15);
+}
+
+/*
+ * Emit function prologue
+ *
+ * Save registers and create stack frame if necessary.
+ * See stack frame layout desription in "bpf_jit.h"!
+ */
+static void bpf_jit_prologue(struct bpf_jit *jit)
+{
+ /* Save registers */
+ save_restore_regs(jit, REGS_SAVE);
/* Setup literal pool */
if (jit->seen & SEEN_LITERAL) {
/* basr %r13,0 */
- EMIT2(0x0dd0);
+ EMIT2(0x0d00, REG_L, REG_0);
jit->base_ip = jit->prg;
}
- jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word);
- jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half);
- jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte);
- jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh);
- jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind);
- jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind);
- jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind);
-
- /* Filter needs to access skb data */
- if (jit->seen & SEEN_DATAREF) {
- /* l %r11,<len>(%r2) */
- EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len));
- /* s %r11,<data_len>(%r2) */
- EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len));
- /* lg %r10,<data>(%r2) */
- EMIT6_DISP(0xe3a02000, 0x0004,
- offsetof(struct sk_buff, data));
+ /* Setup stack and backchain */
+ if (jit->seen & SEEN_STACK) {
+ /* lgr %bfp,%r15 (BPF frame pointer) */
+ EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+ /* aghi %r15,-STK_OFF */
+ EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
+ if (jit->seen & SEEN_FUNC)
+ /* stg %bfp,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+ REG_15, 152);
}
+ /*
+ * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
+ * we store the SKB header length on the stack and the SKB data
+ * pointer in REG_SKB_DATA.
+ */
+ if (jit->seen & SEEN_SKB) {
+ /* Header length: llgf %w1,<len>(%b1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
+ offsetof(struct sk_buff, len));
+ /* s %w1,<data_len>(%b1) */
+ EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
+ offsetof(struct sk_buff, data_len));
+ /* stg %w1,ST_OFF_HLEN(%r0,%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
+ STK_OFF_HLEN);
+ /* lg %skb_data,data_off(%b1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
+ BPF_REG_1, offsetof(struct sk_buff, data));
+ }
+ /* BPF compatibility: clear A (%b7) and X (%b8) registers */
+ if (REG_SEEN(BPF_REG_7))
+ /* lghi %b7,0 */
+ EMIT4_IMM(0xa7090000, BPF_REG_7, 0);
+ if (REG_SEEN(BPF_REG_8))
+ /* lghi %b8,0 */
+ EMIT4_IMM(0xa7090000, BPF_REG_8, 0);
}
+/*
+ * Function epilogue
+ */
static void bpf_jit_epilogue(struct bpf_jit *jit)
{
/* Return 0 */
if (jit->seen & SEEN_RET0) {
jit->ret0_ip = jit->prg;
- /* lghi %r2,0 */
- EMIT4(0xa7290000);
+ /* lghi %b0,0 */
+ EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
}
jit->exit_ip = jit->prg;
+ /* Load exit code: lgr %r2,%b0 */
+ EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
- if (jit->seen & SEEN_DATAREF)
- /* lmg %r8,%r15,<offset>(%r15) */
- EMIT6_DISP(0xeb8ff000, 0x0004,
- (jit->seen & SEEN_MEM) ? 200 : 168);
- else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
- /* lmg %r12,%r13,120(%r15) */
- EMIT6(0xebcdf078, 0x0004);
- else if (jit->seen & SEEN_XREG)
- /* lg %r12,120(%r15) */
- EMIT6(0xe3c0f078, 0x0004);
- else if (jit->seen & SEEN_LITERAL)
- /* lg %r13,128(%r15) */
- EMIT6(0xe3d0f080, 0x0004);
+ save_restore_regs(jit, REGS_RESTORE);
/* br %r14 */
- EMIT2(0x07fe);
+ _EMIT2(0x07fe);
}
/*
- * make sure we dont leak kernel information to user
+ * Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
*/
-static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
{
- /* Clear temporary memory if (seen & SEEN_MEM) */
- if (jit->seen & SEEN_MEM)
- /* xc 0(64,%r15),0(%r15) */
- EMIT6(0xd73ff000, 0xf000);
- /* Clear X if (seen & SEEN_XREG) */
- if (jit->seen & SEEN_XREG)
- /* lhi %r12,0 */
- EMIT4(0xa7c80000);
- /* Clear A if the first register does not set it. */
- switch (filter[0].code) {
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_IND:
- case BPF_LD | BPF_H | BPF_IND:
- case BPF_LD | BPF_B | BPF_IND:
- case BPF_LD | BPF_IMM:
- case BPF_LD | BPF_MEM:
- case BPF_MISC | BPF_TXA:
- case BPF_RET | BPF_K:
- /* first instruction sets A register */
- break;
- default: /* A = 0 */
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
- }
-}
+ struct bpf_insn *insn = &fp->insnsi[i];
+ int jmp_off, last, insn_count = 1;
+ unsigned int func_addr, mask;
+ u32 dst_reg = insn->dst_reg;
+ u32 src_reg = insn->src_reg;
+ u32 *addrs = jit->addrs;
+ s32 imm = insn->imm;
+ s16 off = insn->off;
-static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
- unsigned int *addrs, int i, int last)
-{
- unsigned int K;
- int offset;
- unsigned int mask;
- u16 code;
-
- K = filter->k;
- code = bpf_anc_helper(filter);
-
- switch (code) {
- case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
- jit->seen |= SEEN_XREG;
- /* ar %r5,%r12 */
- EMIT2(0x1a5c);
- break;
- case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
- if (!K)
+ switch (insn->code) {
+ /*
+ * BPF_MOV
+ */
+ case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
+ /* llgfr %dst,%src */
+ EMIT4(0xb9160000, dst_reg, src_reg);
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+ /* lgr %dst,%src */
+ EMIT4(0xb9040000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
+ /* llilf %dst,imm */
+ EMIT6_IMM(0xc00f0000, dst_reg, imm);
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
+ /* lgfi %dst,imm */
+ EMIT6_IMM(0xc0010000, dst_reg, imm);
+ break;
+ /*
+ * BPF_LD 64
+ */
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ {
+ /* 16 byte instruction that uses two 'struct bpf_insn' */
+ u64 imm64;
+
+ imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
+ /* lg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm64));
+ insn_count = 2;
+ break;
+ }
+ /*
+ * BPF_ADD
+ */
+ case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
+ /* ar %dst,%src */
+ EMIT2(0x1a00, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
+ /* agr %dst,%src */
+ EMIT4(0xb9080000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
+ if (!imm)
break;
- if (K <= 16383)
- /* ahi %r5,<K> */
- EMIT4_IMM(0xa75a0000, K);
- else if (test_facility(21))
- /* alfi %r5,<K> */
- EMIT6_IMM(0xc25b0000, K);
- else
- /* a %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
+ /* alfi %dst,imm */
+ EMIT6_IMM(0xc20b0000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
+ if (!imm)
+ break;
+ /* agfi %dst,imm */
+ EMIT6_IMM(0xc2080000, dst_reg, imm);
+ break;
+ /*
+ * BPF_SUB
+ */
+ case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
+ /* sr %dst,%src */
+ EMIT2(0x1b00, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
break;
- case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
- jit->seen |= SEEN_XREG;
- /* sr %r5,%r12 */
- EMIT2(0x1b5c);
+ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
+ /* sgr %dst,%src */
+ EMIT4(0xb9090000, dst_reg, src_reg);
break;
- case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
- if (!K)
+ case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
+ if (!imm)
break;
- if (K <= 16384)
- /* ahi %r5,-K */
- EMIT4_IMM(0xa75a0000, -K);
- else if (test_facility(21))
- /* alfi %r5,-K */
- EMIT6_IMM(0xc25b0000, -K);
- else
- /* s %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
- break;
- case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
- jit->seen |= SEEN_XREG;
- /* msr %r5,%r12 */
- EMIT4(0xb252005c);
- break;
- case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
- if (K <= 16383)
- /* mhi %r5,K */
- EMIT4_IMM(0xa75c0000, K);
- else if (test_facility(34))
- /* msfi %r5,<K> */
- EMIT6_IMM(0xc2510000, K);
- else
- /* ms %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x7150d000, EMIT_CONST(K));
+ /* alfi %dst,-imm */
+ EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+ EMIT_ZERO(dst_reg);
break;
- case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
- jit->seen |= SEEN_XREG | SEEN_RET0;
- /* ltr %r12,%r12 */
- EMIT2(0x12cc);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
- /* lhi %r4,0 */
- EMIT4(0xa7480000);
- /* dlr %r4,%r12 */
- EMIT4(0xb997004c);
- break;
- case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
- if (K == 1)
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
+ if (!imm)
+ break;
+ /* agfi %dst,-imm */
+ EMIT6_IMM(0xc2080000, dst_reg, -imm);
+ break;
+ /*
+ * BPF_MUL
+ */
+ case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
+ /* msr %dst,%src */
+ EMIT4(0xb2520000, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
+ /* msgr %dst,%src */
+ EMIT4(0xb90c0000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
+ if (imm == 1)
+ break;
+ /* msfi %r5,imm */
+ EMIT6_IMM(0xc2010000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
+ if (imm == 1)
break;
- /* lhi %r4,0 */
- EMIT4(0xa7480000);
- /* dl %r4,<d(K)>(%r13) */
- EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
- break;
- case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
- jit->seen |= SEEN_XREG | SEEN_RET0;
- /* ltr %r12,%r12 */
- EMIT2(0x12cc);
+ /* msgfi %dst,imm */
+ EMIT6_IMM(0xc2000000, dst_reg, imm);
+ break;
+ /*
+ * BPF_DIV / BPF_MOD
+ */
+ case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
+ case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ jit->seen |= SEEN_RET0;
+ /* ltr %src,%src (if src == 0 goto fail) */
+ EMIT2(0x1200, src_reg, src_reg);
/* jz <ret0> */
- EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
- /* lhi %r4,0 */
- EMIT4(0xa7480000);
- /* dlr %r4,%r12 */
- EMIT4(0xb997004c);
- /* lr %r5,%r4 */
- EMIT2(0x1854);
- break;
- case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
- if (K == 1) {
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
+ EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+ /* lhi %w0,0 */
+ EMIT4_IMM(0xa7080000, REG_W0, 0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dlr %w0,%src */
+ EMIT4(0xb9970000, REG_W0, src_reg);
+ /* llgfr %dst,%rc */
+ EMIT4(0xb9160000, dst_reg, rc_reg);
+ break;
+ }
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ jit->seen |= SEEN_RET0;
+ /* ltgr %src,%src (if src == 0 goto fail) */
+ EMIT4(0xb9020000, src_reg, src_reg);
+ /* jz <ret0> */
+ EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, src_reg);
+ /* lgr %dst,%rc */
+ EMIT4(0xb9040000, dst_reg, rc_reg);
+ break;
+ }
+ case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
+ case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ if (imm == 1) {
+ if (BPF_OP(insn->code) == BPF_MOD)
+ /* lhgi %dst,0 */
+ EMIT4_IMM(0xa7090000, dst_reg, 0);
break;
}
- /* lhi %r4,0 */
- EMIT4(0xa7480000);
- /* dl %r4,<d(K)>(%r13) */
- EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
- /* lr %r5,%r4 */
- EMIT2(0x1854);
- break;
- case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
- jit->seen |= SEEN_XREG;
- /* nr %r5,%r12 */
- EMIT2(0x145c);
- break;
- case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
- if (test_facility(21))
- /* nilf %r5,<K> */
- EMIT6_IMM(0xc05b0000, K);
- else
- /* n %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5450d000, EMIT_CONST(K));
- break;
- case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
- jit->seen |= SEEN_XREG;
- /* or %r5,%r12 */
- EMIT2(0x165c);
- break;
- case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
- if (test_facility(21))
- /* oilf %r5,<K> */
- EMIT6_IMM(0xc05d0000, K);
- else
- /* o %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5650d000, EMIT_CONST(K));
+ /* lhi %w0,0 */
+ EMIT4_IMM(0xa7080000, REG_W0, 0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U32(imm));
+ /* llgfr %dst,%rc */
+ EMIT4(0xb9160000, dst_reg, rc_reg);
+ break;
+ }
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ if (imm == 1) {
+ if (BPF_OP(insn->code) == BPF_MOD)
+ /* lhgi %dst,0 */
+ EMIT4_IMM(0xa7090000, dst_reg, 0);
+ break;
+ }
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ /* lgr %dst,%rc */
+ EMIT4(0xb9040000, dst_reg, rc_reg);
+ break;
+ }
+ /*
+ * BPF_AND
+ */
+ case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
+ /* nr %dst,%src */
+ EMIT2(0x1400, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+ /* ngr %dst,%src */
+ EMIT4(0xb9800000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
+ /* nilf %dst,imm */
+ EMIT6_IMM(0xc00b0000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
break;
- case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
- case BPF_ALU | BPF_XOR | BPF_X:
- jit->seen |= SEEN_XREG;
- /* xr %r5,%r12 */
- EMIT2(0x175c);
+ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+ /* ng %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
break;
- case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
- if (!K)
+ /*
+ * BPF_OR
+ */
+ case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+ /* or %dst,%src */
+ EMIT2(0x1600, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+ /* ogr %dst,%src */
+ EMIT4(0xb9810000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
+ /* oilf %dst,imm */
+ EMIT6_IMM(0xc00d0000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
+ /* og %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ break;
+ /*
+ * BPF_XOR
+ */
+ case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
+ /* xr %dst,%src */
+ EMIT2(0x1700, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
+ /* xgr %dst,%src */
+ EMIT4(0xb9820000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
+ if (!imm)
break;
- /* x %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5750d000, EMIT_CONST(K));
+ /* xilf %dst,imm */
+ EMIT6_IMM(0xc0070000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
+ /* xg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ break;
+ /*
+ * BPF_LSH
+ */
+ case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
+ /* sll %dst,0(%src) */
+ EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
+ EMIT_ZERO(dst_reg);
break;
- case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
- jit->seen |= SEEN_XREG;
- /* sll %r5,0(%r12) */
- EMIT4(0x8950c000);
+ case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
+ /* sllg %dst,%dst,0(%src) */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
break;
- case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
- if (K == 0)
+ case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
+ if (imm == 0)
break;
- /* sll %r5,K */
- EMIT4_DISP(0x89500000, K);
+ /* sll %dst,imm(%r0) */
+ EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+ EMIT_ZERO(dst_reg);
break;
- case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
- jit->seen |= SEEN_XREG;
- /* srl %r5,0(%r12) */
- EMIT4(0x8850c000);
+ case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
+ if (imm == 0)
+ break;
+ /* sllg %dst,%dst,imm(%r0) */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
+ break;
+ /*
+ * BPF_RSH
+ */
+ case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
+ /* srl %dst,0(%src) */
+ EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
+ EMIT_ZERO(dst_reg);
break;
- case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
- if (K == 0)
+ case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
+ /* srlg %dst,%dst,0(%src) */
+ EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
+ if (imm == 0)
break;
- /* srl %r5,K */
- EMIT4_DISP(0x88500000, K);
- break;
- case BPF_ALU | BPF_NEG: /* A = -A */
- /* lcr %r5,%r5 */
- EMIT2(0x1355);
- break;
- case BPF_JMP | BPF_JA: /* ip += K */
- offset = addrs[i + K] + jit->start - jit->prg;
- EMIT4_PCREL(0xa7f40000, offset);
- break;
- case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
- mask = 0x200000; /* jh */
- goto kbranch;
- case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
- mask = 0xa00000; /* jhe */
- goto kbranch;
- case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
- mask = 0x800000; /* je */
-kbranch: /* Emit compare if the branch targets are different */
- if (filter->jt != filter->jf) {
- if (test_facility(21))
- /* clfi %r5,<K> */
- EMIT6_IMM(0xc25f0000, K);
- else
- /* cl %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5550d000, EMIT_CONST(K));
- }
-branch: if (filter->jt == filter->jf) {
- if (filter->jt == 0)
- break;
- /* j <jt> */
- offset = addrs[i + filter->jt] + jit->start - jit->prg;
- EMIT4_PCREL(0xa7f40000, offset);
+ /* srl %dst,imm(%r0) */
+ EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
+ if (imm == 0)
break;
- }
- if (filter->jt != 0) {
- /* brc <mask>,<jt> */
- offset = addrs[i + filter->jt] + jit->start - jit->prg;
- EMIT4_PCREL(0xa7040000 | mask, offset);
- }
- if (filter->jf != 0) {
- /* brc <mask^15>,<jf> */
- offset = addrs[i + filter->jf] + jit->start - jit->prg;
- EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
- }
+ /* srlg %dst,%dst,imm(%r0) */
+ EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
break;
- case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
- mask = 0x700000; /* jnz */
- /* Emit test if the branch targets are different */
- if (filter->jt != filter->jf) {
- if (K > 65535) {
- /* lr %r4,%r5 */
- EMIT2(0x1845);
- /* n %r4,<d(K)>(%r13) */
- EMIT4_DISP(0x5440d000, EMIT_CONST(K));
- } else
- /* tmll %r5,K */
- EMIT4_IMM(0xa7510000, K);
- }
- goto branch;
- case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
- mask = 0x200000; /* jh */
- goto xbranch;
- case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
- mask = 0xa00000; /* jhe */
- goto xbranch;
- case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
- mask = 0x800000; /* je */
-xbranch: /* Emit compare if the branch targets are different */
- if (filter->jt != filter->jf) {
- jit->seen |= SEEN_XREG;
- /* clr %r5,%r12 */
- EMIT2(0x155c);
- }
- goto branch;
- case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
- mask = 0x700000; /* jnz */
- /* Emit test if the branch targets are different */
- if (filter->jt != filter->jf) {
- jit->seen |= SEEN_XREG;
- /* lr %r4,%r5 */
- EMIT2(0x1845);
- /* nr %r4,%r12 */
- EMIT2(0x144c);
+ /*
+ * BPF_ARSH
+ */
+ case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
+ /* srag %dst,%dst,0(%src) */
+ EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
+ break;
+ case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
+ if (imm == 0)
+ break;
+ /* srag %dst,%dst,imm(%r0) */
+ EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
+ break;
+ /*
+ * BPF_NEG
+ */
+ case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
+ /* lcr %dst,%dst */
+ EMIT2(0x1300, dst_reg, dst_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+ /* lcgr %dst,%dst */
+ EMIT4(0xb9130000, dst_reg, dst_reg);
+ break;
+ /*
+ * BPF_FROM_BE/LE
+ */
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ /* s390 is big endian, therefore only clear high order bytes */
+ switch (imm) {
+ case 16: /* dst = (u16) cpu_to_be16(dst) */
+ /* llghr %dst,%dst */
+ EMIT4(0xb9850000, dst_reg, dst_reg);
+ break;
+ case 32: /* dst = (u32) cpu_to_be32(dst) */
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ case 64: /* dst = (u64) cpu_to_be64(dst) */
+ break;
}
- goto branch;
- case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
- jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
- offset = jit->off_load_word;
- goto load_abs;
- case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
- jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
- offset = jit->off_load_half;
- goto load_abs;
- case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
- jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
- offset = jit->off_load_byte;
-load_abs: if ((int) K < 0)
- goto out;
-call_fn: /* lg %r1,<d(function)>(%r13) */
- EMIT6_DISP(0xe310d000, 0x0004, offset);
- /* l %r3,<d(K)>(%r13) */
- EMIT4_DISP(0x5830d000, EMIT_CONST(K));
- /* basr %r8,%r1 */
- EMIT2(0x0d81);
- /* jnz <ret0> */
- EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
break;
- case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
- jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
- offset = jit->off_load_iword;
- goto call_fn;
- case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
- jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
- offset = jit->off_load_ihalf;
- goto call_fn;
- case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
- jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
- offset = jit->off_load_ibyte;
- goto call_fn;
- case BPF_LDX | BPF_B | BPF_MSH:
- /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
- jit->seen |= SEEN_RET0;
- if ((int) K < 0) {
- /* j <ret0> */
- EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg));
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ switch (imm) {
+ case 16: /* dst = (u16) cpu_to_le16(dst) */
+ /* lrvr %dst,%dst */
+ EMIT4(0xb91f0000, dst_reg, dst_reg);
+ /* srl %dst,16(%r0) */
+ EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
+ /* llghr %dst,%dst */
+ EMIT4(0xb9850000, dst_reg, dst_reg);
+ break;
+ case 32: /* dst = (u32) cpu_to_le32(dst) */
+ /* lrvr %dst,%dst */
+ EMIT4(0xb91f0000, dst_reg, dst_reg);
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ case 64: /* dst = (u64) cpu_to_le64(dst) */
+ /* lrvgr %dst,%dst */
+ EMIT4(0xb90f0000, dst_reg, dst_reg);
break;
}
- jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
- offset = jit->off_load_bmsh;
- goto call_fn;
- case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- /* l %r5,<d(len)>(%r2) */
- EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
- break;
- case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
- jit->seen |= SEEN_XREG;
- /* l %r12,<d(len)>(%r2) */
- EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
- break;
- case BPF_LD | BPF_IMM: /* A = K */
- if (K <= 16383)
- /* lhi %r5,K */
- EMIT4_IMM(0xa7580000, K);
- else if (test_facility(21))
- /* llilf %r5,<K> */
- EMIT6_IMM(0xc05f0000, K);
- else
- /* l %r5,<d(K)>(%r13) */
- EMIT4_DISP(0x5850d000, EMIT_CONST(K));
- break;
- case BPF_LDX | BPF_IMM: /* X = K */
- jit->seen |= SEEN_XREG;
- if (K <= 16383)
- /* lhi %r12,<K> */
- EMIT4_IMM(0xa7c80000, K);
- else if (test_facility(21))
- /* llilf %r12,<K> */
- EMIT6_IMM(0xc0cf0000, K);
- else
- /* l %r12,<d(K)>(%r13) */
- EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
break;
- case BPF_LD | BPF_MEM: /* A = mem[K] */
+ /*
+ * BPF_ST(X)
+ */
+ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
+ /* stcy %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+ /* sthy %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+ /* sty %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+ /* stg %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ /* lhi %w0,imm */
+ EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
+ /* stcy %w0,off(dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+ /* lhi %w0,imm */
+ EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
+ /* sthy %w0,off(dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
jit->seen |= SEEN_MEM;
- /* l %r5,<K>(%r15) */
- EMIT4_DISP(0x5850f000,
- (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
- break;
- case BPF_LDX | BPF_MEM: /* X = mem[K] */
- jit->seen |= SEEN_XREG | SEEN_MEM;
- /* l %r12,<K>(%r15) */
- EMIT4_DISP(0x58c0f000,
- (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
- break;
- case BPF_MISC | BPF_TAX: /* X = A */
- jit->seen |= SEEN_XREG;
- /* lr %r12,%r5 */
- EMIT2(0x18c5);
- break;
- case BPF_MISC | BPF_TXA: /* A = X */
- jit->seen |= SEEN_XREG;
- /* lr %r5,%r12 */
- EMIT2(0x185c);
- break;
- case BPF_RET | BPF_K:
- if (K == 0) {
- jit->seen |= SEEN_RET0;
- if (last)
- break;
- /* j <ret0> */
- EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg);
- } else {
- if (K <= 16383)
- /* lghi %r2,K */
- EMIT4_IMM(0xa7290000, K);
- else
- /* llgf %r2,<K>(%r13) */
- EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K));
- /* j <exit> */
- if (last && !(jit->seen & SEEN_RET0))
- break;
- EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
- }
break;
- case BPF_RET | BPF_A:
- /* llgfr %r2,%r5 */
- EMIT4(0xb9160025);
+ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ /* llilf %w0,imm */
+ EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
+ /* sty %w0,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ /* lgfi %w0,imm */
+ EMIT6_IMM(0xc0010000, REG_W0, imm);
+ /* stg %w0,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ /*
+ * BPF_STX XADD (atomic_add)
+ */
+ case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+ /* laal %w0,%src,off(%dst) */
+ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
+ dst_reg, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+ /* laalg %w0,%src,off(%dst) */
+ EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
+ dst_reg, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ /*
+ * BPF_LDX
+ */
+ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ /* llgc %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ /* llgh %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ /* llgf %dst,off(%src) */
+ jit->seen |= SEEN_MEM;
+ EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+ break;
+ case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+ /* lg %dst,0(off,%src) */
+ jit->seen |= SEEN_MEM;
+ EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+ break;
+ /*
+ * BPF_JMP / CALL
+ */
+ case BPF_JMP | BPF_CALL:
+ {
+ /*
+ * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
+ */
+ const u64 func = (u64)__bpf_call_base + imm;
+
+ REG_SET_SEEN(BPF_REG_5);
+ jit->seen |= SEEN_FUNC;
+ /* lg %w1,<d(imm)>(%l) */
+ EMIT6_DISP(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
+ EMIT_CONST_U64(func));
+ /* basr %r14,%w1 */
+ EMIT2(0x0d00, REG_14, REG_W1);
+ /* lgr %b0,%r2: load return value into %b0 */
+ EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ break;
+ }
+ case BPF_JMP | BPF_EXIT: /* return b0 */
+ last = (i == fp->len - 1) ? 1 : 0;
+ if (last && !(jit->seen & SEEN_RET0))
+ break;
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
break;
- case BPF_ST: /* mem[K] = A */
- jit->seen |= SEEN_MEM;
- /* st %r5,<K>(%r15) */
- EMIT4_DISP(0x5050f000,
- (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
- break;
- case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
- jit->seen |= SEEN_XREG | SEEN_MEM;
- /* st %r12,<K>(%r15) */
- EMIT4_DISP(0x50c0f000,
- (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
- break;
- case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
- /* icm %r5,3,<d(protocol)>(%r2) */
- EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
- break;
- case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0;
- * A = skb->dev->ifindex */
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- jit->seen |= SEEN_RET0;
- /* lg %r1,<d(dev)>(%r2) */
- EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
- /* ltgr %r1,%r1 */
- EMIT4(0xb9020011);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
- /* l %r5,<d(ifindex)>(%r1) */
- EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
- break;
- case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- /* l %r5,<d(mark)>(%r2) */
- EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
- break;
- case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
- /* icm %r5,3,<d(queue_mapping)>(%r2) */
- EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
- break;
- case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0;
- * A = skb->dev->type */
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
- jit->seen |= SEEN_RET0;
- /* lg %r1,<d(dev)>(%r2) */
- EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
- /* ltgr %r1,%r1 */
- EMIT4(0xb9020011);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
- /* icm %r5,3,<d(type)>(%r1) */
- EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
- break;
- case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- /* l %r5,<d(hash)>(%r2) */
- EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
- /* icm %r5,3,<d(vlan_tci)>(%r2) */
- EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
- if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
- /* nill %r5,0xefff */
- EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
- } else {
- /* nill %r5,0x1000 */
- EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT);
- /* srl %r5,12 */
- EMIT4_DISP(0x88500000, 12);
- }
+ /*
+ * Branch relative (number of skipped instructions) to offset on
+ * condition.
+ *
+ * Condition code to mask mapping:
+ *
+ * CC | Description | Mask
+ * ------------------------------
+ * 0 | Operands equal | 8
+ * 1 | First operand low | 4
+ * 2 | First operand high | 2
+ * 3 | Unused | 1
+ *
+ * For s390x relative branches: ip = ip + off_bytes
+ * For BPF relative branches: insn = insn + off_insns + 1
+ *
+ * For example for s390x with offset 0 we jump to the branch
+ * instruction itself (loop) and for BPF with offset 0 we
+ * branch to the instruction behind the branch.
+ */
+ case BPF_JMP | BPF_JA: /* if (true) */
+ mask = 0xf000; /* j */
+ goto branch_oc;
+ case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
+ mask = 0x2000; /* jh */
+ goto branch_ks;
+ case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
+ mask = 0xa000; /* jhe */
+ goto branch_ks;
+ case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
+ mask = 0x2000; /* jh */
+ goto branch_ku;
+ case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
+ mask = 0xa000; /* jhe */
+ goto branch_ku;
+ case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
+ mask = 0x7000; /* jne */
+ goto branch_ku;
+ case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
+ mask = 0x8000; /* je */
+ goto branch_ku;
+ case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
+ mask = 0x7000; /* jnz */
+ /* lgfi %w1,imm (load sign extend imm) */
+ EMIT6_IMM(0xc0010000, REG_W1, imm);
+ /* ngr %w1,%dst */
+ EMIT4(0xb9800000, REG_W1, dst_reg);
+ goto branch_oc;
+
+ case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
+ mask = 0x2000; /* jh */
+ goto branch_xs;
+ case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
+ mask = 0xa000; /* jhe */
+ goto branch_xs;
+ case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
+ mask = 0x2000; /* jh */
+ goto branch_xu;
+ case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
+ mask = 0xa000; /* jhe */
+ goto branch_xu;
+ case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
+ mask = 0x7000; /* jne */
+ goto branch_xu;
+ case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
+ mask = 0x8000; /* je */
+ goto branch_xu;
+ case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
+ mask = 0x7000; /* jnz */
+ /* ngrk %w1,%dst,%src */
+ EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
+ goto branch_oc;
+branch_ks:
+ /* lgfi %w1,imm (load sign extend imm) */
+ EMIT6_IMM(0xc0010000, REG_W1, imm);
+ /* cgrj %dst,%w1,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
+ break;
+branch_ku:
+ /* lgfi %w1,imm (load sign extend imm) */
+ EMIT6_IMM(0xc0010000, REG_W1, imm);
+ /* clgrj %dst,%w1,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
+ break;
+branch_xs:
+ /* cgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
+ break;
+branch_xu:
+ /* clgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
+ break;
+branch_oc:
+ /* brc mask,jmp_off (branch instruction needs 4 bytes) */
+ jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
+ EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
break;
- case BPF_ANC | SKF_AD_PKTTYPE:
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
- /* ic %r5,<d(pkt_type_offset)>(%r2) */
- EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET());
- /* srl %r5,5 */
- EMIT4_DISP(0x88500000, 5);
- break;
- case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
-#ifdef CONFIG_SMP
- /* l %r5,<d(cpu_nr)> */
- EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
-#else
- /* lhi %r5,0 */
- EMIT4(0xa7580000);
-#endif
+ /*
+ * BPF_LD
+ */
+ case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
+ case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
+ if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+ func_addr = __pa(sk_load_byte_pos);
+ else
+ func_addr = __pa(sk_load_byte);
+ goto call_fn;
+ case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
+ case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
+ if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+ func_addr = __pa(sk_load_half_pos);
+ else
+ func_addr = __pa(sk_load_half);
+ goto call_fn;
+ case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
+ case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
+ if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+ func_addr = __pa(sk_load_word_pos);
+ else
+ func_addr = __pa(sk_load_word);
+ goto call_fn;
+call_fn:
+ jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
+ REG_SET_SEEN(REG_14); /* Return address of possible func call */
+
+ /*
+ * Implicit input:
+ * BPF_REG_6 (R7) : skb pointer
+ * REG_SKB_DATA (R12): skb data pointer
+ *
+ * Calculated input:
+ * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb
+ * BPF_REG_5 (R6) : return address
+ *
+ * Output:
+ * BPF_REG_0 (R14): data read from skb
+ *
+ * Scratch registers (BPF_REG_1-5)
+ */
+
+ /* Call function: llilf %w1,func_addr */
+ EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
+
+ /* Offset: lgfi %b2,imm */
+ EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
+ if (BPF_MODE(insn->code) == BPF_IND)
+ /* agfr %b2,%src (%src is s32 here) */
+ EMIT4(0xb9180000, BPF_REG_2, src_reg);
+
+ /* basr %b5,%w1 (%b5 is call saved) */
+ EMIT2(0x0d00, BPF_REG_5, REG_W1);
+
+ /*
+ * Note: For fast access we jump directly after the
+ * jnz instruction from bpf_jit.S
+ */
+ /* jnz <ret0> */
+ EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
break;
default: /* too complex, give up */
- goto out;
+ pr_err("Unknown opcode %02x\n", insn->code);
+ return -1;
+ }
+ return insn_count;
+}
+
+/*
+ * Compile eBPF program into s390x code
+ */
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
+{
+ int i, insn_count;
+
+ jit->lit = jit->lit_start;
+ jit->prg = 0;
+
+ bpf_jit_prologue(jit);
+ for (i = 0; i < fp->len; i += insn_count) {
+ insn_count = bpf_jit_insn(jit, fp, i);
+ if (insn_count < 0)
+ return -1;
+ jit->addrs[i + 1] = jit->prg; /* Next instruction address */
}
- addrs[i] = jit->prg - jit->start;
+ bpf_jit_epilogue(jit);
+
+ jit->lit_start = jit->prg;
+ jit->size = jit->lit;
+ jit->size_prg = jit->prg;
return 0;
-out:
- return -1;
}
+/*
+ * Classic BPF function stub. BPF programs will be converted into
+ * eBPF and then bpf_int_jit_compile() will be called.
+ */
void bpf_jit_compile(struct bpf_prog *fp)
{
- struct bpf_binary_header *header = NULL;
- unsigned long size, prg_len, lit_len;
- struct bpf_jit jit, cjit;
- unsigned int *addrs;
- int pass, i;
+}
+
+/*
+ * Compile eBPF program "fp"
+ */
+void bpf_int_jit_compile(struct bpf_prog *fp)
+{
+ struct bpf_binary_header *header;
+ struct bpf_jit jit;
+ int pass;
if (!bpf_jit_enable)
return;
- addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
+ memset(&jit, 0, sizeof(jit));
+ jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+ if (jit.addrs == NULL)
return;
- memset(&jit, 0, sizeof(cjit));
- memset(&cjit, 0, sizeof(cjit));
-
- for (pass = 0; pass < 10; pass++) {
- jit.prg = jit.start;
- jit.lit = jit.mid;
-
- bpf_jit_prologue(&jit);
- bpf_jit_noleaks(&jit, fp->insns);
- for (i = 0; i < fp->len; i++) {
- if (bpf_jit_insn(&jit, fp->insns + i, addrs, i,
- i == fp->len - 1))
- goto out;
- }
- bpf_jit_epilogue(&jit);
- if (jit.start) {
- WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit);
- if (memcmp(&jit, &cjit, sizeof(jit)) == 0)
- break;
- } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
- prg_len = jit.prg - jit.start;
- lit_len = jit.lit - jit.mid;
- size = prg_len + lit_len;
- if (size >= BPF_SIZE_MAX)
- goto out;
- header = bpf_jit_binary_alloc(size, &jit.start,
- 2, bpf_jit_fill_hole);
- if (!header)
- goto out;
- jit.prg = jit.mid = jit.start + prg_len;
- jit.lit = jit.end = jit.start + prg_len + lit_len;
- jit.base_ip += (unsigned long) jit.start;
- jit.exit_ip += (unsigned long) jit.start;
- jit.ret0_ip += (unsigned long) jit.start;
- }
- cjit = jit;
+ /*
+ * Three initial passes:
+ * - 1/2: Determine clobbered registers
+ * - 3: Calculate program size and addrs arrray
+ */
+ for (pass = 1; pass <= 3; pass++) {
+ if (bpf_jit_prog(&jit, fp))
+ goto free_addrs;
}
+ /*
+ * Final pass: Allocate and generate program
+ */
+ if (jit.size >= BPF_SIZE_MAX)
+ goto free_addrs;
+ header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+ if (!header)
+ goto free_addrs;
+ if (bpf_jit_prog(&jit, fp))
+ goto free_addrs;
if (bpf_jit_enable > 1) {
- bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
- if (jit.start)
- print_fn_code(jit.start, jit.mid - jit.start);
+ bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
+ if (jit.prg_buf)
+ print_fn_code(jit.prg_buf, jit.size_prg);
}
- if (jit.start) {
+ if (jit.prg_buf) {
set_memory_ro((unsigned long)header, header->pages);
- fp->bpf_func = (void *) jit.start;
+ fp->bpf_func = (void *) jit.prg_buf;
fp->jited = true;
}
-out:
- kfree(addrs);
+free_addrs:
+ kfree(jit.addrs);
}
+/*
+ * Free eBPF program
+ */
void bpf_jit_free(struct bpf_prog *fp)
{
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 98336200c7b2..598f023cf8a6 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -190,6 +190,11 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
return -ENOMEM;
WARN_ON((u64) zdev->fmb & 0xf);
+ /* reset software counters */
+ atomic64_set(&zdev->allocated_pages, 0);
+ atomic64_set(&zdev->mapped_pages, 0);
+ atomic64_set(&zdev->unmapped_pages, 0);
+
args.fmb_addr = virt_to_phys(zdev->fmb);
return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
}
@@ -822,6 +827,7 @@ int zpci_create_device(struct zpci_dev *zdev)
if (rc)
goto out;
+ mutex_init(&zdev->lock);
if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
rc = zpci_enable_device(zdev);
if (rc)
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index c22d4402ae45..4129b0a5fd78 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -31,12 +31,25 @@ static char *pci_perf_names[] = {
"Refresh operations",
"DMA read bytes",
"DMA write bytes",
- /* software counters */
+};
+
+static char *pci_sw_names[] = {
"Allocated pages",
"Mapped pages",
"Unmapped pages",
};
+static void pci_sw_counter_show(struct seq_file *m)
+{
+ struct zpci_dev *zdev = m->private;
+ atomic64_t *counter = &zdev->allocated_pages;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
+ seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
+ atomic64_read(counter));
+}
+
static int pci_perf_show(struct seq_file *m, void *v)
{
struct zpci_dev *zdev = m->private;
@@ -45,7 +58,10 @@ static int pci_perf_show(struct seq_file *m, void *v)
if (!zdev)
return 0;
+
+ mutex_lock(&zdev->lock);
if (!zdev->fmb) {
+ mutex_unlock(&zdev->lock);
seq_puts(m, "FMB statistics disabled\n");
return 0;
}
@@ -65,12 +81,9 @@ static int pci_perf_show(struct seq_file *m, void *v)
for (i = 4; i < 6; i++)
seq_printf(m, "%26s:\t%llu\n",
pci_perf_names[i], *(stat + i));
- /* software counters */
- for (i = 6; i < ARRAY_SIZE(pci_perf_names); i++)
- seq_printf(m, "%26s:\t%llu\n",
- pci_perf_names[i],
- atomic64_read((atomic64_t *) (stat + i)));
+ pci_sw_counter_show(m);
+ mutex_unlock(&zdev->lock);
return 0;
}
@@ -88,19 +101,17 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
if (rc)
return rc;
+ mutex_lock(&zdev->lock);
switch (val) {
case 0:
rc = zpci_fmb_disable_device(zdev);
- if (rc)
- return rc;
break;
case 1:
rc = zpci_fmb_enable_device(zdev);
- if (rc)
- return rc;
break;
}
- return count;
+ mutex_unlock(&zdev->lock);
+ return rc ? rc : count;
}
static int pci_perf_seq_open(struct inode *inode, struct file *filp)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 4cbb29a4d615..6fd8d5836138 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -300,7 +300,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
flags |= ZPCI_TABLE_PROTECTED;
if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
- atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
+ atomic64_add(nr_pages, &zdev->mapped_pages);
return dma_addr + (offset & ~PAGE_MASK);
}
@@ -328,7 +328,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
zpci_err_hex(&dma_addr, sizeof(dma_addr));
}
- atomic64_add(npages, &zdev->fmb->unmapped_pages);
+ atomic64_add(npages, &zdev->unmapped_pages);
iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
dma_free_iommu(zdev, iommu_page_index, npages);
}
@@ -357,7 +357,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
return NULL;
}
- atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+ atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
if (dma_handle)
*dma_handle = map;
return (void *) pa;
@@ -370,7 +370,7 @@ static void s390_dma_free(struct device *dev, size_t size,
struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
size = PAGE_ALIGN(size);
- atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+ atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
free_pages((unsigned long) pa, get_order(size));
}