diff options
Diffstat (limited to 'samples')
57 files changed, 6166 insertions, 10 deletions
diff --git a/samples/Kconfig b/samples/Kconfig index 85c405fcccb0..a6d2a43bbf2e 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -99,4 +99,10 @@ config SAMPLE_SECCOMP Build samples of seccomp filters using various methods of BPF filter construction. +config SAMPLE_BLACKFIN_GPTIMERS + tristate "Build blackfin gptimers sample code -- loadable modules only" + depends on BLACKFIN && BFIN_GPTIMERS && m + help + Build samples of blackfin gptimers sample module. + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 1a20169d85ac..e17d66d77f09 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ - configfs/ connector/ v4l/ trace_printk/ + configfs/ connector/ v4l/ trace_printk/ blackfin/ diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore new file mode 100644 index 000000000000..7af222860a96 --- /dev/null +++ b/samples/auxdisplay/.gitignore @@ -0,0 +1 @@ +cfag12864b-example diff --git a/samples/auxdisplay/Makefile b/samples/auxdisplay/Makefile new file mode 100644 index 000000000000..05e471feb6e5 --- /dev/null +++ b/samples/auxdisplay/Makefile @@ -0,0 +1,9 @@ +CC := $(CROSS_COMPILE)gcc +CFLAGS := -I../../usr/include + +PROGS := cfag12864b-example + +all: $(PROGS) + +clean: + rm -fr $(PROGS) diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c new file mode 100644 index 000000000000..e7823ffb1ca0 --- /dev/null +++ b/samples/auxdisplay/cfag12864b-example.c @@ -0,0 +1,281 @@ +/* + * Filename: cfag12864b-example.c + * Version: 0.1.0 + * Description: cfag12864b LCD userspace example program + * License: GPLv2 + * + * Author: Copyright (C) Miguel Ojeda Sandonis + * Date: 2006-10-31 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * ------------------------ + * start of cfag12864b code + * ------------------------ + */ + +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#define CFAG12864B_WIDTH (128) +#define CFAG12864B_HEIGHT (64) +#define CFAG12864B_SIZE (128 * 64 / 8) +#define CFAG12864B_BPB (8) +#define CFAG12864B_ADDRESS(x, y) ((y) * CFAG12864B_WIDTH / \ + CFAG12864B_BPB + (x) / CFAG12864B_BPB) +#define CFAG12864B_BIT(n) (((unsigned char) 1) << (n)) + +#undef CFAG12864B_DOCHECK +#ifdef CFAG12864B_DOCHECK + #define CFAG12864B_CHECK(x, y) ((x) < CFAG12864B_WIDTH && \ + (y) < CFAG12864B_HEIGHT) +#else + #define CFAG12864B_CHECK(x, y) (1) +#endif + +int cfag12864b_fd; +unsigned char * cfag12864b_mem; +unsigned char cfag12864b_buffer[CFAG12864B_SIZE]; + +/* + * init a cfag12864b framebuffer device + * + * No error: return = 0 + * Unable to open: return = -1 + * Unable to mmap: return = -2 + */ +static int cfag12864b_init(char *path) +{ + cfag12864b_fd = open(path, O_RDWR); + if (cfag12864b_fd == -1) + return -1; + + cfag12864b_mem = mmap(0, CFAG12864B_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, cfag12864b_fd, 0); + if (cfag12864b_mem == MAP_FAILED) { + close(cfag12864b_fd); + return -2; + } + + return 0; +} + +/* + * exit a cfag12864b framebuffer device + */ +static void cfag12864b_exit(void) +{ + munmap(cfag12864b_mem, CFAG12864B_SIZE); + close(cfag12864b_fd); +} + +/* + * set (x, y) pixel + */ +static void cfag12864b_set(unsigned char x, unsigned char y) +{ + if (CFAG12864B_CHECK(x, y)) + cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] |= + CFAG12864B_BIT(x % CFAG12864B_BPB); +} + +/* + * unset (x, y) pixel + */ +static void cfag12864b_unset(unsigned char x, unsigned char y) +{ + if (CFAG12864B_CHECK(x, y)) + cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &= + ~CFAG12864B_BIT(x % CFAG12864B_BPB); +} + +/* + * is set (x, y) pixel? + * + * Pixel off: return = 0 + * Pixel on: return = 1 + */ +static unsigned char cfag12864b_isset(unsigned char x, unsigned char y) +{ + if (CFAG12864B_CHECK(x, y)) + if (cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] & + CFAG12864B_BIT(x % CFAG12864B_BPB)) + return 1; + + return 0; +} + +/* + * not (x, y) pixel + */ +static void cfag12864b_not(unsigned char x, unsigned char y) +{ + if (cfag12864b_isset(x, y)) + cfag12864b_unset(x, y); + else + cfag12864b_set(x, y); +} + +/* + * fill (set all pixels) + */ +static void cfag12864b_fill(void) +{ + unsigned short i; + + for (i = 0; i < CFAG12864B_SIZE; i++) + cfag12864b_buffer[i] = 0xFF; +} + +/* + * clear (unset all pixels) + */ +static void cfag12864b_clear(void) +{ + unsigned short i; + + for (i = 0; i < CFAG12864B_SIZE; i++) + cfag12864b_buffer[i] = 0; +} + +/* + * format a [128*64] matrix + * + * Pixel off: src[i] = 0 + * Pixel on: src[i] > 0 + */ +static void cfag12864b_format(unsigned char * matrix) +{ + unsigned char i, j, n; + + for (i = 0; i < CFAG12864B_HEIGHT; i++) + for (j = 0; j < CFAG12864B_WIDTH / CFAG12864B_BPB; j++) { + cfag12864b_buffer[i * CFAG12864B_WIDTH / CFAG12864B_BPB + + j] = 0; + for (n = 0; n < CFAG12864B_BPB; n++) + if (matrix[i * CFAG12864B_WIDTH + + j * CFAG12864B_BPB + n]) + cfag12864b_buffer[i * CFAG12864B_WIDTH / + CFAG12864B_BPB + j] |= + CFAG12864B_BIT(n); + } +} + +/* + * blit buffer to lcd + */ +static void cfag12864b_blit(void) +{ + memcpy(cfag12864b_mem, cfag12864b_buffer, CFAG12864B_SIZE); +} + +/* + * ---------------------- + * end of cfag12864b code + * ---------------------- + */ + +#include <stdio.h> + +#define EXAMPLES 6 + +static void example(unsigned char n) +{ + unsigned short i, j; + unsigned char matrix[CFAG12864B_WIDTH * CFAG12864B_HEIGHT]; + + if (n > EXAMPLES) + return; + + printf("Example %i/%i - ", n, EXAMPLES); + + switch (n) { + case 1: + printf("Draw points setting bits"); + cfag12864b_clear(); + for (i = 0; i < CFAG12864B_WIDTH; i += 2) + for (j = 0; j < CFAG12864B_HEIGHT; j += 2) + cfag12864b_set(i, j); + break; + + case 2: + printf("Clear the LCD"); + cfag12864b_clear(); + break; + + case 3: + printf("Draw rows formatting a [128*64] matrix"); + memset(matrix, 0, CFAG12864B_WIDTH * CFAG12864B_HEIGHT); + for (i = 0; i < CFAG12864B_WIDTH; i++) + for (j = 0; j < CFAG12864B_HEIGHT; j += 2) + matrix[j * CFAG12864B_WIDTH + i] = 1; + cfag12864b_format(matrix); + break; + + case 4: + printf("Fill the lcd"); + cfag12864b_fill(); + break; + + case 5: + printf("Draw columns unsetting bits"); + for (i = 0; i < CFAG12864B_WIDTH; i += 2) + for (j = 0; j < CFAG12864B_HEIGHT; j++) + cfag12864b_unset(i, j); + break; + + case 6: + printf("Do negative not-ing all bits"); + for (i = 0; i < CFAG12864B_WIDTH; i++) + for (j = 0; j < CFAG12864B_HEIGHT; j ++) + cfag12864b_not(i, j); + break; + } + + puts(" - [Press Enter]"); +} + +int main(int argc, char *argv[]) +{ + unsigned char n; + + if (argc != 2) { + printf( + "Sintax: %s fbdev\n" + "Usually: /dev/fb0, /dev/fb1...\n", argv[0]); + return -1; + } + + if (cfag12864b_init(argv[1])) { + printf("Can't init %s fbdev\n", argv[1]); + return -2; + } + + for (n = 1; n <= EXAMPLES; n++) { + example(n); + cfag12864b_blit(); + while (getchar() != '\n'); + } + + cfag12864b_exit(); + + return 0; +} diff --git a/samples/blackfin/Makefile b/samples/blackfin/Makefile new file mode 100644 index 000000000000..89b86cfd83a2 --- /dev/null +++ b/samples/blackfin/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_BLACKFIN_GPTIMERS) += gptimers-example.o diff --git a/samples/blackfin/gptimers-example.c b/samples/blackfin/gptimers-example.c new file mode 100644 index 000000000000..283eba993d9d --- /dev/null +++ b/samples/blackfin/gptimers-example.c @@ -0,0 +1,91 @@ +/* + * Simple gptimers example + * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:gptimers + * + * Copyright 2007-2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> + +#include <asm/gptimers.h> +#include <asm/portmux.h> + +/* ... random driver includes ... */ + +#define DRIVER_NAME "gptimer_example" + +#ifdef IRQ_TIMER5 +#define SAMPLE_IRQ_TIMER IRQ_TIMER5 +#else +#define SAMPLE_IRQ_TIMER IRQ_TIMER2 +#endif + +struct gptimer_data { + uint32_t period, width; +}; +static struct gptimer_data data; + +/* ... random driver state ... */ + +static irqreturn_t gptimer_example_irq(int irq, void *dev_id) +{ + struct gptimer_data *data = dev_id; + + /* make sure it was our timer which caused the interrupt */ + if (!get_gptimer_intr(TIMER5_id)) + return IRQ_NONE; + + /* read the width/period values that were captured for the waveform */ + data->width = get_gptimer_pwidth(TIMER5_id); + data->period = get_gptimer_period(TIMER5_id); + + /* acknowledge the interrupt */ + clear_gptimer_intr(TIMER5_id); + + /* tell the upper layers we took care of things */ + return IRQ_HANDLED; +} + +/* ... random driver code ... */ + +static int __init gptimer_example_init(void) +{ + int ret; + + /* grab the peripheral pins */ + ret = peripheral_request(P_TMR5, DRIVER_NAME); + if (ret) { + printk(KERN_NOTICE DRIVER_NAME ": peripheral request failed\n"); + return ret; + } + + /* grab the IRQ for the timer */ + ret = request_irq(SAMPLE_IRQ_TIMER, gptimer_example_irq, + IRQF_SHARED, DRIVER_NAME, &data); + if (ret) { + printk(KERN_NOTICE DRIVER_NAME ": IRQ request failed\n"); + peripheral_free(P_TMR5); + return ret; + } + + /* setup the timer and enable it */ + set_gptimer_config(TIMER5_id, + WDTH_CAP | PULSE_HI | PERIOD_CNT | IRQ_ENA); + enable_gptimers(TIMER5bit); + + return 0; +} +module_init(gptimer_example_init); + +static void __exit gptimer_example_exit(void) +{ + disable_gptimers(TIMER5bit); + free_irq(SAMPLE_IRQ_TIMER, &data); + peripheral_free(P_TMR5); +} +module_exit(gptimer_example_exit); + +MODULE_LICENSE("BSD"); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5c53fdb67ca7..bfc2cb88a1f7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -2,6 +2,7 @@ obj- := dummy.o # List of programs to build +hostprogs-y := test_lru_dist hostprogs-y += sock_example hostprogs-y += fds_example hostprogs-y += sockex1 @@ -21,12 +22,18 @@ hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin +hostprogs-y += test_cgrp2_attach +hostprogs-y += test_cgrp2_sock +hostprogs-y += test_cgrp2_sock2 hostprogs-y += xdp1 hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip +hostprogs-y += tc_l2_redirect +hostprogs-y += lwt_len_hist +test_lru_dist-objs := test_lru_dist.o libbpf.o sock_example-objs := sock_example.o libbpf.o fds_example-objs := bpf_load.o libbpf.o fds_example.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o @@ -46,6 +53,9 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o +test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o +test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o +test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o libbpf.o xdp1_user.o @@ -53,6 +63,8 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o libbpf.o trace_event_user.o sampleip-objs := bpf_load.o libbpf.o sampleip_user.o +tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o +lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -65,10 +77,12 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o +always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += tcbpf2_kern.o +always += tc_l2_redirect_kern.o always += lathist_kern.o always += offwaketime_kern.o always += spintest_kern.o @@ -82,8 +96,10 @@ always += xdp2_kern.o always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o always += sampleip_kern.o +always += lwt_len_hist_kern.o HOSTCFLAGS += -I$(objtree)/usr/include +HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable HOSTLOADLIBES_fds_example += -lelf @@ -96,6 +112,7 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf @@ -108,6 +125,8 @@ HOSTLOADLIBES_xdp2 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf +HOSTLOADLIBES_tc_l2_redirect += -l elf +HOSTLOADLIBES_lwt_len_hist += -l elf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 90f44bd2045e..a246c6122629 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -80,6 +80,8 @@ struct bpf_map_def { unsigned int map_flags; }; +static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = + (void *) BPF_FUNC_skb_load_bytes; static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = (void *) BPF_FUNC_skb_store_bytes; static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = @@ -88,6 +90,8 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l4_csum_replace; static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = (void *) BPF_FUNC_skb_under_cgroup; +static int (*bpf_skb_change_head)(void *, int len, int flags) = + (void *) BPF_FUNC_skb_change_head; #if defined(__x86_64__) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 97913e109b14..49b45ccbe153 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -52,6 +52,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; bool is_xdp = strncmp(event, "xdp", 3) == 0; bool is_perf_event = strncmp(event, "perf_event", 10) == 0; + bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; + bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -72,6 +74,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_XDP; } else if (is_perf_event) { prog_type = BPF_PROG_TYPE_PERF_EVENT; + } else if (is_cgroup_skb) { + prog_type = BPF_PROG_TYPE_CGROUP_SKB; + } else if (is_cgroup_sk) { + prog_type = BPF_PROG_TYPE_CGROUP_SOCK; } else { printf("Unknown event '%s'\n", event); return -1; @@ -85,7 +91,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; - if (is_xdp || is_perf_event) + if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; if (is_socket) { @@ -317,6 +323,10 @@ int load_bpf_file(char *path) &shdr_prog, &data_prog)) continue; + if (shdr_prog.sh_type != SHT_PROGBITS || + !(shdr_prog.sh_flags & SHF_EXECINSTR)) + continue; + insns = (struct bpf_insn *) data_prog->d_buf; processed_sec[shdr.sh_info] = true; @@ -330,7 +340,8 @@ int load_bpf_file(char *path) memcmp(shname_prog, "tracepoint/", 11) == 0 || memcmp(shname_prog, "xdp", 3) == 0 || memcmp(shname_prog, "perf_event", 10) == 0 || - memcmp(shname_prog, "socket", 6) == 0) + memcmp(shname_prog, "socket", 6) == 0 || + memcmp(shname_prog, "cgroup/", 7) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } } @@ -349,7 +360,8 @@ int load_bpf_file(char *path) memcmp(shname, "tracepoint/", 11) == 0 || memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || - memcmp(shname, "socket", 6) == 0) + memcmp(shname, "socket", 6) == 0 || + memcmp(shname, "cgroup/", 7) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index dfa57fe65c8e..4adeeef53ad6 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -7,6 +7,7 @@ extern int map_fd[MAX_MAPS]; extern int prog_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS]; +extern int prog_cnt; /* parses elf file compiled by llvm .c->.o * . parses 'maps' section and creates maps via BPF syscall diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 9969e35550c3..9ce707bf02a7 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -104,6 +104,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type, return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr = { + .target_fd = target_fd, + .attach_bpf_fd = prog_fd, + .attach_type = type, + }; + + return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr = { + .target_fd = target_fd, + .attach_type = type, + }; + + return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr)); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr = { diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index ac6edb61b64a..94a901d86fc2 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -15,10 +15,13 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_len, const char *license, int kern_version); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); + int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); -#define LOG_BUF_SIZE 65536 +#define LOG_BUF_SIZE (256 * 1024) extern char bpf_log_buf[LOG_BUF_SIZE]; /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh new file mode 100644 index 000000000000..7d567744c7fa --- /dev/null +++ b/samples/bpf/lwt_len_hist.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +NS1=lwt_ns1 +VETH0=tst_lwt1a +VETH1=tst_lwt1b + +TRACE_ROOT=/sys/kernel/debug/tracing + +function cleanup { + ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null + ip link del $VETH0 2> /dev/null + ip link del $VETH1 2> /dev/null + ip netns exec $NS1 killall netserver + ip netns delete $NS1 2> /dev/null +} + +cleanup + +ip netns add $NS1 +ip link add $VETH0 type veth peer name $VETH1 +ip link set dev $VETH0 up +ip addr add 192.168.253.1/24 dev $VETH0 +ip link set $VETH1 netns $NS1 +ip netns exec $NS1 ip link set dev $VETH1 up +ip netns exec $NS1 ip addr add 192.168.253.2/24 dev $VETH1 +ip netns exec $NS1 netserver + +echo 1 > ${TRACE_ROOT}/tracing_on +cp /dev/null ${TRACE_ROOT}/trace +ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0 +netperf -H 192.168.253.2 -t TCP_STREAM +cat ${TRACE_ROOT}/trace | grep -v '^#' +./lwt_len_hist +cleanup +echo 0 > ${TRACE_ROOT}/tracing_on + +exit 0 diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c new file mode 100644 index 000000000000..df75383280f9 --- /dev/null +++ b/samples/bpf/lwt_len_hist_kern.c @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/in.h> +#include "bpf_helpers.h" + +# define printk(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ + }) + +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +struct bpf_elf_map SEC("maps") lwt_len_hist_map = { + .type = BPF_MAP_TYPE_PERCPU_HASH, + .size_key = sizeof(__u64), + .size_value = sizeof(__u64), + .pinning = 2, + .max_elem = 1024, +}; + +static unsigned int log2(unsigned int v) +{ + unsigned int r; + unsigned int shift; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + return r; +} + +static unsigned int log2l(unsigned long v) +{ + unsigned int hi = v >> 32; + if (hi) + return log2(hi) + 32; + else + return log2(v); +} + +SEC("len_hist") +int do_len_hist(struct __sk_buff *skb) +{ + __u64 *value, key, init_val = 1; + + key = log2l(skb->len); + + value = bpf_map_lookup_elem(&lwt_len_hist_map, &key); + if (value) + __sync_fetch_and_add(value, 1); + else + bpf_map_update_elem(&lwt_len_hist_map, &key, &init_val, BPF_ANY); + + return BPF_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c new file mode 100644 index 000000000000..05d783fc5daf --- /dev/null +++ b/samples/bpf/lwt_len_hist_user.c @@ -0,0 +1,76 @@ +#include <linux/unistd.h> +#include <linux/bpf.h> + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <arpa/inet.h> + +#include "libbpf.h" +#include "bpf_util.h" + +#define MAX_INDEX 64 +#define MAX_STARS 38 + +static void stars(char *str, long val, long max, int width) +{ + int i; + + for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++) + str[i] = '*'; + if (val > max) + str[i - 1] = '+'; + str[i] = '\0'; +} + +int main(int argc, char **argv) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + const char *map_filename = "/sys/fs/bpf/tc/globals/lwt_len_hist_map"; + uint64_t values[nr_cpus], sum, max_value = 0, data[MAX_INDEX] = {}; + uint64_t key = 0, next_key, max_key = 0; + char starstr[MAX_STARS]; + int i, map_fd; + + map_fd = bpf_obj_get(map_filename); + if (map_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + map_filename, strerror(errno), errno); + return -1; + } + + while (bpf_get_next_key(map_fd, &key, &next_key) == 0) { + if (next_key >= MAX_INDEX) { + fprintf(stderr, "Key %lu out of bounds\n", next_key); + continue; + } + + bpf_lookup_elem(map_fd, &next_key, values); + + sum = 0; + for (i = 0; i < nr_cpus; i++) + sum += values[i]; + + data[next_key] = sum; + if (sum && next_key > max_key) + max_key = next_key; + + if (sum > max_value) + max_value = sum; + + key = next_key; + } + + for (i = 1; i <= max_key + 1; i++) { + stars(starstr, data[i - 1], max_value, MAX_STARS); + printf("%8ld -> %-8ld : %-8ld |%-*s|\n", + (1l << i) >> 1, (1l << i) - 1, data[i - 1], + MAX_STARS, starstr); + } + + close(map_fd); + + return 0; +} diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 311538e5a701..7ee1574c8ccf 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -19,6 +19,21 @@ struct bpf_map_def SEC("maps") hash_map = { .max_entries = MAX_ENTRIES, }; +struct bpf_map_def SEC("maps") lru_hash_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") percpu_lru_hash_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 10000, + .map_flags = BPF_F_NO_COMMON_LRU, +}; + struct bpf_map_def SEC("maps") percpu_hash_map = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(u32), @@ -53,6 +68,7 @@ int stress_hmap(struct pt_regs *ctx) value = bpf_map_lookup_elem(&hash_map, &key); if (value) bpf_map_delete_elem(&hash_map, &key); + return 0; } @@ -96,5 +112,28 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) bpf_map_delete_elem(&percpu_hash_map_alloc, &key); return 0; } + +SEC("kprobe/sys_getpid") +int stress_lru_hmap_alloc(struct pt_regs *ctx) +{ + u32 key = bpf_get_prandom_u32(); + long val = 1; + + bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); + + return 0; +} + +SEC("kprobe/sys_getppid") +int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx) +{ + u32 key = bpf_get_prandom_u32(); + long val = 1; + + bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 3147377e8fd3..9505b4d112f4 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -35,6 +35,8 @@ static __u64 time_get_ns(void) #define PERCPU_HASH_PREALLOC (1 << 1) #define HASH_KMALLOC (1 << 2) #define PERCPU_HASH_KMALLOC (1 << 3) +#define LRU_HASH_PREALLOC (1 << 4) +#define PERCPU_LRU_HASH_PREALLOC (1 << 5) static int test_flags = ~0; @@ -50,6 +52,30 @@ static void test_hash_prealloc(int cpu) cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); } +static void test_lru_hash_prealloc(int cpu) +{ + __u64 start_time; + int i; + + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) + syscall(__NR_getpid); + printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", + cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); +} + +static void test_percpu_lru_hash_prealloc(int cpu) +{ + __u64 start_time; + int i; + + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) + syscall(__NR_getppid); + printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", + cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); +} + static void test_percpu_hash_prealloc(int cpu) { __u64 start_time; @@ -105,6 +131,12 @@ static void loop(int cpu) if (test_flags & PERCPU_HASH_KMALLOC) test_percpu_hash_kmalloc(cpu); + + if (test_flags & LRU_HASH_PREALLOC) + test_lru_hash_prealloc(cpu); + + if (test_flags & PERCPU_LRU_HASH_PREALLOC) + test_percpu_lru_hash_prealloc(cpu); } static void run_perf_test(int tasks) diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c index d17550198d06..6db6b21fdc6d 100644 --- a/samples/bpf/parse_ldabs.c +++ b/samples/bpf/parse_ldabs.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/in.h> diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c index cf2511c33905..10af53d33cc2 100644 --- a/samples/bpf/parse_simple.c +++ b/samples/bpf/parse_simple.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/in.h> diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c index edab34dce79b..95c16324760c 100644 --- a/samples/bpf/parse_varlen.c +++ b/samples/bpf/parse_varlen.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/ipv6.h> diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c new file mode 100644 index 000000000000..533dd11a6baa --- /dev/null +++ b/samples/bpf/sock_flags_kern.c @@ -0,0 +1,44 @@ +#include <uapi/linux/bpf.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <uapi/linux/in.h> +#include <uapi/linux/in6.h> +#include "bpf_helpers.h" + +SEC("cgroup/sock1") +int bpf_prog1(struct bpf_sock *sk) +{ + char fmt[] = "socket: family %d type %d protocol %d\n"; + + bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + + /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets + * ie., make ping6 fail + */ + if (sk->family == PF_INET6 && + sk->type == SOCK_RAW && + sk->protocol == IPPROTO_ICMPV6) + return 0; + + return 1; +} + +SEC("cgroup/sock2") +int bpf_prog2(struct bpf_sock *sk) +{ + char fmt[] = "socket: family %d type %d protocol %d\n"; + + bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + + /* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets + * ie., make ping fail + */ + if (sk->family == PF_INET && + sk->type == SOCK_RAW && + sk->protocol == IPPROTO_ICMP) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index 44e5846c988f..f58acfc92556 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = { SEC("socket2") int bpf_prog2(struct __sk_buff *skb) { - struct bpf_flow_keys flow; + struct bpf_flow_keys flow = {}; struct pair *value; u32 key; diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh new file mode 100755 index 000000000000..80a05591a140 --- /dev/null +++ b/samples/bpf/tc_l2_redirect.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +[[ -z $TC ]] && TC='tc' +[[ -z $IP ]] && IP='ip' + +REDIRECT_USER='./tc_l2_redirect' +REDIRECT_BPF='./tc_l2_redirect_kern.o' + +RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter) +IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding) + +function config_common { + local tun_type=$1 + + $IP netns add ns1 + $IP netns add ns2 + $IP link add ve1 type veth peer name vens1 + $IP link add ve2 type veth peer name vens2 + $IP link set dev ve1 up + $IP link set dev ve2 up + $IP link set dev ve1 mtu 1500 + $IP link set dev ve2 mtu 1500 + $IP link set dev vens1 netns ns1 + $IP link set dev vens2 netns ns2 + + $IP -n ns1 link set dev lo up + $IP -n ns1 link set dev vens1 up + $IP -n ns1 addr add 10.1.1.101/24 dev vens1 + $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad + $IP -n ns1 route add default via 10.1.1.1 dev vens1 + $IP -n ns1 route add default via 2401:db01::1 dev vens1 + + $IP -n ns2 link set dev lo up + $IP -n ns2 link set dev vens2 up + $IP -n ns2 addr add 10.2.1.102/24 dev vens2 + $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad + $IP -n ns2 addr add 10.10.1.102 dev lo + $IP -n ns2 addr add 2401:face::66/64 dev lo nodad + $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1 + $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1 + $IP -n ns2 link set dev ipt2 up + $IP -n ns2 link set dev ip6t2 up + $IP netns exec ns2 $TC qdisc add dev vens2 clsact + $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip + if [[ $tun_type == "ipip" ]]; then + $IP -n ns2 route add 10.1.1.0/24 dev ipt2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0 + else + $IP -n ns2 route add 10.1.1.0/24 dev ip6t2 + $IP -n ns2 route add 2401:db01::/64 dev ip6t2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0 + fi + + $IP addr add 10.1.1.1/24 dev ve1 + $IP addr add 2401:db01::1/64 dev ve1 nodad + $IP addr add 10.2.1.1/24 dev ve2 + $IP addr add 2401:db02::1/64 dev ve2 nodad + + $TC qdisc add dev ve2 clsact + $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward + + sysctl -q -w net.ipv4.conf.all.rp_filter=0 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +function cleanup { + set +e + [[ -z $DEBUG ]] || set +x + $IP netns delete ns1 >& /dev/null + $IP netns delete ns2 >& /dev/null + $IP link del ve1 >& /dev/null + $IP link del ve2 >& /dev/null + $IP link del ipt >& /dev/null + $IP link del ip6t >& /dev/null + sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER + sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING + rm -f /sys/fs/bpf/tc/globals/tun_iface + [[ -z $DEBUG ]] || set -x + set -e +} + +function l2_to_ipip { + echo -n "l2_to_ipip $1: " + + local dir=$1 + + config_common ipip + + $IP link add ipt type ipip external + $IP link set dev ipt up + sysctl -q -w net.ipv4.conf.ipt.rp_filter=0 + sysctl -q -w net.ipv4.conf.ipt.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + fi + + cleanup + + echo "OK" +} + +function l2_to_ip6tnl { + echo -n "l2_to_ip6tnl $1: " + + local dir=$1 + + config_common ip6tnl + + $IP link add ip6t type ip6tnl mode any external + $IP link set dev ip6t up + sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0 + sysctl -q -w net.ipv4.conf.ip6t.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $IP route add 2401:face::/64 via 2401:db02::66 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + ping -6 -c1 2401:face::66 >& /dev/null + fi + + cleanup + + echo "OK" +} + +cleanup +test_names="l2_to_ipip l2_to_ip6tnl" +test_dirs="ingress egress" +if [[ $# -ge 2 ]]; then + test_names=$1 + test_dirs=$2 +elif [[ $# -ge 1 ]]; then + test_names=$1 +fi + +for t in $test_names; do + for d in $test_dirs; do + $t $d + done +done diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c new file mode 100644 index 000000000000..92a44729dbe4 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_kern.c @@ -0,0 +1,236 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/if_packet.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/in.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/filter.h> +#include <uapi/linux/pkt_cls.h> +#include <net/ipv6.h> +#include "bpf_helpers.h" + +#define _htonl __builtin_bswap32 + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +struct bpf_elf_map SEC("maps") tun_iface = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr) +{ + if (eth_proto == htons(ETH_P_IP)) + return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100); + else if (eth_proto == htons(ETH_P_IPV6)) + return (daddr == _htonl(0x2401face)); + + return false; +} + +SEC("l2_to_iptun_ingress_forward") +int _l2_to_iptun_ingress_forward(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (iph->protocol != IPPROTO_IPIP) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex, + _htonl(iph->daddr)); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr != IPPROTO_IPIP && + ip6h->nexthdr != IPPROTO_IPV6) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex, + _htonl(ip6h->daddr.s6_addr32[0]), + _htonl(ip6h->daddr.s6_addr32[3])); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } + + return TC_ACT_OK; +} + +SEC("l2_to_iptun_ingress_redirect") +int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + __be32 daddr = iph->daddr; + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */ + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0); + return bpf_redirect(*ifindex, 0); +} + +SEC("l2_to_ip6tun_ingress_redirect") +int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr), + *ifindex); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), + _htonl(ip6h->daddr.s6_addr32[0]), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + /* 2401:db02:0:0:0:0:0:66 */ + tkey.remote_ipv6[0] = _htonl(0x2401db02); + tkey.remote_ipv6[1] = 0; + tkey.remote_ipv6[2] = 0; + tkey.remote_ipv6[3] = _htonl(0x00000066); + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6); + return bpf_redirect(*ifindex, 0); +} + +SEC("drop_non_tun_vip") +int _drop_non_tun_vip(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_SHOT; + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c new file mode 100644 index 000000000000..4013c5337b91 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_user.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/unistd.h> +#include <linux/bpf.h> + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: tc_l2_ipip_redirect [...]\n"); + printf(" -U <file> Update an already pinned BPF array\n"); + printf(" -i <ifindex> Interface index\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL; + int ifindex = -1; + int array_key = 0; + int array_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:i:")) != -1) { + switch (opt) { + /* General args */ + case 'U': + pinned_file = optarg; + break; + case 'i': + ifindex = atoi(optarg); + break; + default: + usage(); + goto out; + } + } + + if (ifindex < 0 || !pinned_file) { + usage(); + goto out; + } + + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + + /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ + ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + +out: + if (array_fd != -1) + close(array_fd); + return ret; +} diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index fa051b3d53ee..274c884c87fe 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -1,3 +1,4 @@ +#define KBUILD_MODNAME "foo" #include <uapi/linux/bpf.h> #include <uapi/linux/if_ether.h> #include <uapi/linux/if_packet.h> diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 3303bb85593b..9c823a609e75 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -5,6 +5,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include <uapi/linux/bpf.h> #include <uapi/linux/if_ether.h> #include <uapi/linux/if_packet.h> diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c new file mode 100644 index 000000000000..a19484c45b79 --- /dev/null +++ b/samples/bpf/test_cgrp2_attach.c @@ -0,0 +1,167 @@ +/* eBPF example program: + * + * - Creates arraymap in kernel with 4 bytes keys and 8 byte values + * + * - Loads eBPF program + * + * The eBPF program accesses the map passed in to store two pieces of + * information. The number of invocations of the program, which maps + * to the number of packets received, is stored to key 0. Key 1 is + * incremented on each iteration by the number of bytes stored in + * the skb. + * + * - Attaches the new program to a cgroup using BPF_PROG_ATTACH + * + * - Every second, reads map[0] and map[1] to see how many bytes and + * packets were seen on any socket of tasks in the given cgroup. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> + +#include <linux/bpf.h> + +#include "libbpf.h" + +enum { + MAP_KEY_PACKETS, + MAP_KEY_BYTES, +}; + +static int prog_load(int map_fd, int verdict) +{ + struct bpf_insn prog[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */ + + /* Count packets */ + BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + + /* Count bytes */ + BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + + return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, + prog, sizeof(prog), "GPL", 0); +} + +static int usage(const char *argv0) +{ + printf("Usage: %s [-d] [-D] <cg-path> <egress|ingress>\n", argv0); + printf(" -d Drop Traffic\n"); + printf(" -D Detach filter, and exit\n"); + return EXIT_FAILURE; +} + +static int attach_filter(int cg_fd, int type, int verdict) +{ + int prog_fd, map_fd, ret, key; + long long pkt_cnt, byte_cnt; + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, + sizeof(key), sizeof(byte_cnt), + 256, 0); + if (map_fd < 0) { + printf("Failed to create map: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + prog_fd = prog_load(map_fd, verdict); + printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd, cg_fd, type); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + while (1) { + key = MAP_KEY_PACKETS; + assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0); + + key = MAP_KEY_BYTES; + assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0); + + printf("cgroup received %lld packets, %lld bytes\n", + pkt_cnt, byte_cnt); + sleep(1); + } + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + int detach_only = 0, verdict = 1; + enum bpf_attach_type type; + int opt, cg_fd, ret; + + while ((opt = getopt(argc, argv, "Dd")) != -1) { + switch (opt) { + case 'd': + verdict = 0; + break; + case 'D': + detach_only = 1; + break; + default: + return usage(argv[0]); + } + } + + if (argc - optind < 2) + return usage(argv[0]); + + if (strcmp(argv[optind + 1], "ingress") == 0) + type = BPF_CGROUP_INET_INGRESS; + else if (strcmp(argv[optind + 1], "egress") == 0) + type = BPF_CGROUP_INET_EGRESS; + else + return usage(argv[0]); + + cg_fd = open(argv[optind], O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + if (detach_only) { + ret = bpf_prog_detach(cg_fd, type); + printf("bpf_prog_detach() returned '%s' (%d)\n", + strerror(errno), errno); + } else + ret = attach_filter(cg_fd, type, verdict); + + return ret; +} diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c new file mode 100644 index 000000000000..d467b3c1c55c --- /dev/null +++ b/samples/bpf/test_cgrp2_sock.c @@ -0,0 +1,83 @@ +/* eBPF example program: + * + * - Loads eBPF program + * + * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6} + * sockets opened by processes in the cgroup. + * + * - Attaches the new program to a cgroup using BPF_PROG_ATTACH + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/bpf.h> + +#include "libbpf.h" + +static int prog_load(int idx) +{ + struct bpf_insn prog[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, idx), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + + return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), + "GPL", 0); +} + +static int usage(const char *argv0) +{ + printf("Usage: %s cg-path device-index\n", argv0); + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + int cg_fd, prog_fd, ret; + unsigned int idx; + + if (argc < 2) + return usage(argv[0]); + + idx = if_nametoindex(argv[2]); + if (!idx) { + printf("Invalid device name\n"); + return EXIT_FAILURE; + } + + cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + prog_fd = prog_load(idx); + printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh new file mode 100755 index 000000000000..925fd467c7cc --- /dev/null +++ b/samples/bpf/test_cgrp2_sock.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip link add veth0 type veth peer name veth0b + ip link set veth0b up + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip link add foo type vrf table 1234 + ip link set foo up + ip addr add 172.16.1.101/24 dev veth0b + ip addr add 2401:db00::2/64 dev veth0b nodad + ip link set veth0b master foo +} + +function attach_bpf { + rm -rf /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2 + mount -t cgroup2 none /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2/foo + test_cgrp2_sock /tmp/cgroupv2/foo foo + echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +} + +function cleanup { + set +ex + ip netns delete at_ns0 + ip link del veth0 + ip link del foo + umount /tmp/cgroupv2 + rm -rf /tmp/cgroupv2 + set -ex +} + +function do_test { + ping -c1 -w1 172.16.1.100 + ping6 -c1 -w1 2401:db00::1 +} + +cleanup 2>/dev/null +config_device +attach_bpf +do_test +cleanup +echo "*** PASS ***" diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c new file mode 100644 index 000000000000..455ef0d06e93 --- /dev/null +++ b/samples/bpf/test_cgrp2_sock2.c @@ -0,0 +1,66 @@ +/* eBPF example program: + * + * - Loads eBPF program + * + * The eBPF program loads a filter from file and attaches the + * program to a cgroup using BPF_PROG_ATTACH + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/bpf.h> + +#include "libbpf.h" +#include "bpf_load.h" + +static int usage(const char *argv0) +{ + printf("Usage: %s cg-path filter-path [filter-id]\n", argv0); + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + int cg_fd, ret, filter_id = 0; + + if (argc < 3) + return usage(argv[0]); + + cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + if (load_bpf_file(argv[2])) + return EXIT_FAILURE; + + printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + + if (argc > 3) + filter_id = atoi(argv[3]); + + if (filter_id > prog_cnt) { + printf("Invalid program id; program not found in file\n"); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, + BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh new file mode 100755 index 000000000000..891f12a0e26f --- /dev/null +++ b/samples/bpf/test_cgrp2_sock2.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip link add veth0 type veth peer name veth0b + ip link set veth0b up + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add 172.16.1.101/24 dev veth0b + ip addr add 2401:db00::2/64 dev veth0b nodad +} + +function config_cgroup { + rm -rf /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2 + mount -t cgroup2 none /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2/foo + echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +} + + +function attach_bpf { + test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 + [ $? -ne 0 ] && exit 1 +} + +function cleanup { + ip link del veth0b + ip netns delete at_ns0 + umount /tmp/cgroupv2 + rm -rf /tmp/cgroupv2 +} + +cleanup 2>/dev/null + +set -e +config_device +config_cgroup +set +e + +# +# Test 1 - fail ping6 +# +attach_bpf 0 +ping -c1 -w1 172.16.1.100 +if [ $? -ne 0 ]; then + echo "ping failed when it should succeed" + cleanup + exit 1 +fi + +ping6 -c1 -w1 2401:db00::1 +if [ $? -eq 0 ]; then + echo "ping6 succeeded when it should not" + cleanup + exit 1 +fi + +# +# Test 2 - fail ping +# +attach_bpf 1 +ping6 -c1 -w1 2401:db00::1 +if [ $? -ne 0 ]; then + echo "ping6 failed when it should succeed" + cleanup + exit 1 +fi + +ping -c1 -w1 172.16.1.100 +if [ $? -eq 0 ]; then + echo "ping succeeded when it should not" + cleanup + exit 1 +fi + +cleanup +echo +echo "*** PASS ***" diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c index 10ff73404e3a..1547b36a7b7b 100644 --- a/samples/bpf/test_cgrp2_tc_kern.c +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include <uapi/linux/if_ether.h> #include <uapi/linux/in6.h> #include <uapi/linux/ipv6.h> diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c new file mode 100644 index 000000000000..316230a0ed23 --- /dev/null +++ b/samples/bpf/test_lru_dist.c @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define _GNU_SOURCE +#include <linux/types.h> +#include <stdio.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <sched.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <stdlib.h> +#include <time.h> + +#include "libbpf.h" +#include "bpf_util.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +static int nr_cpus; +static unsigned long long *dist_keys; +static unsigned int dist_key_counts; + +struct list_head { + struct list_head *next, *prev; +}; + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +static inline void __list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +static inline void __list_del_entry(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del_entry(list); + list_add(list, head); +} + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +struct pfect_lru_node { + struct list_head list; + unsigned long long key; +}; + +struct pfect_lru { + struct list_head list; + struct pfect_lru_node *free_nodes; + unsigned int cur_size; + unsigned int lru_size; + unsigned int nr_unique; + unsigned int nr_misses; + unsigned int total; + int map_fd; +}; + +static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, + unsigned int nr_possible_elems) +{ + lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(unsigned long long), + sizeof(struct pfect_lru_node *), + nr_possible_elems, 0); + assert(lru->map_fd != -1); + + lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); + assert(lru->free_nodes); + + INIT_LIST_HEAD(&lru->list); + lru->cur_size = 0; + lru->lru_size = lru_size; + lru->nr_unique = lru->nr_misses = lru->total = 0; +} + +static void pfect_lru_destroy(struct pfect_lru *lru) +{ + close(lru->map_fd); + free(lru->free_nodes); +} + +static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, + unsigned long long key) +{ + struct pfect_lru_node *node = NULL; + int seen = 0; + + lru->total++; + if (!bpf_lookup_elem(lru->map_fd, &key, &node)) { + if (node) { + list_move(&node->list, &lru->list); + return 1; + } + seen = 1; + } + + if (lru->cur_size < lru->lru_size) { + node = &lru->free_nodes[lru->cur_size++]; + INIT_LIST_HEAD(&node->list); + } else { + struct pfect_lru_node *null_node = NULL; + + node = list_last_entry(&lru->list, + struct pfect_lru_node, + list); + bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); + } + + node->key = key; + list_move(&node->list, &lru->list); + + lru->nr_misses++; + if (seen) { + assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); + } else { + lru->nr_unique++; + assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); + } + + return seen; +} + +static unsigned int read_keys(const char *dist_file, + unsigned long long **keys) +{ + struct stat fst; + unsigned long long *retkeys; + unsigned int counts = 0; + int dist_fd; + char *b, *l; + int i; + + dist_fd = open(dist_file, 0); + assert(dist_fd != -1); + + assert(fstat(dist_fd, &fst) == 0); + b = malloc(fst.st_size); + assert(b); + + assert(read(dist_fd, b, fst.st_size) == fst.st_size); + close(dist_fd); + for (i = 0; i < fst.st_size; i++) { + if (b[i] == '\n') + counts++; + } + counts++; /* in case the last line has no \n */ + + retkeys = malloc(counts * sizeof(unsigned long long)); + assert(retkeys); + + counts = 0; + for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n")) + retkeys[counts++] = strtoull(l, NULL, 10); + free(b); + + *keys = retkeys; + + return counts; +} + +static int create_map(int map_type, int map_flags, unsigned int size) +{ + int map_fd; + + map_fd = bpf_create_map(map_type, sizeof(unsigned long long), + sizeof(unsigned long long), size, map_flags); + + if (map_fd == -1) + perror("bpf_create_map"); + + return map_fd; +} + +static int sched_next_online(int pid, int next_to_try) +{ + cpu_set_t cpuset; + + if (next_to_try == nr_cpus) + return -1; + + while (next_to_try < nr_cpus) { + CPU_ZERO(&cpuset); + CPU_SET(next_to_try++, &cpuset); + if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) + break; + } + + return next_to_try; +} + +static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data), + void *data) +{ + int next_sched_cpu = 0; + pid_t pid[tasks]; + int i; + + for (i = 0; i < tasks; i++) { + pid[i] = fork(); + if (pid[i] == 0) { + next_sched_cpu = sched_next_online(0, next_sched_cpu); + fn(i, data); + exit(0); + } else if (pid[i] == -1) { + printf("couldn't spawn #%d process\n", i); + exit(1); + } + /* It is mostly redundant and just allow the parent + * process to update next_shced_cpu for the next child + * process + */ + next_sched_cpu = sched_next_online(pid[i], next_sched_cpu); + } + for (i = 0; i < tasks; i++) { + int status; + + assert(waitpid(pid[i], &status, 0) == pid[i]); + assert(status == 0); + } +} + +static void do_test_lru_dist(int task, void *data) +{ + unsigned int nr_misses = 0; + struct pfect_lru pfect_lru; + unsigned long long key, value = 1234; + unsigned int i; + + unsigned int lru_map_fd = ((unsigned int *)data)[0]; + unsigned int lru_size = ((unsigned int *)data)[1]; + unsigned long long key_offset = task * dist_key_counts; + + pfect_lru_init(&pfect_lru, lru_size, dist_key_counts); + + for (i = 0; i < dist_key_counts; i++) { + key = dist_keys[i] + key_offset; + + pfect_lru_lookup_or_insert(&pfect_lru, key); + + if (!bpf_lookup_elem(lru_map_fd, &key, &value)) + continue; + + if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { + printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n", + key, errno); + assert(0); + } + + nr_misses++; + } + + printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", + task, pfect_lru.nr_unique, dist_key_counts, nr_misses, + dist_key_counts); + printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", + task, pfect_lru.nr_unique, pfect_lru.total, + pfect_lru.nr_misses, pfect_lru.total); + + pfect_lru_destroy(&pfect_lru); + close(lru_map_fd); +} + +static void test_parallel_lru_dist(int map_type, int map_flags, + int nr_tasks, unsigned int lru_size) +{ + int child_data[2]; + int lru_map_fd; + + printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, + map_flags); + + if (map_flags & BPF_F_NO_COMMON_LRU) + lru_map_fd = create_map(map_type, map_flags, + nr_cpus * lru_size); + else + lru_map_fd = create_map(map_type, map_flags, + nr_tasks * lru_size); + assert(lru_map_fd != -1); + + child_data[0] = lru_map_fd; + child_data[1] = lru_size; + + run_parallel(nr_tasks, do_test_lru_dist, child_data); + + close(lru_map_fd); +} + +static void test_lru_loss0(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + unsigned int old_unused_losses = 0; + unsigned int new_unused_losses = 0; + unsigned int used_losses = 0; + int map_fd; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, 0) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + map_fd = create_map(map_type, map_flags, 900 * nr_cpus); + else + map_fd = create_map(map_type, map_flags, 900); + + assert(map_fd != -1); + + value[0] = 1234; + + for (key = 1; key <= 1000; key++) { + int start_key, end_key; + + assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); + + start_key = 101; + end_key = min(key, 900); + + while (start_key <= end_key) { + bpf_lookup_elem(map_fd, &start_key, value); + start_key++; + } + } + + for (key = 1; key <= 1000; key++) { + if (bpf_lookup_elem(map_fd, &key, value)) { + if (key <= 100) + old_unused_losses++; + else if (key <= 900) + used_losses++; + else + new_unused_losses++; + } + } + + close(map_fd); + + printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) " + "newer-elem-losses:%d(/100)\n", + old_unused_losses, used_losses, new_unused_losses); +} + +static void test_lru_loss1(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + int map_fd; + unsigned int nr_losses = 0; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, 0) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + map_fd = create_map(map_type, map_flags, 1000 * nr_cpus); + else + map_fd = create_map(map_type, map_flags, 1000); + + assert(map_fd != -1); + + value[0] = 1234; + + for (key = 1; key <= 1000; key++) + assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST)); + + for (key = 1; key <= 1000; key++) { + if (bpf_lookup_elem(map_fd, &key, value)) + nr_losses++; + } + + close(map_fd); + + printf("nr_losses:%d(/1000)\n", nr_losses); +} + +static void do_test_parallel_lru_loss(int task, void *data) +{ + const unsigned int nr_stable_elems = 1000; + const unsigned int nr_repeats = 100000; + + int map_fd = *(int *)data; + unsigned long long stable_base; + unsigned long long key, value[nr_cpus]; + unsigned long long next_ins_key; + unsigned int nr_losses = 0; + unsigned int i; + + stable_base = task * nr_repeats * 2 + 1; + next_ins_key = stable_base; + value[0] = 1234; + for (i = 0; i < nr_stable_elems; i++) { + assert(bpf_update_elem(map_fd, &next_ins_key, value, + BPF_NOEXIST) == 0); + next_ins_key++; + } + + for (i = 0; i < nr_repeats; i++) { + int rn; + + rn = rand(); + + if (rn % 10) { + key = rn % nr_stable_elems + stable_base; + bpf_lookup_elem(map_fd, &key, value); + } else { + bpf_update_elem(map_fd, &next_ins_key, value, + BPF_NOEXIST); + next_ins_key++; + } + } + + key = stable_base; + for (i = 0; i < nr_stable_elems; i++) { + if (bpf_lookup_elem(map_fd, &key, value)) + nr_losses++; + key++; + } + + printf(" task:%d nr_losses:%u\n", task, nr_losses); +} + +static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) +{ + int map_fd; + + printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, + map_flags); + + /* Give 20% more than the active working set */ + if (map_flags & BPF_F_NO_COMMON_LRU) + map_fd = create_map(map_type, map_flags, + nr_cpus * (1000 + 200)); + else + map_fd = create_map(map_type, map_flags, + nr_tasks * (1000 + 200)); + + assert(map_fd != -1); + + run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd); + + close(map_fd); +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; + const char *dist_file; + int nr_tasks = 1; + int lru_size; + int f; + + if (argc < 4) { + printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n", + argv[0]); + return -1; + } + + dist_file = argv[1]; + lru_size = atoi(argv[2]); + nr_tasks = atoi(argv[3]); + + setbuf(stdout, NULL); + + assert(!setrlimit(RLIMIT_MEMLOCK, &r)); + + srand(time(NULL)); + + nr_cpus = bpf_num_possible_cpus(); + assert(nr_cpus != -1); + printf("nr_cpus:%d\n\n", nr_cpus); + + nr_tasks = min(nr_tasks, nr_cpus); + + dist_key_counts = read_keys(dist_file, &dist_keys); + if (!dist_key_counts) { + printf("%s has no key\n", dist_file); + return -1; + } + + for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) { + test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); + test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); + test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f], + nr_tasks); + test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f], + nr_tasks, lru_size); + printf("\n"); + } + + free(dist_keys); + + return 0; +} diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c new file mode 100644 index 000000000000..bacc8013436b --- /dev/null +++ b/samples/bpf/test_lwt_bpf.c @@ -0,0 +1,253 @@ +/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <stdint.h> +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmpv6.h> +#include <linux/if_ether.h> +#include "bpf_helpers.h" +#include <string.h> + +# define printk(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ + }) + +#define CB_MAGIC 1234 + +/* Test: Pass all packets through */ +SEC("nop") +int do_nop(struct __sk_buff *skb) +{ + return BPF_OK; +} + +/* Test: Verify context information can be accessed */ +SEC("test_ctx") +int do_test_ctx(struct __sk_buff *skb) +{ + skb->cb[0] = CB_MAGIC; + printk("len %d hash %d protocol %d\n", skb->len, skb->hash, + skb->protocol); + printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0], + skb->ingress_ifindex, skb->ifindex); + + return BPF_OK; +} + +/* Test: Ensure skb->cb[] buffer is cleared */ +SEC("test_cb") +int do_test_cb(struct __sk_buff *skb) +{ + printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1], + skb->cb[2]); + printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]); + + return BPF_OK; +} + +/* Test: Verify skb data can be read */ +SEC("test_data") +int do_test_data(struct __sk_buff *skb) +{ + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct iphdr *iph = data; + + if (data + sizeof(*iph) > data_end) { + printk("packet truncated\n"); + return BPF_DROP; + } + + printk("src: %x dst: %x\n", iph->saddr, iph->daddr); + + return BPF_OK; +} + +#define IP_CSUM_OFF offsetof(struct iphdr, check) +#define IP_DST_OFF offsetof(struct iphdr, daddr) +#define IP_SRC_OFF offsetof(struct iphdr, saddr) +#define IP_PROTO_OFF offsetof(struct iphdr, protocol) +#define TCP_CSUM_OFF offsetof(struct tcphdr, check) +#define UDP_CSUM_OFF offsetof(struct udphdr, check) +#define IS_PSEUDO 0x10 + +static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip, + uint32_t new_ip, int rw_daddr) +{ + int ret, off = 0, flags = IS_PSEUDO; + uint8_t proto; + + ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1); + if (ret < 0) { + printk("bpf_l4_csum_replace failed: %d\n", ret); + return BPF_DROP; + } + + switch (proto) { + case IPPROTO_TCP: + off = TCP_CSUM_OFF; + break; + + case IPPROTO_UDP: + off = UDP_CSUM_OFF; + flags |= BPF_F_MARK_MANGLED_0; + break; + + case IPPROTO_ICMPV6: + off = offsetof(struct icmp6hdr, icmp6_cksum); + break; + } + + if (off) { + ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip, + flags | sizeof(new_ip)); + if (ret < 0) { + printk("bpf_l4_csum_replace failed: %d\n"); + return BPF_DROP; + } + } + + ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); + if (ret < 0) { + printk("bpf_l3_csum_replace failed: %d\n", ret); + return BPF_DROP; + } + + if (rw_daddr) + ret = bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, sizeof(new_ip), 0); + else + ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0); + + if (ret < 0) { + printk("bpf_skb_store_bytes() failed: %d\n", ret); + return BPF_DROP; + } + + return BPF_OK; +} + +/* Test: Verify skb data can be modified */ +SEC("test_rewrite") +int do_test_rewrite(struct __sk_buff *skb) +{ + uint32_t old_ip, new_ip = 0x3fea8c0; + int ret; + + ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4); + if (ret < 0) { + printk("bpf_skb_load_bytes failed: %d\n", ret); + return BPF_DROP; + } + + if (old_ip == 0x2fea8c0) { + printk("out: rewriting from %x to %x\n", old_ip, new_ip); + return rewrite(skb, old_ip, new_ip, 1); + } + + return BPF_OK; +} + +static inline int __do_push_ll_and_redirect(struct __sk_buff *skb) +{ + uint64_t smac = SRC_MAC, dmac = DST_MAC; + int ret, ifindex = DST_IFINDEX; + struct ethhdr ehdr; + + ret = bpf_skb_change_head(skb, 14, 0); + if (ret < 0) { + printk("skb_change_head() failed: %d\n", ret); + } + + ehdr.h_proto = __constant_htons(ETH_P_IP); + memcpy(&ehdr.h_source, &smac, 6); + memcpy(&ehdr.h_dest, &dmac, 6); + + ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0); + if (ret < 0) { + printk("skb_store_bytes() failed: %d\n", ret); + return BPF_DROP; + } + + return bpf_redirect(ifindex, 0); +} + +SEC("push_ll_and_redirect_silent") +int do_push_ll_and_redirect_silent(struct __sk_buff *skb) +{ + return __do_push_ll_and_redirect(skb); +} + +SEC("push_ll_and_redirect") +int do_push_ll_and_redirect(struct __sk_buff *skb) +{ + int ret, ifindex = DST_IFINDEX; + + ret = __do_push_ll_and_redirect(skb); + if (ret >= 0) + printk("redirected to %d\n", ifindex); + + return ret; +} + +static inline void __fill_garbage(struct __sk_buff *skb) +{ + uint64_t f = 0xFFFFFFFFFFFFFFFF; + + bpf_skb_store_bytes(skb, 0, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 8, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 16, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 24, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 32, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 40, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 48, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 56, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 64, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 72, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 80, &f, sizeof(f), 0); + bpf_skb_store_bytes(skb, 88, &f, sizeof(f), 0); +} + +SEC("fill_garbage") +int do_fill_garbage(struct __sk_buff *skb) +{ + __fill_garbage(skb); + printk("Set initial 96 bytes of header to FF\n"); + return BPF_OK; +} + +SEC("fill_garbage_and_redirect") +int do_fill_garbage_and_redirect(struct __sk_buff *skb) +{ + int ifindex = DST_IFINDEX; + __fill_garbage(skb); + printk("redirected to %d\n", ifindex); + return bpf_redirect(ifindex, 0); +} + +/* Drop all packets */ +SEC("drop_all") +int do_drop_all(struct __sk_buff *skb) +{ + printk("dropping with: %d\n", BPF_DROP); + return BPF_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh new file mode 100644 index 000000000000..a695ae268c40 --- /dev/null +++ b/samples/bpf/test_lwt_bpf.sh @@ -0,0 +1,399 @@ +#!/bin/bash + +# Uncomment to see generated bytecode +#VERBOSE=verbose + +NS1=lwt_ns1 +NS2=lwt_ns2 +VETH0=tst_lwt1a +VETH1=tst_lwt1b +VETH2=tst_lwt2a +VETH3=tst_lwt2b +IPVETH0="192.168.254.1" +IPVETH1="192.168.254.2" +IPVETH1b="192.168.254.3" + +IPVETH2="192.168.111.1" +IPVETH3="192.168.111.2" + +IP_LOCAL="192.168.99.1" + +TRACE_ROOT=/sys/kernel/debug/tracing + +function lookup_mac() +{ + set +x + if [ ! -z "$2" ]; then + MAC=$(ip netns exec $2 ip link show $1 | grep ether | awk '{print $2}') + else + MAC=$(ip link show $1 | grep ether | awk '{print $2}') + fi + MAC="${MAC//:/}" + echo "0x${MAC:10:2}${MAC:8:2}${MAC:6:2}${MAC:4:2}${MAC:2:2}${MAC:0:2}" + set -x +} + +function cleanup { + set +ex + rm test_lwt_bpf.o 2> /dev/null + ip link del $VETH0 2> /dev/null + ip link del $VETH1 2> /dev/null + ip link del $VETH2 2> /dev/null + ip link del $VETH3 2> /dev/null + ip netns exec $NS1 killall netserver + ip netns delete $NS1 2> /dev/null + ip netns delete $NS2 2> /dev/null + set -ex +} + +function setup_one_veth { + ip netns add $1 + ip link add $2 type veth peer name $3 + ip link set dev $2 up + ip addr add $4/24 dev $2 + ip link set $3 netns $1 + ip netns exec $1 ip link set dev $3 up + ip netns exec $1 ip addr add $5/24 dev $3 + + if [ "$6" ]; then + ip netns exec $1 ip addr add $6/32 dev $3 + fi +} + +function get_trace { + set +x + cat ${TRACE_ROOT}/trace | grep -v '^#' + set -x +} + +function cleanup_routes { + ip route del ${IPVETH1}/32 dev $VETH0 2> /dev/null || true + ip route del table local local ${IP_LOCAL}/32 dev lo 2> /dev/null || true +} + +function install_test { + cleanup_routes + cp /dev/null ${TRACE_ROOT}/trace + + OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE" + + if [ "$1" == "in" ]; then + ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo + else + ip route add ${IPVETH1}/32 $OPTS dev $VETH0 + fi +} + +function remove_prog { + if [ "$1" == "in" ]; then + ip route del table local local ${IP_LOCAL}/32 dev lo + else + ip route del ${IPVETH1}/32 dev $VETH0 + fi +} + +function filter_trace { + # Add newline to allow starting EXPECT= variables on newline + NL=$'\n' + echo "${NL}$*" | sed -e 's/^.*: : //g' +} + +function expect_fail { + set +x + echo "FAIL:" + echo "Expected: $1" + echo "Got: $2" + set -x + exit 1 +} + +function match_trace { + set +x + RET=0 + TRACE=$1 + EXPECT=$2 + GOT="$(filter_trace "$TRACE")" + + [ "$GOT" != "$EXPECT" ] && { + expect_fail "$EXPECT" "$GOT" + RET=1 + } + set -x + return $RET +} + +function test_start { + set +x + echo "----------------------------------------------------------------" + echo "Starting test: $*" + echo "----------------------------------------------------------------" + set -x +} + +function failure { + get_trace + echo "FAIL: $*" + exit 1 +} + +function test_ctx_xmit { + test_start "test_ctx on lwt xmit" + install_test xmit test_ctx + ping -c 3 $IPVETH1 || { + failure "test_ctx xmit: packets are dropped" + } + match_trace "$(get_trace)" " +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX" || exit 1 + remove_prog xmit +} + +function test_ctx_out { + test_start "test_ctx on lwt out" + install_test out test_ctx + ping -c 3 $IPVETH1 || { + failure "test_ctx out: packets are dropped" + } + match_trace "$(get_trace)" " +len 84 hash 0 protocol 0 +cb 1234 ingress_ifindex 0 ifindex 0 +len 84 hash 0 protocol 0 +cb 1234 ingress_ifindex 0 ifindex 0 +len 84 hash 0 protocol 0 +cb 1234 ingress_ifindex 0 ifindex 0" || exit 1 + remove_prog out +} + +function test_ctx_in { + test_start "test_ctx on lwt in" + install_test in test_ctx + ping -c 3 $IP_LOCAL || { + failure "test_ctx out: packets are dropped" + } + # We will both request & reply packets as the packets will + # be from $IP_LOCAL => $IP_LOCAL + match_trace "$(get_trace)" " +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 1 ifindex 1 +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 1 ifindex 1 +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 1 ifindex 1 +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 1 ifindex 1 +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 1 ifindex 1 +len 84 hash 0 protocol 8 +cb 1234 ingress_ifindex 1 ifindex 1" || exit 1 + remove_prog in +} + +function test_data { + test_start "test_data on lwt $1" + install_test $1 test_data + ping -c 3 $IPVETH1 || { + failure "test_data ${1}: packets are dropped" + } + match_trace "$(get_trace)" " +src: 1fea8c0 dst: 2fea8c0 +src: 1fea8c0 dst: 2fea8c0 +src: 1fea8c0 dst: 2fea8c0" || exit 1 + remove_prog $1 +} + +function test_data_in { + test_start "test_data on lwt in" + install_test in test_data + ping -c 3 $IP_LOCAL || { + failure "test_data in: packets are dropped" + } + # We will both request & reply packets as the packets will + # be from $IP_LOCAL => $IP_LOCAL + match_trace "$(get_trace)" " +src: 163a8c0 dst: 163a8c0 +src: 163a8c0 dst: 163a8c0 +src: 163a8c0 dst: 163a8c0 +src: 163a8c0 dst: 163a8c0 +src: 163a8c0 dst: 163a8c0 +src: 163a8c0 dst: 163a8c0" || exit 1 + remove_prog in +} + +function test_cb { + test_start "test_cb on lwt $1" + install_test $1 test_cb + ping -c 3 $IPVETH1 || { + failure "test_cb ${1}: packets are dropped" + } + match_trace "$(get_trace)" " +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0" || exit 1 + remove_prog $1 +} + +function test_cb_in { + test_start "test_cb on lwt in" + install_test in test_cb + ping -c 3 $IP_LOCAL || { + failure "test_cb in: packets are dropped" + } + # We will both request & reply packets as the packets will + # be from $IP_LOCAL => $IP_LOCAL + match_trace "$(get_trace)" " +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0 +cb0: 0 cb1: 0 cb2: 0 +cb3: 0 cb4: 0" || exit 1 + remove_prog in +} + +function test_drop_all { + test_start "test_drop_all on lwt $1" + install_test $1 drop_all + ping -c 3 $IPVETH1 && { + failure "test_drop_all ${1}: Unexpected success of ping" + } + match_trace "$(get_trace)" " +dropping with: 2 +dropping with: 2 +dropping with: 2" || exit 1 + remove_prog $1 +} + +function test_drop_all_in { + test_start "test_drop_all on lwt in" + install_test in drop_all + ping -c 3 $IP_LOCAL && { + failure "test_drop_all in: Unexpected success of ping" + } + match_trace "$(get_trace)" " +dropping with: 2 +dropping with: 2 +dropping with: 2" || exit 1 + remove_prog in +} + +function test_push_ll_and_redirect { + test_start "test_push_ll_and_redirect on lwt xmit" + install_test xmit push_ll_and_redirect + ping -c 3 $IPVETH1 || { + failure "Redirected packets appear to be dropped" + } + match_trace "$(get_trace)" " +redirected to $DST_IFINDEX +redirected to $DST_IFINDEX +redirected to $DST_IFINDEX" || exit 1 + remove_prog xmit +} + +function test_no_l2_and_redirect { + test_start "test_no_l2_and_redirect on lwt xmit" + install_test xmit fill_garbage_and_redirect + ping -c 3 $IPVETH1 && { + failure "Unexpected success despite lack of L2 header" + } + match_trace "$(get_trace)" " +redirected to $DST_IFINDEX +redirected to $DST_IFINDEX +redirected to $DST_IFINDEX" || exit 1 + remove_prog xmit +} + +function test_rewrite { + test_start "test_rewrite on lwt xmit" + install_test xmit test_rewrite + ping -c 3 $IPVETH1 || { + failure "Rewritten packets appear to be dropped" + } + match_trace "$(get_trace)" " +out: rewriting from 2fea8c0 to 3fea8c0 +out: rewriting from 2fea8c0 to 3fea8c0 +out: rewriting from 2fea8c0 to 3fea8c0" || exit 1 + remove_prog out +} + +function test_fill_garbage { + test_start "test_fill_garbage on lwt xmit" + install_test xmit fill_garbage + ping -c 3 $IPVETH1 && { + failure "test_drop_all ${1}: Unexpected success of ping" + } + match_trace "$(get_trace)" " +Set initial 96 bytes of header to FF +Set initial 96 bytes of header to FF +Set initial 96 bytes of header to FF" || exit 1 + remove_prog xmit +} + +function test_netperf_nop { + test_start "test_netperf_nop on lwt xmit" + install_test xmit nop + netperf -H $IPVETH1 -t TCP_STREAM || { + failure "packets appear to be dropped" + } + match_trace "$(get_trace)" ""|| exit 1 + remove_prog xmit +} + +function test_netperf_redirect { + test_start "test_netperf_redirect on lwt xmit" + install_test xmit push_ll_and_redirect_silent + netperf -H $IPVETH1 -t TCP_STREAM || { + failure "Rewritten packets appear to be dropped" + } + match_trace "$(get_trace)" ""|| exit 1 + remove_prog xmit +} + +cleanup +setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b +setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3 +ip netns exec $NS1 netserver +echo 1 > ${TRACE_ROOT}/tracing_on + +DST_MAC=$(lookup_mac $VETH1 $NS1) +SRC_MAC=$(lookup_mac $VETH0) +DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex) + +CLANG_OPTS="-O2 -target bpf -I ../include/" +CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX" +clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o + +test_ctx_xmit +test_ctx_out +test_ctx_in +test_data "xmit" +test_data "out" +test_data_in +test_cb "xmit" +test_cb "out" +test_cb_in +test_drop_all "xmit" +test_drop_all "out" +test_drop_all_in +test_rewrite +test_push_ll_and_redirect +test_no_l2_and_redirect +test_fill_garbage +test_netperf_nop +test_netperf_redirect + +cleanup +echo 0 > ${TRACE_ROOT}/tracing_on +exit 0 diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index ab5b19e68acf..3e225e331f66 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -4,8 +4,10 @@ #include <signal.h> #include <linux/bpf.h> #include <string.h> + #include "libbpf.h" #include "bpf_load.h" +#include "bpf_util.h" #define MAX_INDEX 64 #define MAX_STARS 38 @@ -36,8 +38,8 @@ struct hist_key { static void print_hist_for_pid(int fd, void *task) { + unsigned int nr_cpus = bpf_num_possible_cpus(); struct hist_key key = {}, next_key; - unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); long values[nr_cpus]; char starstr[MAX_STARS]; long value; diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 48716f7f0d8b..d0851cb4fa8d 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -11,8 +11,10 @@ #include <stdbool.h> #include <string.h> #include <linux/bpf.h> + #include "libbpf.h" #include "bpf_load.h" +#include "bpf_util.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) @@ -20,7 +22,7 @@ static void clear_stats(int fd) { - unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + unsigned int nr_cpus = bpf_num_possible_cpus(); __u64 values[nr_cpus]; __u32 key; @@ -77,7 +79,7 @@ static void print_banner(void) static void print_hist(int fd) { - unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + unsigned int nr_cpus = bpf_num_possible_cpus(); __u64 total_events = 0; long values[nr_cpus]; __u64 max_cnt = 0; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index a5e109e398a1..2b2150d6d6f7 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -15,7 +15,9 @@ #include <string.h> #include <sys/socket.h> #include <unistd.h> + #include "bpf_load.h" +#include "bpf_util.h" #include "libbpf.h" static int set_link_xdp_fd(int ifindex, int fd) @@ -120,7 +122,7 @@ static void int_exit(int sig) */ static void poll_stats(int interval) { - unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + unsigned int nr_cpus = bpf_num_possible_cpus(); const unsigned int nr_keys = 256; __u64 values[nr_cpus], prev[nr_keys][nr_cpus]; __u32 key; diff --git a/samples/mei/.gitignore b/samples/mei/.gitignore new file mode 100644 index 000000000000..f356b81ca1ec --- /dev/null +++ b/samples/mei/.gitignore @@ -0,0 +1 @@ +mei-amt-version diff --git a/samples/mei/Makefile b/samples/mei/Makefile new file mode 100644 index 000000000000..7aac216dc420 --- /dev/null +++ b/samples/mei/Makefile @@ -0,0 +1,9 @@ +CC := $(CROSS_COMPILE)gcc +CFLAGS := -I../../usr/include + +PROGS := mei-amt-version + +all: $(PROGS) + +clean: + rm -fr $(PROGS) diff --git a/samples/mei/TODO b/samples/mei/TODO new file mode 100644 index 000000000000..6b3625d3058c --- /dev/null +++ b/samples/mei/TODO @@ -0,0 +1,2 @@ +TODO: + - Cleanup and split the timer function diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c new file mode 100644 index 000000000000..57d0d871dcf7 --- /dev/null +++ b/samples/mei/mei-amt-version.c @@ -0,0 +1,479 @@ +/****************************************************************************** + * Intel Management Engine Interface (Intel MEI) Linux driver + * Intel MEI Interface Header + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, + * USA + * + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation. + * linux-mei@linux.intel.com + * http://www.intel.com + * + * BSD LICENSE + * + * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <errno.h> +#include <stdint.h> +#include <stdbool.h> +#include <bits/wordsize.h> +#include <linux/mei.h> + +/***************************************************************************** + * Intel Management Engine Interface + *****************************************************************************/ + +#define mei_msg(_me, fmt, ARGS...) do { \ + if (_me->verbose) \ + fprintf(stderr, fmt, ##ARGS); \ +} while (0) + +#define mei_err(_me, fmt, ARGS...) do { \ + fprintf(stderr, "Error: " fmt, ##ARGS); \ +} while (0) + +struct mei { + uuid_le guid; + bool initialized; + bool verbose; + unsigned int buf_size; + unsigned char prot_ver; + int fd; +}; + +static void mei_deinit(struct mei *cl) +{ + if (cl->fd != -1) + close(cl->fd); + cl->fd = -1; + cl->buf_size = 0; + cl->prot_ver = 0; + cl->initialized = false; +} + +static bool mei_init(struct mei *me, const uuid_le *guid, + unsigned char req_protocol_version, bool verbose) +{ + int result; + struct mei_client *cl; + struct mei_connect_client_data data; + + me->verbose = verbose; + + me->fd = open("/dev/mei", O_RDWR); + if (me->fd == -1) { + mei_err(me, "Cannot establish a handle to the Intel MEI driver\n"); + goto err; + } + memcpy(&me->guid, guid, sizeof(*guid)); + memset(&data, 0, sizeof(data)); + me->initialized = true; + + memcpy(&data.in_client_uuid, &me->guid, sizeof(me->guid)); + result = ioctl(me->fd, IOCTL_MEI_CONNECT_CLIENT, &data); + if (result) { + mei_err(me, "IOCTL_MEI_CONNECT_CLIENT receive message. err=%d\n", result); + goto err; + } + cl = &data.out_client_properties; + mei_msg(me, "max_message_length %d\n", cl->max_msg_length); + mei_msg(me, "protocol_version %d\n", cl->protocol_version); + + if ((req_protocol_version > 0) && + (cl->protocol_version != req_protocol_version)) { + mei_err(me, "Intel MEI protocol version not supported\n"); + goto err; + } + + me->buf_size = cl->max_msg_length; + me->prot_ver = cl->protocol_version; + + return true; +err: + mei_deinit(me); + return false; +} + +static ssize_t mei_recv_msg(struct mei *me, unsigned char *buffer, + ssize_t len, unsigned long timeout) +{ + ssize_t rc; + + mei_msg(me, "call read length = %zd\n", len); + + rc = read(me->fd, buffer, len); + if (rc < 0) { + mei_err(me, "read failed with status %zd %s\n", + rc, strerror(errno)); + mei_deinit(me); + } else { + mei_msg(me, "read succeeded with result %zd\n", rc); + } + return rc; +} + +static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer, + ssize_t len, unsigned long timeout) +{ + struct timeval tv; + ssize_t written; + ssize_t rc; + fd_set set; + + tv.tv_sec = timeout / 1000; + tv.tv_usec = (timeout % 1000) * 1000000; + + mei_msg(me, "call write length = %zd\n", len); + + written = write(me->fd, buffer, len); + if (written < 0) { + rc = -errno; + mei_err(me, "write failed with status %zd %s\n", + written, strerror(errno)); + goto out; + } + + FD_ZERO(&set); + FD_SET(me->fd, &set); + rc = select(me->fd + 1 , &set, NULL, NULL, &tv); + if (rc > 0 && FD_ISSET(me->fd, &set)) { + mei_msg(me, "write success\n"); + } else if (rc == 0) { + mei_err(me, "write failed on timeout with status\n"); + goto out; + } else { /* rc < 0 */ + mei_err(me, "write failed on select with status %zd\n", rc); + goto out; + } + + rc = written; +out: + if (rc < 0) + mei_deinit(me); + + return rc; +} + +/*************************************************************************** + * Intel Advanced Management Technology ME Client + ***************************************************************************/ + +#define AMT_MAJOR_VERSION 1 +#define AMT_MINOR_VERSION 1 + +#define AMT_STATUS_SUCCESS 0x0 +#define AMT_STATUS_INTERNAL_ERROR 0x1 +#define AMT_STATUS_NOT_READY 0x2 +#define AMT_STATUS_INVALID_AMT_MODE 0x3 +#define AMT_STATUS_INVALID_MESSAGE_LENGTH 0x4 + +#define AMT_STATUS_HOST_IF_EMPTY_RESPONSE 0x4000 +#define AMT_STATUS_SDK_RESOURCES 0x1004 + + +#define AMT_BIOS_VERSION_LEN 65 +#define AMT_VERSIONS_NUMBER 50 +#define AMT_UNICODE_STRING_LEN 20 + +struct amt_unicode_string { + uint16_t length; + char string[AMT_UNICODE_STRING_LEN]; +} __attribute__((packed)); + +struct amt_version_type { + struct amt_unicode_string description; + struct amt_unicode_string version; +} __attribute__((packed)); + +struct amt_version { + uint8_t major; + uint8_t minor; +} __attribute__((packed)); + +struct amt_code_versions { + uint8_t bios[AMT_BIOS_VERSION_LEN]; + uint32_t count; + struct amt_version_type versions[AMT_VERSIONS_NUMBER]; +} __attribute__((packed)); + +/*************************************************************************** + * Intel Advanced Management Technology Host Interface + ***************************************************************************/ + +struct amt_host_if_msg_header { + struct amt_version version; + uint16_t _reserved; + uint32_t command; + uint32_t length; +} __attribute__((packed)); + +struct amt_host_if_resp_header { + struct amt_host_if_msg_header header; + uint32_t status; + unsigned char data[0]; +} __attribute__((packed)); + +const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d, \ + 0xac, 0xa8, 0x46, 0xe0, 0xff, 0x65, 0x81, 0x4c); + +#define AMT_HOST_IF_CODE_VERSIONS_REQUEST 0x0400001A +#define AMT_HOST_IF_CODE_VERSIONS_RESPONSE 0x0480001A + +const struct amt_host_if_msg_header CODE_VERSION_REQ = { + .version = {AMT_MAJOR_VERSION, AMT_MINOR_VERSION}, + ._reserved = 0, + .command = AMT_HOST_IF_CODE_VERSIONS_REQUEST, + .length = 0 +}; + + +struct amt_host_if { + struct mei mei_cl; + unsigned long send_timeout; + bool initialized; +}; + + +static bool amt_host_if_init(struct amt_host_if *acmd, + unsigned long send_timeout, bool verbose) +{ + acmd->send_timeout = (send_timeout) ? send_timeout : 20000; + acmd->initialized = mei_init(&acmd->mei_cl, &MEI_IAMTHIF, 0, verbose); + return acmd->initialized; +} + +static void amt_host_if_deinit(struct amt_host_if *acmd) +{ + mei_deinit(&acmd->mei_cl); + acmd->initialized = false; +} + +static uint32_t amt_verify_code_versions(const struct amt_host_if_resp_header *resp) +{ + uint32_t status = AMT_STATUS_SUCCESS; + struct amt_code_versions *code_ver; + size_t code_ver_len; + uint32_t ver_type_cnt; + uint32_t len; + uint32_t i; + + code_ver = (struct amt_code_versions *)resp->data; + /* length - sizeof(status) */ + code_ver_len = resp->header.length - sizeof(uint32_t); + ver_type_cnt = code_ver_len - + sizeof(code_ver->bios) - + sizeof(code_ver->count); + if (code_ver->count != ver_type_cnt / sizeof(struct amt_version_type)) { + status = AMT_STATUS_INTERNAL_ERROR; + goto out; + } + + for (i = 0; i < code_ver->count; i++) { + len = code_ver->versions[i].description.length; + + if (len > AMT_UNICODE_STRING_LEN) { + status = AMT_STATUS_INTERNAL_ERROR; + goto out; + } + + len = code_ver->versions[i].version.length; + if (code_ver->versions[i].version.string[len] != '\0' || + len != strlen(code_ver->versions[i].version.string)) { + status = AMT_STATUS_INTERNAL_ERROR; + goto out; + } + } +out: + return status; +} + +static uint32_t amt_verify_response_header(uint32_t command, + const struct amt_host_if_msg_header *resp_hdr, + uint32_t response_size) +{ + if (response_size < sizeof(struct amt_host_if_resp_header)) { + return AMT_STATUS_INTERNAL_ERROR; + } else if (response_size != (resp_hdr->length + + sizeof(struct amt_host_if_msg_header))) { + return AMT_STATUS_INTERNAL_ERROR; + } else if (resp_hdr->command != command) { + return AMT_STATUS_INTERNAL_ERROR; + } else if (resp_hdr->_reserved != 0) { + return AMT_STATUS_INTERNAL_ERROR; + } else if (resp_hdr->version.major != AMT_MAJOR_VERSION || + resp_hdr->version.minor < AMT_MINOR_VERSION) { + return AMT_STATUS_INTERNAL_ERROR; + } + return AMT_STATUS_SUCCESS; +} + +static uint32_t amt_host_if_call(struct amt_host_if *acmd, + const unsigned char *command, ssize_t command_sz, + uint8_t **read_buf, uint32_t rcmd, + unsigned int expected_sz) +{ + uint32_t in_buf_sz; + uint32_t out_buf_sz; + ssize_t written; + uint32_t status; + struct amt_host_if_resp_header *msg_hdr; + + in_buf_sz = acmd->mei_cl.buf_size; + *read_buf = (uint8_t *)malloc(sizeof(uint8_t) * in_buf_sz); + if (*read_buf == NULL) + return AMT_STATUS_SDK_RESOURCES; + memset(*read_buf, 0, in_buf_sz); + msg_hdr = (struct amt_host_if_resp_header *)*read_buf; + + written = mei_send_msg(&acmd->mei_cl, + command, command_sz, acmd->send_timeout); + if (written != command_sz) + return AMT_STATUS_INTERNAL_ERROR; + + out_buf_sz = mei_recv_msg(&acmd->mei_cl, *read_buf, in_buf_sz, 2000); + if (out_buf_sz <= 0) + return AMT_STATUS_HOST_IF_EMPTY_RESPONSE; + + status = msg_hdr->status; + if (status != AMT_STATUS_SUCCESS) + return status; + + status = amt_verify_response_header(rcmd, + &msg_hdr->header, out_buf_sz); + if (status != AMT_STATUS_SUCCESS) + return status; + + if (expected_sz && expected_sz != out_buf_sz) + return AMT_STATUS_INTERNAL_ERROR; + + return AMT_STATUS_SUCCESS; +} + + +static uint32_t amt_get_code_versions(struct amt_host_if *cmd, + struct amt_code_versions *versions) +{ + struct amt_host_if_resp_header *response = NULL; + uint32_t status; + + status = amt_host_if_call(cmd, + (const unsigned char *)&CODE_VERSION_REQ, + sizeof(CODE_VERSION_REQ), + (uint8_t **)&response, + AMT_HOST_IF_CODE_VERSIONS_RESPONSE, 0); + + if (status != AMT_STATUS_SUCCESS) + goto out; + + status = amt_verify_code_versions(response); + if (status != AMT_STATUS_SUCCESS) + goto out; + + memcpy(versions, response->data, sizeof(struct amt_code_versions)); +out: + if (response != NULL) + free(response); + + return status; +} + +/************************** end of amt_host_if_command ***********************/ +int main(int argc, char **argv) +{ + struct amt_code_versions ver; + struct amt_host_if acmd; + unsigned int i; + uint32_t status; + int ret; + bool verbose; + + verbose = (argc > 1 && strcmp(argv[1], "-v") == 0); + + if (!amt_host_if_init(&acmd, 5000, verbose)) { + ret = 1; + goto out; + } + + status = amt_get_code_versions(&acmd, &ver); + + amt_host_if_deinit(&acmd); + + switch (status) { + case AMT_STATUS_HOST_IF_EMPTY_RESPONSE: + printf("Intel AMT: DISABLED\n"); + ret = 0; + break; + case AMT_STATUS_SUCCESS: + printf("Intel AMT: ENABLED\n"); + for (i = 0; i < ver.count; i++) { + printf("%s:\t%s\n", ver.versions[i].description.string, + ver.versions[i].version.string); + } + ret = 0; + break; + default: + printf("An error has occurred\n"); + ret = 1; + break; + } + +out: + return ret; +} diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore new file mode 100644 index 000000000000..8b7c72f07c92 --- /dev/null +++ b/samples/mic/mpssd/.gitignore @@ -0,0 +1 @@ +mpssd diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile new file mode 100644 index 000000000000..3e3ef91fed6b --- /dev/null +++ b/samples/mic/mpssd/Makefile @@ -0,0 +1,27 @@ +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) + +PROGS := mpssd +CC = $(CROSS_COMPILE)gcc +CFLAGS := -I../../../usr/include -I../../../tools/include + +ifdef DEBUG +CFLAGS += -DDEBUG=$(DEBUG) +endif + +all: $(PROGS) +mpssd: mpssd.c sysfs.c + $(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread + +install: + install mpssd /usr/sbin/mpssd + install micctrl /usr/sbin/micctrl + +clean: + rm -fr $(PROGS) + +endif +endif diff --git a/samples/mic/mpssd/micctrl b/samples/mic/mpssd/micctrl new file mode 100755 index 000000000000..8f2629b41c5f --- /dev/null +++ b/samples/mic/mpssd/micctrl @@ -0,0 +1,173 @@ +#!/bin/bash +# Intel MIC Platform Software Stack (MPSS) +# +# Copyright(c) 2013 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Intel MIC User Space Tools. +# +# micctrl - Controls MIC boot/start/stop. +# +# chkconfig: 2345 95 05 +# description: start MPSS stack processing. +# +### BEGIN INIT INFO +# Provides: micctrl +### END INIT INFO + +# Source function library. +. /etc/init.d/functions + +sysfs="/sys/class/mic" + +_status() +{ + f=$sysfs/$1 + echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`" +} + +status() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _status $1 + return $? + fi + for f in $sysfs/* + do + _status `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_reset() +{ + f=$sysfs/$1 + echo reset > $f/state +} + +reset() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _reset $1 + return $? + fi + for f in $sysfs/* + do + _reset `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_boot() +{ + f=$sysfs/$1 + echo "linux" > $f/bootmode + echo "mic/uos.img" > $f/firmware + echo "mic/$1.image" > $f/ramdisk + echo "boot" > $f/state +} + +boot() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _boot $1 + return $? + fi + for f in $sysfs/* + do + _boot `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_shutdown() +{ + f=$sysfs/$1 + echo shutdown > $f/state +} + +shutdown() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _shutdown $1 + return $? + fi + for f in $sysfs/* + do + _shutdown `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_wait() +{ + f=$sysfs/$1 + while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ] + do + sleep 1 + echo -e "Waiting for $1 to go offline" + done +} + +wait() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _wait $1 + return $? + fi + # Wait for the cards to go offline + for f in $sysfs/* + do + _wait `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +if [ ! -d "$sysfs" ]; then + echo -e $"Module unloaded " + exit 3 +fi + +case $1 in + -s) + status $2 + ;; + -r) + reset $2 + ;; + -b) + boot $2 + ;; + -S) + shutdown $2 + ;; + -w) + wait $2 + ;; + *) + echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}" + exit 2 +esac + +exit $? diff --git a/samples/mic/mpssd/mpss b/samples/mic/mpssd/mpss new file mode 100755 index 000000000000..5fcf9fa4b082 --- /dev/null +++ b/samples/mic/mpssd/mpss @@ -0,0 +1,200 @@ +#!/bin/bash +# Intel MIC Platform Software Stack (MPSS) +# +# Copyright(c) 2013 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Intel MIC User Space Tools. +# +# mpss Start mpssd. +# +# chkconfig: 2345 95 05 +# description: start MPSS stack processing. +# +### BEGIN INIT INFO +# Provides: mpss +# Required-Start: +# Required-Stop: +# Short-Description: MPSS stack control +# Description: MPSS stack control +### END INIT INFO + +# Source function library. +. /etc/init.d/functions + +exec=/usr/sbin/mpssd +sysfs="/sys/class/mic" +mic_modules="mic_host mic_x100_dma scif vop" + +start() +{ + [ -x $exec ] || exit 5 + + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then + echo -e $"MPSSD already running! " + success + echo + return 0 + fi + + echo -e $"Starting MPSS Stack" + echo -e $"Loading MIC drivers:" $mic_modules + + modprobe -a $mic_modules + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + failure + echo + return $RETVAL + fi + + # Start the daemon + echo -n $"Starting MPSSD " + $exec + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + failure + echo + return $RETVAL + fi + success + echo + + sleep 5 + + # Boot the cards + micctrl -b + + # Wait till ping works + for f in $sysfs/* + do + count=100 + ipaddr=`cat $f/cmdline` + ipaddr=${ipaddr#*address,} + ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1` + while [ $count -ge 0 ] + do + echo -e "Pinging "`basename $f`" " + ping -c 1 $ipaddr &> /dev/null + RETVAL=$? + if [ $RETVAL -eq 0 ]; then + success + break + fi + sleep 1 + count=`expr $count - 1` + done + [ $RETVAL -ne 0 ] && failure || success + echo + done + return $RETVAL +} + +stop() +{ + echo -e $"Shutting down MPSS Stack: " + + # Bail out if module is unloaded + if [ ! -d "$sysfs" ]; then + echo -n $"Module unloaded " + success + echo + return 0 + fi + + # Shut down the cards. + micctrl -S + + # Wait for the cards to go offline + for f in $sysfs/* + do + while [ "`cat $f/state`" != "ready" ] + do + sleep 1 + echo -e "Waiting for "`basename $f`" to become ready" + done + done + + # Display the status of the cards + micctrl -s + + # Kill MPSSD now + echo -n $"Killing MPSSD" + killall -9 mpssd 2>/dev/null + RETVAL=$? + [ $RETVAL -ne 0 ] && failure || success + echo + return $RETVAL +} + +restart() +{ + stop + sleep 5 + start +} + +status() +{ + micctrl -s + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then + echo "mpssd is running" + else + echo "mpssd is stopped" + fi + return 0 +} + +unload() +{ + if [ ! -d "$sysfs" ]; then + echo -n $"No MIC_HOST Module: " + success + echo + return + fi + + stop + + sleep 5 + echo -n $"Removing MIC drivers:" $mic_modules + modprobe -r $mic_modules + RETVAL=$? + [ $RETVAL -ne 0 ] && failure || success + echo + return $RETVAL +} + +case $1 in + start) + start + ;; + stop) + stop + ;; + restart) + restart + ;; + status) + status + ;; + unload) + unload + ;; + *) + echo $"Usage: $0 {start|stop|restart|status|unload}" + exit 2 +esac + +exit $? diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c new file mode 100644 index 000000000000..49db1def1721 --- /dev/null +++ b/samples/mic/mpssd/mpssd.c @@ -0,0 +1,1826 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <fcntl.h> +#include <getopt.h> +#include <assert.h> +#include <unistd.h> +#include <stdbool.h> +#include <signal.h> +#include <poll.h> +#include <features.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_net.h> +#include <linux/virtio_console.h> +#include <linux/virtio_blk.h> +#include <linux/version.h> +#include "mpssd.h" +#include <linux/mic_ioctl.h> +#include <linux/mic_common.h> +#include <tools/endian.h> + +static void *init_mic(void *arg); + +static FILE *logfp; +static struct mic_info mic_list; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1 : __min2; }) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) +#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr, size) _ALIGN_UP(addr, size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define GSO_ENABLED 1 +#define MAX_GSO_SIZE (64 * 1024) +#define ETH_H_LEN 14 +#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) +#define MIC_DEVICE_PAGE_END 0x1000 + +#ifndef VIRTIO_NET_HDR_F_DATA_VALID +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ +#endif + +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[2]; + __u32 host_features, guest_acknowledgements; + struct virtio_console_config cons_config; +} virtcons_dev_page = { + .dd = { + .type = VIRTIO_ID_CONSOLE, + .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), + .feature_len = sizeof(virtcons_dev_page.host_features), + .config_len = sizeof(virtcons_dev_page.cons_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .vqconfig[1] = { + .num = htole16(MIC_VRING_ENTRIES), + }, +}; + +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[2]; + __u32 host_features, guest_acknowledgements; + struct virtio_net_config net_config; +} virtnet_dev_page = { + .dd = { + .type = VIRTIO_ID_NET, + .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), + .feature_len = sizeof(virtnet_dev_page.host_features), + .config_len = sizeof(virtnet_dev_page.net_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .vqconfig[1] = { + .num = htole16(MIC_VRING_ENTRIES), + }, +#if GSO_ENABLED + .host_features = htole32( + 1 << VIRTIO_NET_F_CSUM | + 1 << VIRTIO_NET_F_GSO | + 1 << VIRTIO_NET_F_GUEST_TSO4 | + 1 << VIRTIO_NET_F_GUEST_TSO6 | + 1 << VIRTIO_NET_F_GUEST_ECN), +#else + .host_features = 0, +#endif +}; + +static const char *mic_config_dir = "/etc/mpss"; +static const char *virtblk_backend = "VIRTBLK_BACKEND"; +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[1]; + __u32 host_features, guest_acknowledgements; + struct virtio_blk_config blk_config; +} virtblk_dev_page = { + .dd = { + .type = VIRTIO_ID_BLOCK, + .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), + .feature_len = sizeof(virtblk_dev_page.host_features), + .config_len = sizeof(virtblk_dev_page.blk_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .host_features = + htole32(1<<VIRTIO_BLK_F_SEG_MAX), + .blk_config = { + .seg_max = htole32(MIC_VRING_ENTRIES - 2), + .capacity = htole64(0), + } +}; + +static char *myname; + +static int +tap_configure(struct mic_info *mic, char *dev) +{ + pid_t pid; + char *ifargv[7]; + char ipaddr[IFNAMSIZ]; + int ret = 0; + + pid = fork(); + if (pid == 0) { + ifargv[0] = "ip"; + ifargv[1] = "link"; + ifargv[2] = "set"; + ifargv[3] = dev; + ifargv[4] = "up"; + ifargv[5] = NULL; + mpsslog("Configuring %s\n", dev); + ret = execvp("ip", ifargv); + if (ret < 0) { + mpsslog("%s execvp failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + } + if (pid < 0) { + mpsslog("%s fork failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + ret = waitpid(pid, NULL, 0); + if (ret < 0) { + mpsslog("%s waitpid failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1); + + pid = fork(); + if (pid == 0) { + ifargv[0] = "ip"; + ifargv[1] = "addr"; + ifargv[2] = "add"; + ifargv[3] = ipaddr; + ifargv[4] = "dev"; + ifargv[5] = dev; + ifargv[6] = NULL; + mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); + ret = execvp("ip", ifargv); + if (ret < 0) { + mpsslog("%s execvp failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + } + if (pid < 0) { + mpsslog("%s fork failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + ret = waitpid(pid, NULL, 0); + if (ret < 0) { + mpsslog("%s waitpid failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + mpsslog("MIC name %s %s %d DONE!\n", + mic->name, __func__, __LINE__); + return 0; +} + +static int tun_alloc(struct mic_info *mic, char *dev) +{ + struct ifreq ifr; + int fd, err; +#if GSO_ENABLED + unsigned offload; +#endif + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); + goto done; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + if (*dev) + strncpy(ifr.ifr_name, dev, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, (void *)&ifr); + if (err < 0) { + mpsslog("%s %s %d TUNSETIFF failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + close(fd); + return err; + } +#if GSO_ENABLED + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN; + + err = ioctl(fd, TUNSETOFFLOAD, offload); + if (err < 0) { + mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + close(fd); + return err; + } +#endif + strcpy(dev, ifr.ifr_name); + mpsslog("Created TAP %s\n", dev); +done: + return fd; +} + +#define NET_FD_VIRTIO_NET 0 +#define NET_FD_TUN 1 +#define MAX_NET_FD 2 + +static void set_dp(struct mic_info *mic, int type, void *dp) +{ + switch (type) { + case VIRTIO_ID_CONSOLE: + mic->mic_console.console_dp = dp; + return; + case VIRTIO_ID_NET: + mic->mic_net.net_dp = dp; + return; + case VIRTIO_ID_BLOCK: + mic->mic_virtblk.block_dp = dp; + return; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); +} + +static void *get_dp(struct mic_info *mic, int type) +{ + switch (type) { + case VIRTIO_ID_CONSOLE: + return mic->mic_console.console_dp; + case VIRTIO_ID_NET: + return mic->mic_net.net_dp; + case VIRTIO_ID_BLOCK: + return mic->mic_virtblk.block_dp; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); + return NULL; +} + +static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) +{ + struct mic_device_desc *d; + int i; + void *dp = get_dp(mic, type); + + for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; + i += mic_total_desc_size(d)) { + d = dp + i; + + /* End of list */ + if (d->type == 0) + break; + + if (d->type == -1) + continue; + + mpsslog("%s %s d-> type %d d %p\n", + mic->name, __func__, d->type, d); + + if (d->type == (__u8)type) + return d; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + return NULL; +} + +/* See comments in vhost.c for explanation of next_desc() */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) + return -1U; + next = le16toh(desc->next); + return next; +} + +/* Sum up all the IOVEC length */ +static ssize_t +sum_iovec_len(struct mic_copy_desc *copy) +{ + ssize_t sum = 0; + unsigned int i; + + for (i = 0; i < copy->iovcnt; i++) + sum += copy->iov[i].iov_len; + return sum; +} + +static inline void verify_out_len(struct mic_info *mic, + struct mic_copy_desc *copy) +{ + if (copy->out_len != sum_iovec_len(copy)) { + mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", + mic->name, __func__, __LINE__, + copy->out_len, sum_iovec_len(copy)); + assert(copy->out_len == sum_iovec_len(copy)); + } +} + +/* Display an iovec */ +static void +disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, + const char *s, int line) +{ + unsigned int i; + + for (i = 0; i < copy->iovcnt; i++) + mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", + mic->name, s, line, i, + copy->iov[i].iov_base, copy->iov[i].iov_len); +} + +static inline __u16 read_avail_idx(struct mic_vring *vr) +{ + return ACCESS_ONCE(vr->info->avail_idx); +} + +static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, + struct mic_copy_desc *copy, ssize_t len) +{ + copy->vr_idx = tx ? 0 : 1; + copy->update_used = true; + if (type == VIRTIO_ID_NET) + copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); + else + copy->iov[0].iov_len = len; +} + +/* Central API which triggers the copies */ +static int +mic_virtio_copy(struct mic_info *mic, int fd, + struct mic_vring *vr, struct mic_copy_desc *copy) +{ + int ret; + + ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); + if (ret) { + mpsslog("%s %s %d errno %s ret %d\n", + mic->name, __func__, __LINE__, + strerror(errno), ret); + } + return ret; +} + +static inline unsigned _vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* + * This initialization routine requires at least one + * vring i.e. vr0. vr1 is optional. + */ +static void * +init_vr(struct mic_info *mic, int fd, int type, + struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) +{ + int vr_size; + char *va; + + vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN) + + sizeof(struct _mic_vring_info)); + va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, + PROT_READ, MAP_SHARED, fd, 0); + if (MAP_FAILED == va) { + mpsslog("%s %s %d mmap failed errno %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + goto done; + } + set_dp(mic, type, va); + vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; + vr0->info = vr0->va + + _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); + vring_init(&vr0->vr, + MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); + mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", + __func__, mic->name, vr0->va, vr0->info, vr_size, + _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); + mpsslog("magic 0x%x expected 0x%x\n", + le32toh(vr0->info->magic), MIC_MAGIC + type); + assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); + if (vr1) { + vr1->va = (struct mic_vring *) + &va[MIC_DEVICE_PAGE_END + vr_size]; + vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN); + vring_init(&vr1->vr, + MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); + mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", + __func__, mic->name, vr1->va, vr1->info, vr_size, + _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); + mpsslog("magic 0x%x expected 0x%x\n", + le32toh(vr1->info->magic), MIC_MAGIC + type + 1); + assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); + } +done: + return va; +} + +static int +wait_for_card_driver(struct mic_info *mic, int fd, int type) +{ + struct pollfd pollfd; + int err; + struct mic_device_desc *desc = get_device_desc(mic, type); + __u8 prev_status; + + if (!desc) + return -ENODEV; + prev_status = desc->status; + pollfd.fd = fd; + mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n", + mic->name, __func__, type, desc->status); + + while (1) { + pollfd.events = POLLIN; + pollfd.revents = 0; + err = poll(&pollfd, 1, -1); + if (err < 0) { + mpsslog("%s %s poll failed %s\n", + mic->name, __func__, strerror(errno)); + continue; + } + + if (pollfd.revents) { + if (desc->status != prev_status) { + mpsslog("%s %s Waiting... desc-> type %d " + "status 0x%x\n", + mic->name, __func__, type, + desc->status); + prev_status = desc->status; + } + if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { + mpsslog("%s %s poll.revents %d\n", + mic->name, __func__, pollfd.revents); + mpsslog("%s %s desc-> type %d status 0x%x\n", + mic->name, __func__, type, + desc->status); + break; + } + } + } + return 0; +} + +/* Spin till we have some descriptors */ +static void +spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) +{ + __u16 avail_idx = read_avail_idx(vr); + + while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { +#ifdef DEBUG + mpsslog("%s %s waiting for desc avail %d info_avail %d\n", + mic->name, __func__, + le16toh(vr->vr.avail->idx), vr->info->avail_idx); +#endif + sched_yield(); + } +} + +static void * +virtio_net(void *arg) +{ + static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; + static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); + struct iovec vnet_iov[2][2] = { + { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, + { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, + { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, + { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, + }; + struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; + struct mic_info *mic = (struct mic_info *)arg; + char if_name[IFNAMSIZ]; + struct pollfd net_poll[MAX_NET_FD]; + struct mic_vring tx_vr, rx_vr; + struct mic_copy_desc copy; + struct mic_device_desc *desc; + int err; + + snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); + mic->mic_net.tap_fd = tun_alloc(mic, if_name); + if (mic->mic_net.tap_fd < 0) + goto done; + + if (tap_configure(mic, if_name)) + goto done; + mpsslog("MIC name %s id %d\n", mic->name, mic->id); + + net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; + net_poll[NET_FD_VIRTIO_NET].events = POLLIN; + net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; + net_poll[NET_FD_TUN].events = POLLIN; + + if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, + VIRTIO_ID_NET, &tx_vr, &rx_vr, + virtnet_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + goto done; + } + + copy.iovcnt = 2; + desc = get_device_desc(mic, VIRTIO_ID_NET); + + while (1) { + ssize_t len; + + net_poll[NET_FD_VIRTIO_NET].revents = 0; + net_poll[NET_FD_TUN].revents = 0; + + /* Start polling for data from tap and virtio net */ + err = poll(net_poll, 2, -1); + if (err < 0) { + mpsslog("%s poll failed %s\n", + __func__, strerror(errno)); + continue; + } + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + err = wait_for_card_driver(mic, + mic->mic_net.virtio_net_fd, + VIRTIO_ID_NET); + if (err) { + mpsslog("%s %s %d Exiting...\n", + mic->name, __func__, __LINE__); + break; + } + } + /* + * Check if there is data to be read from TUN and write to + * virtio net fd if there is. + */ + if (net_poll[NET_FD_TUN].revents & POLLIN) { + copy.iov = iov0; + len = readv(net_poll[NET_FD_TUN].fd, + copy.iov, copy.iovcnt); + if (len > 0) { + struct virtio_net_hdr *hdr + = (struct virtio_net_hdr *)vnet_hdr[0]; + + /* Disable checksums on the card since we are on + a reliable PCIe link */ + hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; +#ifdef DEBUG + mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, + __func__, __LINE__, hdr->flags); + mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", + copy.out_len, hdr->gso_type); +#endif +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d read from tap 0x%lx\n", + mic->name, __func__, __LINE__, + len); +#endif + spin_for_descriptors(mic, &tx_vr); + txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©, + len); + + err = mic_virtio_copy(mic, + mic->mic_net.virtio_net_fd, &tx_vr, + ©); + if (err < 0) { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + } + if (!err) + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d wrote to net 0x%lx\n", + mic->name, __func__, __LINE__, + sum_iovec_len(©)); +#endif + /* Reinitialize IOV for next run */ + iov0[1].iov_len = MAX_NET_PKT_SIZE; + } else if (len < 0) { + disp_iovec(mic, ©, __func__, __LINE__); + mpsslog("%s %s %d read failed %s ", mic->name, + __func__, __LINE__, strerror(errno)); + mpsslog("cnt %d sum %zd\n", + copy.iovcnt, sum_iovec_len(©)); + } + } + + /* + * Check if there is data to be read from virtio net and + * write to TUN if there is. + */ + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { + while (rx_vr.info->avail_idx != + le16toh(rx_vr.vr.avail->idx)) { + copy.iov = iov1; + txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©, + MAX_NET_PKT_SIZE + + sizeof(struct virtio_net_hdr)); + + err = mic_virtio_copy(mic, + mic->mic_net.virtio_net_fd, &rx_vr, + ©); + if (!err) { +#ifdef DEBUG + struct virtio_net_hdr *hdr + = (struct virtio_net_hdr *) + vnet_hdr[1]; + + mpsslog("%s %s %d hdr->flags 0x%x, ", + mic->name, __func__, __LINE__, + hdr->flags); + mpsslog("out_len %d gso_type 0x%x\n", + copy.out_len, + hdr->gso_type); +#endif + /* Set the correct output iov_len */ + iov1[1].iov_len = copy.out_len - + sizeof(struct virtio_net_hdr); + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, __LINE__); + mpsslog("read from net 0x%lx\n", + sum_iovec_len(copy)); +#endif + len = writev(net_poll[NET_FD_TUN].fd, + copy.iov, copy.iovcnt); + if (len != sum_iovec_len(©)) { + mpsslog("Tun write failed %s ", + strerror(errno)); + mpsslog("len 0x%zx ", len); + mpsslog("read_len 0x%zx\n", + sum_iovec_len(©)); + } else { +#ifdef DEBUG + disp_iovec(mic, ©, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, + __LINE__); + mpsslog("wrote to tap 0x%lx\n", + len); +#endif + } + } else { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + break; + } + } + } + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) + mpsslog("%s: %s: POLLERR\n", __func__, mic->name); + } +done: + pthread_exit(NULL); +} + +/* virtio_console */ +#define VIRTIO_CONSOLE_FD 0 +#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) +#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ +#define MAX_BUFFER_SIZE PAGE_SIZE + +static void * +virtio_console(void *arg) +{ + static __u8 vcons_buf[2][PAGE_SIZE]; + struct iovec vcons_iov[2] = { + { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, + { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, + }; + struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; + struct mic_info *mic = (struct mic_info *)arg; + int err; + struct pollfd console_poll[MAX_CONSOLE_FD]; + int pty_fd; + char *pts_name; + ssize_t len; + struct mic_vring tx_vr, rx_vr; + struct mic_copy_desc copy; + struct mic_device_desc *desc; + + pty_fd = posix_openpt(O_RDWR); + if (pty_fd < 0) { + mpsslog("can't open a pseudoterminal master device: %s\n", + strerror(errno)); + goto _return; + } + pts_name = ptsname(pty_fd); + if (pts_name == NULL) { + mpsslog("can't get pts name\n"); + goto _close_pty; + } + printf("%s console message goes to %s\n", mic->name, pts_name); + mpsslog("%s console message goes to %s\n", mic->name, pts_name); + err = grantpt(pty_fd); + if (err < 0) { + mpsslog("can't grant access: %s %s\n", + pts_name, strerror(errno)); + goto _close_pty; + } + err = unlockpt(pty_fd); + if (err < 0) { + mpsslog("can't unlock a pseudoterminal: %s %s\n", + pts_name, strerror(errno)); + goto _close_pty; + } + console_poll[MONITOR_FD].fd = pty_fd; + console_poll[MONITOR_FD].events = POLLIN; + + console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; + console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; + + if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, + VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, + virtcons_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + goto _close_pty; + } + + copy.iovcnt = 1; + desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); + + for (;;) { + console_poll[MONITOR_FD].revents = 0; + console_poll[VIRTIO_CONSOLE_FD].revents = 0; + err = poll(console_poll, MAX_CONSOLE_FD, -1); + if (err < 0) { + mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, + strerror(errno)); + continue; + } + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + err = wait_for_card_driver(mic, + mic->mic_console.virtio_console_fd, + VIRTIO_ID_CONSOLE); + if (err) { + mpsslog("%s %s %d Exiting...\n", + mic->name, __func__, __LINE__); + break; + } + } + + if (console_poll[MONITOR_FD].revents & POLLIN) { + copy.iov = iov0; + len = readv(pty_fd, copy.iov, copy.iovcnt); + if (len > 0) { +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d read from tap 0x%lx\n", + mic->name, __func__, __LINE__, + len); +#endif + spin_for_descriptors(mic, &tx_vr); + txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, + ©, len); + + err = mic_virtio_copy(mic, + mic->mic_console.virtio_console_fd, + &tx_vr, ©); + if (err < 0) { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + } + if (!err) + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d wrote to net 0x%lx\n", + mic->name, __func__, __LINE__, + sum_iovec_len(copy)); +#endif + /* Reinitialize IOV for next run */ + iov0->iov_len = PAGE_SIZE; + } else if (len < 0) { + disp_iovec(mic, ©, __func__, __LINE__); + mpsslog("%s %s %d read failed %s ", + mic->name, __func__, __LINE__, + strerror(errno)); + mpsslog("cnt %d sum %zd\n", + copy.iovcnt, sum_iovec_len(©)); + } + } + + if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { + while (rx_vr.info->avail_idx != + le16toh(rx_vr.vr.avail->idx)) { + copy.iov = iov1; + txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, + ©, PAGE_SIZE); + + err = mic_virtio_copy(mic, + mic->mic_console.virtio_console_fd, + &rx_vr, ©); + if (!err) { + /* Set the correct output iov_len */ + iov1->iov_len = copy.out_len; + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, __LINE__); + mpsslog("read from net 0x%lx\n", + sum_iovec_len(copy)); +#endif + len = writev(pty_fd, + copy.iov, copy.iovcnt); + if (len != sum_iovec_len(©)) { + mpsslog("Tun write failed %s ", + strerror(errno)); + mpsslog("len 0x%zx ", len); + mpsslog("read_len 0x%zx\n", + sum_iovec_len(©)); + } else { +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, + __LINE__); + mpsslog("wrote to tap 0x%lx\n", + len); +#endif + } + } else { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + break; + } + } + } + if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) + mpsslog("%s: %s: POLLERR\n", __func__, mic->name); + } +_close_pty: + close(pty_fd); +_return: + pthread_exit(NULL); +} + +static void +add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) +{ + char path[PATH_MAX]; + int fd, err; + + snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id); + fd = open(path, O_RDWR); + if (fd < 0) { + mpsslog("Could not open %s %s\n", path, strerror(errno)); + return; + } + + err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); + if (err < 0) { + mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); + close(fd); + return; + } + switch (dd->type) { + case VIRTIO_ID_NET: + mic->mic_net.virtio_net_fd = fd; + mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); + break; + case VIRTIO_ID_CONSOLE: + mic->mic_console.virtio_console_fd = fd; + mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); + break; + case VIRTIO_ID_BLOCK: + mic->mic_virtblk.virtio_block_fd = fd; + mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); + break; + } +} + +static bool +set_backend_file(struct mic_info *mic) +{ + FILE *config; + char buff[PATH_MAX], *line, *evv, *p; + + snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); + config = fopen(buff, "r"); + if (config == NULL) + return false; + do { /* look for "virtblk_backend=XXXX" */ + line = fgets(buff, PATH_MAX, config); + if (line == NULL) + break; + if (*line == '#') + continue; + p = strchr(line, '\n'); + if (p) + *p = '\0'; + } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); + fclose(config); + if (line == NULL) + return false; + evv = strchr(line, '='); + if (evv == NULL) + return false; + mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); + if (mic->mic_virtblk.backend_file == NULL) { + mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); + return false; + } + strcpy(mic->mic_virtblk.backend_file, evv + 1); + return true; +} + +#define SECTOR_SIZE 512 +static bool +set_backend_size(struct mic_info *mic) +{ + mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, + SEEK_END); + if (mic->mic_virtblk.backend_size < 0) { + mpsslog("%s: can't seek: %s\n", + mic->name, mic->mic_virtblk.backend_file); + return false; + } + virtblk_dev_page.blk_config.capacity = + mic->mic_virtblk.backend_size / SECTOR_SIZE; + if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) + virtblk_dev_page.blk_config.capacity++; + + virtblk_dev_page.blk_config.capacity = + htole64(virtblk_dev_page.blk_config.capacity); + + return true; +} + +static bool +open_backend(struct mic_info *mic) +{ + if (!set_backend_file(mic)) + goto _error_exit; + mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); + if (mic->mic_virtblk.backend < 0) { + mpsslog("%s: can't open: %s\n", mic->name, + mic->mic_virtblk.backend_file); + goto _error_free; + } + if (!set_backend_size(mic)) + goto _error_close; + mic->mic_virtblk.backend_addr = mmap(NULL, + mic->mic_virtblk.backend_size, + PROT_READ|PROT_WRITE, MAP_SHARED, + mic->mic_virtblk.backend, 0L); + if (mic->mic_virtblk.backend_addr == MAP_FAILED) { + mpsslog("%s: can't map: %s %s\n", + mic->name, mic->mic_virtblk.backend_file, + strerror(errno)); + goto _error_close; + } + return true; + + _error_close: + close(mic->mic_virtblk.backend); + _error_free: + free(mic->mic_virtblk.backend_file); + _error_exit: + return false; +} + +static void +close_backend(struct mic_info *mic) +{ + munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); + close(mic->mic_virtblk.backend); + free(mic->mic_virtblk.backend_file); +} + +static bool +start_virtblk(struct mic_info *mic, struct mic_vring *vring) +{ + if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { + mpsslog("%s: blk_config is not 8 byte aligned.\n", + mic->name); + return false; + } + add_virtio_device(mic, &virtblk_dev_page.dd); + if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, + VIRTIO_ID_BLOCK, vring, NULL, + virtblk_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + return false; + } + return true; +} + +static void +stop_virtblk(struct mic_info *mic) +{ + int vr_size, ret; + + vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN) + + sizeof(struct _mic_vring_info)); + ret = munmap(mic->mic_virtblk.block_dp, + MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); + if (ret < 0) + mpsslog("%s munmap errno %d\n", mic->name, errno); + close(mic->mic_virtblk.virtio_block_fd); +} + +static __u8 +header_error_check(struct vring_desc *desc) +{ + if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { + mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", + __func__, __LINE__); + return -EIO; + } + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { + mpsslog("%s() %d: alone\n", + __func__, __LINE__); + return -EIO; + } + if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { + mpsslog("%s() %d: not read\n", + __func__, __LINE__); + return -EIO; + } + return 0; +} + +static int +read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) +{ + struct iovec iovec; + struct mic_copy_desc copy; + + iovec.iov_len = sizeof(*hdr); + iovec.iov_base = hdr; + copy.iov = &iovec; + copy.iovcnt = 1; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = false; /* do not update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); +} + +static int +transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) +{ + struct mic_copy_desc copy; + + copy.iov = iovec; + copy.iovcnt = iovcnt; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = false; /* do not update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); +} + +static __u8 +status_error_check(struct vring_desc *desc) +{ + if (le32toh(desc->len) != sizeof(__u8)) { + mpsslog("%s() %d: length is not sizeof(status)\n", + __func__, __LINE__); + return -EIO; + } + return 0; +} + +static int +write_status(int fd, __u8 *status) +{ + struct iovec iovec; + struct mic_copy_desc copy; + + iovec.iov_base = status; + iovec.iov_len = sizeof(*status); + copy.iov = &iovec; + copy.iovcnt = 1; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = true; /* Update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); +} + +#ifndef VIRTIO_BLK_T_GET_ID +#define VIRTIO_BLK_T_GET_ID 8 +#endif + +static void * +virtio_block(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + int ret; + struct pollfd block_poll; + struct mic_vring vring; + __u16 avail_idx; + __u32 desc_idx; + struct vring_desc *desc; + struct iovec *iovec, *piov; + __u8 status; + __u32 buffer_desc_idx; + struct virtio_blk_outhdr hdr; + void *fos; + + for (;;) { /* forever */ + if (!open_backend(mic)) { /* No virtblk */ + for (mic->mic_virtblk.signaled = 0; + !mic->mic_virtblk.signaled;) + sleep(1); + continue; + } + + /* backend file is specified. */ + if (!start_virtblk(mic, &vring)) + goto _close_backend; + iovec = malloc(sizeof(*iovec) * + le32toh(virtblk_dev_page.blk_config.seg_max)); + if (!iovec) { + mpsslog("%s: can't alloc iovec: %s\n", + mic->name, strerror(ENOMEM)); + goto _stop_virtblk; + } + + block_poll.fd = mic->mic_virtblk.virtio_block_fd; + block_poll.events = POLLIN; + for (mic->mic_virtblk.signaled = 0; + !mic->mic_virtblk.signaled;) { + block_poll.revents = 0; + /* timeout in 1 sec to see signaled */ + ret = poll(&block_poll, 1, 1000); + if (ret < 0) { + mpsslog("%s %d: poll failed: %s\n", + __func__, __LINE__, + strerror(errno)); + continue; + } + + if (!(block_poll.revents & POLLIN)) { +#ifdef DEBUG + mpsslog("%s %d: block_poll.revents=0x%x\n", + __func__, __LINE__, block_poll.revents); +#endif + continue; + } + + /* POLLIN */ + while (vring.info->avail_idx != + le16toh(vring.vr.avail->idx)) { + /* read header element */ + avail_idx = + vring.info->avail_idx & + (vring.vr.num - 1); + desc_idx = le16toh( + vring.vr.avail->ring[avail_idx]); + desc = &vring.vr.desc[desc_idx]; +#ifdef DEBUG + mpsslog("%s() %d: avail_idx=%d ", + __func__, __LINE__, + vring.info->avail_idx); + mpsslog("vring.vr.num=%d desc=%p\n", + vring.vr.num, desc); +#endif + status = header_error_check(desc); + ret = read_header( + mic->mic_virtblk.virtio_block_fd, + &hdr, desc_idx); + if (ret < 0) { + mpsslog("%s() %d %s: ret=%d %s\n", + __func__, __LINE__, + mic->name, ret, + strerror(errno)); + break; + } + /* buffer element */ + piov = iovec; + status = 0; + fos = mic->mic_virtblk.backend_addr + + (hdr.sector * SECTOR_SIZE); + buffer_desc_idx = next_desc(desc); + desc_idx = buffer_desc_idx; + for (desc = &vring.vr.desc[buffer_desc_idx]; + desc->flags & VRING_DESC_F_NEXT; + desc_idx = next_desc(desc), + desc = &vring.vr.desc[desc_idx]) { + piov->iov_len = desc->len; + piov->iov_base = fos; + piov++; + fos += desc->len; + } + /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */ + if (hdr.type & ~(VIRTIO_BLK_T_OUT | + VIRTIO_BLK_T_GET_ID)) { + /* + VIRTIO_BLK_T_IN - does not do + anything. Probably for documenting. + VIRTIO_BLK_T_SCSI_CMD - for + virtio_scsi. + VIRTIO_BLK_T_FLUSH - turned off in + config space. + VIRTIO_BLK_T_BARRIER - defined but not + used in anywhere. + */ + mpsslog("%s() %d: type %x ", + __func__, __LINE__, + hdr.type); + mpsslog("is not supported\n"); + status = -ENOTSUP; + + } else { + ret = transfer_blocks( + mic->mic_virtblk.virtio_block_fd, + iovec, + piov - iovec); + if (ret < 0 && + status != 0) + status = ret; + } + /* write status and update used pointer */ + if (status != 0) + status = status_error_check(desc); + ret = write_status( + mic->mic_virtblk.virtio_block_fd, + &status); +#ifdef DEBUG + mpsslog("%s() %d: write status=%d on desc=%p\n", + __func__, __LINE__, + status, desc); +#endif + } + } + free(iovec); +_stop_virtblk: + stop_virtblk(mic); +_close_backend: + close_backend(mic); + } /* forever */ + + pthread_exit(NULL); +} + +static void +reset(struct mic_info *mic) +{ +#define RESET_TIMEOUT 120 + int i = RESET_TIMEOUT; + setsysfs(mic->name, "state", "reset"); + while (i) { + char *state; + state = readsysfs(mic->name, "state"); + if (!state) + goto retry; + mpsslog("%s: %s %d state %s\n", + mic->name, __func__, __LINE__, state); + + if (!strcmp(state, "ready")) { + free(state); + break; + } + free(state); +retry: + sleep(1); + i--; + } +} + +static int +get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) +{ + if (!strcmp(shutdown_status, "nop")) + return MIC_NOP; + if (!strcmp(shutdown_status, "crashed")) + return MIC_CRASHED; + if (!strcmp(shutdown_status, "halted")) + return MIC_HALTED; + if (!strcmp(shutdown_status, "poweroff")) + return MIC_POWER_OFF; + if (!strcmp(shutdown_status, "restart")) + return MIC_RESTART; + mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); + /* Invalid state */ + assert(0); +}; + +static int get_mic_state(struct mic_info *mic) +{ + char *state = NULL; + enum mic_states mic_state; + + while (!state) { + state = readsysfs(mic->name, "state"); + sleep(1); + } + mpsslog("%s: %s %d state %s\n", + mic->name, __func__, __LINE__, state); + + if (!strcmp(state, "ready")) { + mic_state = MIC_READY; + } else if (!strcmp(state, "booting")) { + mic_state = MIC_BOOTING; + } else if (!strcmp(state, "online")) { + mic_state = MIC_ONLINE; + } else if (!strcmp(state, "shutting_down")) { + mic_state = MIC_SHUTTING_DOWN; + } else if (!strcmp(state, "reset_failed")) { + mic_state = MIC_RESET_FAILED; + } else if (!strcmp(state, "resetting")) { + mic_state = MIC_RESETTING; + } else { + mpsslog("%s: BUG invalid state %s\n", mic->name, state); + assert(0); + } + + free(state); + return mic_state; +}; + +static void mic_handle_shutdown(struct mic_info *mic) +{ +#define SHUTDOWN_TIMEOUT 60 + int i = SHUTDOWN_TIMEOUT; + char *shutdown_status; + while (i) { + shutdown_status = readsysfs(mic->name, "shutdown_status"); + if (!shutdown_status) { + sleep(1); + continue; + } + mpsslog("%s: %s %d shutdown_status %s\n", + mic->name, __func__, __LINE__, shutdown_status); + switch (get_mic_shutdown_status(mic, shutdown_status)) { + case MIC_RESTART: + mic->restart = 1; + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_CRASHED: + free(shutdown_status); + goto reset; + default: + break; + } + free(shutdown_status); + sleep(1); + i--; + } +reset: + if (!i) + mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n", + mic->name, __func__, __LINE__, shutdown_status); + reset(mic); +} + +static int open_state_fd(struct mic_info *mic) +{ + char pathname[PATH_MAX]; + int fd; + + snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", + MICSYSFSDIR, mic->name, "state"); + + fd = open(pathname, O_RDONLY); + if (fd < 0) + mpsslog("%s: opening file %s failed %s\n", + mic->name, pathname, strerror(errno)); + return fd; +} + +static int block_till_state_change(int fd, struct mic_info *mic) +{ + struct pollfd ufds[1]; + char value[PAGE_SIZE]; + int ret; + + ufds[0].fd = fd; + ufds[0].events = POLLERR | POLLPRI; + ret = poll(ufds, 1, -1); + if (ret < 0) { + mpsslog("%s: %s %d poll failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + return ret; + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + mpsslog("%s: %s %d Failed to seek to 0: %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + return ret; + } + + ret = read(fd, value, sizeof(value)); + if (ret < 0) { + mpsslog("%s: %s %d Failed to read sysfs entry: %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + return ret; + } + + return 0; +} + +static void * +mic_config(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + int fd, ret, stat = 0; + + fd = open_state_fd(mic); + if (fd < 0) { + mpsslog("%s: %s %d open state fd failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + goto exit; + } + + do { + ret = block_till_state_change(fd, mic); + if (ret < 0) { + mpsslog("%s: %s %d block_till_state_change error %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + goto close_exit; + } + + switch (get_mic_state(mic)) { + case MIC_SHUTTING_DOWN: + mic_handle_shutdown(mic); + break; + case MIC_READY: + case MIC_RESET_FAILED: + ret = kill(mic->pid, SIGTERM); + mpsslog("%s: %s %d kill pid %d ret %d\n", + mic->name, __func__, __LINE__, + mic->pid, ret); + if (!ret) { + ret = waitpid(mic->pid, &stat, + WIFSIGNALED(stat)); + mpsslog("%s: %s %d waitpid ret %d pid %d\n", + mic->name, __func__, __LINE__, + ret, mic->pid); + } + if (mic->boot_on_resume) { + setsysfs(mic->name, "state", "boot"); + mic->boot_on_resume = 0; + } + goto close_exit; + default: + break; + } + } while (1); + +close_exit: + close(fd); +exit: + init_mic(mic); + pthread_exit(NULL); +} + +static void +set_cmdline(struct mic_info *mic) +{ + char buffer[PATH_MAX]; + int len; + + len = snprintf(buffer, PATH_MAX, + "clocksource=tsc highres=off nohz=off "); + len += snprintf(buffer + len, PATH_MAX - len, + "cpufreq_on;corec6_off;pc3_off;pc6_off "); + len += snprintf(buffer + len, PATH_MAX - len, + "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", + mic->id + 1); + + setsysfs(mic->name, "cmdline", buffer); + mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); + snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1); + mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); +} + +static void +set_log_buf_info(struct mic_info *mic) +{ + int fd; + off_t len; + char system_map[] = "/lib/firmware/mic/System.map"; + char *map, *temp, log_buf[17] = {'\0'}; + + fd = open(system_map, O_RDONLY); + if (fd < 0) { + mpsslog("%s: Opening System.map failed: %d\n", + mic->name, errno); + return; + } + len = lseek(fd, 0, SEEK_END); + if (len < 0) { + mpsslog("%s: Reading System.map size failed: %d\n", + mic->name, errno); + close(fd); + return; + } + map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) { + mpsslog("%s: mmap of System.map failed: %d\n", + mic->name, errno); + close(fd); + return; + } + temp = strstr(map, "__log_buf"); + if (!temp) { + mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); + munmap(map, len); + close(fd); + return; + } + strncpy(log_buf, temp - 19, 16); + setsysfs(mic->name, "log_buf_addr", log_buf); + mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); + temp = strstr(map, "log_buf_len"); + if (!temp) { + mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); + munmap(map, len); + close(fd); + return; + } + strncpy(log_buf, temp - 19, 16); + setsysfs(mic->name, "log_buf_len", log_buf); + mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); + munmap(map, len); + close(fd); +} + +static void +change_virtblk_backend(int x, siginfo_t *siginfo, void *p) +{ + struct mic_info *mic; + + for (mic = mic_list.next; mic != NULL; mic = mic->next) + mic->mic_virtblk.signaled = 1/* true */; +} + +static void +set_mic_boot_params(struct mic_info *mic) +{ + set_log_buf_info(mic); + set_cmdline(mic); +} + +static void * +init_mic(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + struct sigaction ignore = { + .sa_flags = 0, + .sa_handler = SIG_IGN + }; + struct sigaction act = { + .sa_flags = SA_SIGINFO, + .sa_sigaction = change_virtblk_backend, + }; + char buffer[PATH_MAX]; + int err, fd; + + /* + * Currently, one virtio block device is supported for each MIC card + * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. + * The signal informs the virtio block backend about a change in the + * configuration file which specifies the virtio backend file name on + * the host. Virtio block backend then re-reads the configuration file + * and switches to the new block device. This signalling mechanism may + * not be required once multiple virtio block devices are supported by + * the MIC daemon. + */ + sigaction(SIGUSR1, &ignore, NULL); +retry: + fd = open_state_fd(mic); + if (fd < 0) { + mpsslog("%s: %s %d open state fd failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + sleep(2); + goto retry; + } + + if (mic->restart) { + snprintf(buffer, PATH_MAX, "boot"); + setsysfs(mic->name, "state", buffer); + mpsslog("%s restarting mic %d\n", + mic->name, mic->restart); + mic->restart = 0; + } + + while (1) { + while (block_till_state_change(fd, mic)) { + mpsslog("%s: %s %d block_till_state_change error %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + sleep(2); + continue; + } + + if (get_mic_state(mic) == MIC_BOOTING) + break; + } + + mic->pid = fork(); + switch (mic->pid) { + case 0: + add_virtio_device(mic, &virtcons_dev_page.dd); + add_virtio_device(mic, &virtnet_dev_page.dd); + err = pthread_create(&mic->mic_console.console_thread, NULL, + virtio_console, mic); + if (err) + mpsslog("%s virtcons pthread_create failed %s\n", + mic->name, strerror(err)); + err = pthread_create(&mic->mic_net.net_thread, NULL, + virtio_net, mic); + if (err) + mpsslog("%s virtnet pthread_create failed %s\n", + mic->name, strerror(err)); + err = pthread_create(&mic->mic_virtblk.block_thread, NULL, + virtio_block, mic); + if (err) + mpsslog("%s virtblk pthread_create failed %s\n", + mic->name, strerror(err)); + sigemptyset(&act.sa_mask); + err = sigaction(SIGUSR1, &act, NULL); + if (err) + mpsslog("%s sigaction SIGUSR1 failed %s\n", + mic->name, strerror(errno)); + while (1) + sleep(60); + case -1: + mpsslog("fork failed MIC name %s id %d errno %d\n", + mic->name, mic->id, errno); + break; + default: + err = pthread_create(&mic->config_thread, NULL, + mic_config, mic); + if (err) + mpsslog("%s mic_config pthread_create failed %s\n", + mic->name, strerror(err)); + } + + return NULL; +} + +static void +start_daemon(void) +{ + struct mic_info *mic; + int err; + + for (mic = mic_list.next; mic; mic = mic->next) { + set_mic_boot_params(mic); + err = pthread_create(&mic->init_thread, NULL, init_mic, mic); + if (err) + mpsslog("%s init_mic pthread_create failed %s\n", + mic->name, strerror(err)); + } + + while (1) + sleep(60); +} + +static int +init_mic_list(void) +{ + struct mic_info *mic = &mic_list; + struct dirent *file; + DIR *dp; + int cnt = 0; + + dp = opendir(MICSYSFSDIR); + if (!dp) + return 0; + + while ((file = readdir(dp)) != NULL) { + if (!strncmp(file->d_name, "mic", 3)) { + mic->next = calloc(1, sizeof(struct mic_info)); + if (mic->next) { + mic = mic->next; + mic->id = atoi(&file->d_name[3]); + mic->name = malloc(strlen(file->d_name) + 16); + if (mic->name) + strcpy(mic->name, file->d_name); + mpsslog("MIC name %s id %d\n", mic->name, + mic->id); + cnt++; + } + } + } + + closedir(dp); + return cnt; +} + +void +mpsslog(char *format, ...) +{ + va_list args; + char buffer[4096]; + char ts[52], *ts1; + time_t t; + + if (logfp == NULL) + return; + + va_start(args, format); + vsprintf(buffer, format, args); + va_end(args); + + time(&t); + ts1 = ctime_r(&t, ts); + ts1[strlen(ts1) - 1] = '\0'; + fprintf(logfp, "%s: %s", ts1, buffer); + + fflush(logfp); +} + +int +main(int argc, char *argv[]) +{ + int cnt; + pid_t pid; + + myname = argv[0]; + + logfp = fopen(LOGFILE_NAME, "a+"); + if (!logfp) { + fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); + exit(1); + } + pid = fork(); + switch (pid) { + case 0: + break; + case -1: + exit(2); + default: + exit(0); + } + + mpsslog("MIC Daemon start\n"); + + cnt = init_mic_list(); + if (cnt == 0) { + mpsslog("MIC module not loaded\n"); + exit(3); + } + mpsslog("MIC found %d devices\n", cnt); + + start_daemon(); + + exit(0); +} diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h new file mode 100644 index 000000000000..8bd64944aacc --- /dev/null +++ b/samples/mic/mpssd/mpssd.h @@ -0,0 +1,103 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ +#ifndef _MPSSD_H_ +#define _MPSSD_H_ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <libgen.h> +#include <pthread.h> +#include <stdarg.h> +#include <time.h> +#include <errno.h> +#include <sys/dir.h> +#include <sys/ioctl.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <pthread.h> +#include <signal.h> +#include <limits.h> +#include <syslog.h> +#include <getopt.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <linux/if_tun.h> +#include <linux/virtio_ids.h> + +#define MICSYSFSDIR "/sys/class/mic" +#define LOGFILE_NAME "/var/log/mpssd" +#define PAGE_SIZE 4096 + +struct mic_console_info { + pthread_t console_thread; + int virtio_console_fd; + void *console_dp; +}; + +struct mic_net_info { + pthread_t net_thread; + int virtio_net_fd; + int tap_fd; + void *net_dp; +}; + +struct mic_virtblk_info { + pthread_t block_thread; + int virtio_block_fd; + void *block_dp; + volatile sig_atomic_t signaled; + char *backend_file; + int backend; + void *backend_addr; + long backend_size; +}; + +struct mic_info { + int id; + char *name; + pthread_t config_thread; + pthread_t init_thread; + pid_t pid; + struct mic_console_info mic_console; + struct mic_net_info mic_net; + struct mic_virtblk_info mic_virtblk; + int restart; + int boot_on_resume; + struct mic_info *next; +}; + +__attribute__((format(printf, 1, 2))) +void mpsslog(char *format, ...); +char *readsysfs(char *dir, char *entry); +int setsysfs(char *dir, char *entry, char *value); +#endif diff --git a/samples/mic/mpssd/sysfs.c b/samples/mic/mpssd/sysfs.c new file mode 100644 index 000000000000..8dd326936083 --- /dev/null +++ b/samples/mic/mpssd/sysfs.c @@ -0,0 +1,102 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ + +#include "mpssd.h" + +#define PAGE_SIZE 4096 + +char * +readsysfs(char *dir, char *entry) +{ + char filename[PATH_MAX]; + char value[PAGE_SIZE]; + char *string = NULL; + int fd; + int len; + + if (dir == NULL) + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); + else + snprintf(filename, PATH_MAX, + "%s/%s/%s", MICSYSFSDIR, dir, entry); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + mpsslog("Failed to open sysfs entry '%s': %s\n", + filename, strerror(errno)); + return NULL; + } + + len = read(fd, value, sizeof(value)); + if (len < 0) { + mpsslog("Failed to read sysfs entry '%s': %s\n", + filename, strerror(errno)); + goto readsys_ret; + } + if (len == 0) + goto readsys_ret; + + value[len - 1] = '\0'; + + string = malloc(strlen(value) + 1); + if (string) + strcpy(string, value); + +readsys_ret: + close(fd); + return string; +} + +int +setsysfs(char *dir, char *entry, char *value) +{ + char filename[PATH_MAX]; + char *oldvalue; + int fd, ret = 0; + + if (dir == NULL) + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); + else + snprintf(filename, PATH_MAX, "%s/%s/%s", + MICSYSFSDIR, dir, entry); + + oldvalue = readsysfs(dir, entry); + + fd = open(filename, O_RDWR); + if (fd < 0) { + ret = errno; + mpsslog("Failed to open sysfs entry '%s': %s\n", + filename, strerror(errno)); + goto done; + } + + if (!oldvalue || strcmp(value, oldvalue)) { + if (write(fd, value, strlen(value)) < 0) { + ret = errno; + mpsslog("Failed to write new sysfs entry '%s': %s\n", + filename, strerror(errno)); + } + } + close(fd); +done: + if (oldvalue) + free(oldvalue); + return ret; +} diff --git a/samples/timers/.gitignore b/samples/timers/.gitignore new file mode 100644 index 000000000000..c5c45d7ec0df --- /dev/null +++ b/samples/timers/.gitignore @@ -0,0 +1 @@ +hpet_example diff --git a/samples/timers/Makefile b/samples/timers/Makefile new file mode 100644 index 000000000000..a5c3c4a35ca1 --- /dev/null +++ b/samples/timers/Makefile @@ -0,0 +1,15 @@ +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) +CC := $(CROSS_COMPILE)gcc +PROGS := hpet_example + +all: $(PROGS) + +clean: + rm -fr $(PROGS) + +endif +endif diff --git a/samples/timers/hpet_example.c b/samples/timers/hpet_example.c new file mode 100644 index 000000000000..3ab4993d85e0 --- /dev/null +++ b/samples/timers/hpet_example.c @@ -0,0 +1,294 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <memory.h> +#include <malloc.h> +#include <time.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h> +#include <errno.h> +#include <sys/time.h> +#include <linux/hpet.h> + + +extern void hpet_open_close(int, const char **); +extern void hpet_info(int, const char **); +extern void hpet_poll(int, const char **); +extern void hpet_fasync(int, const char **); +extern void hpet_read(int, const char **); + +#include <sys/poll.h> +#include <sys/ioctl.h> + +struct hpet_command { + char *command; + void (*func)(int argc, const char ** argv); +} hpet_command[] = { + { + "open-close", + hpet_open_close + }, + { + "info", + hpet_info + }, + { + "poll", + hpet_poll + }, + { + "fasync", + hpet_fasync + }, +}; + +int +main(int argc, const char ** argv) +{ + unsigned int i; + + argc--; + argv++; + + if (!argc) { + fprintf(stderr, "-hpet: requires command\n"); + return -1; + } + + + for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++) + if (!strcmp(argv[0], hpet_command[i].command)) { + argc--; + argv++; + fprintf(stderr, "-hpet: executing %s\n", + hpet_command[i].command); + hpet_command[i].func(argc, argv); + return 0; + } + + fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]); + + return -1; +} + +void +hpet_open_close(int argc, const char **argv) +{ + int fd; + + if (argc != 1) { + fprintf(stderr, "hpet_open_close: device-name\n"); + return; + } + + fd = open(argv[0], O_RDONLY); + if (fd < 0) + fprintf(stderr, "hpet_open_close: open failed\n"); + else + close(fd); + + return; +} + +void +hpet_info(int argc, const char **argv) +{ + struct hpet_info info; + int fd; + + if (argc != 1) { + fprintf(stderr, "hpet_info: device-name\n"); + return; + } + + fd = open(argv[0], O_RDONLY); + if (fd < 0) { + fprintf(stderr, "hpet_info: open of %s failed\n", argv[0]); + return; + } + + if (ioctl(fd, HPET_INFO, &info) < 0) { + fprintf(stderr, "hpet_info: failed to get info\n"); + goto out; + } + + fprintf(stderr, "hpet_info: hi_irqfreq 0x%lx hi_flags 0x%lx ", + info.hi_ireqfreq, info.hi_flags); + fprintf(stderr, "hi_hpet %d hi_timer %d\n", + info.hi_hpet, info.hi_timer); + +out: + close(fd); + return; +} + +void +hpet_poll(int argc, const char **argv) +{ + unsigned long freq; + int iterations, i, fd; + struct pollfd pfd; + struct hpet_info info; + struct timeval stv, etv; + struct timezone tz; + long usec; + + if (argc != 3) { + fprintf(stderr, "hpet_poll: device-name freq iterations\n"); + return; + } + + freq = atoi(argv[1]); + iterations = atoi(argv[2]); + + fd = open(argv[0], O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]); + return; + } + + if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { + fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n"); + goto out; + } + + if (ioctl(fd, HPET_INFO, &info) < 0) { + fprintf(stderr, "hpet_poll: failed to get info\n"); + goto out; + } + + fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags); + + if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { + fprintf(stderr, "hpet_poll: HPET_EPI failed\n"); + goto out; + } + + if (ioctl(fd, HPET_IE_ON, 0) < 0) { + fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n"); + goto out; + } + + pfd.fd = fd; + pfd.events = POLLIN; + + for (i = 0; i < iterations; i++) { + pfd.revents = 0; + gettimeofday(&stv, &tz); + if (poll(&pfd, 1, -1) < 0) + fprintf(stderr, "hpet_poll: poll failed\n"); + else { + long data; + + gettimeofday(&etv, &tz); + usec = stv.tv_sec * 1000000 + stv.tv_usec; + usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec; + + fprintf(stderr, + "hpet_poll: expired time = 0x%lx\n", usec); + + fprintf(stderr, "hpet_poll: revents = 0x%x\n", + pfd.revents); + + if (read(fd, &data, sizeof(data)) != sizeof(data)) { + fprintf(stderr, "hpet_poll: read failed\n"); + } + else + fprintf(stderr, "hpet_poll: data 0x%lx\n", + data); + } + } + +out: + close(fd); + return; +} + +static int hpet_sigio_count; + +static void +hpet_sigio(int val) +{ + fprintf(stderr, "hpet_sigio: called\n"); + hpet_sigio_count++; +} + +void +hpet_fasync(int argc, const char **argv) +{ + unsigned long freq; + int iterations, i, fd, value; + sig_t oldsig; + struct hpet_info info; + + hpet_sigio_count = 0; + fd = -1; + + if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) { + fprintf(stderr, "hpet_fasync: failed to set signal handler\n"); + return; + } + + if (argc != 3) { + fprintf(stderr, "hpet_fasync: device-name freq iterations\n"); + goto out; + } + + fd = open(argv[0], O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]); + return; + } + + + if ((fcntl(fd, F_SETOWN, getpid()) == 1) || + ((value = fcntl(fd, F_GETFL)) == 1) || + (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) { + fprintf(stderr, "hpet_fasync: fcntl failed\n"); + goto out; + } + + freq = atoi(argv[1]); + iterations = atoi(argv[2]); + + if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { + fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n"); + goto out; + } + + if (ioctl(fd, HPET_INFO, &info) < 0) { + fprintf(stderr, "hpet_fasync: failed to get info\n"); + goto out; + } + + fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags); + + if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { + fprintf(stderr, "hpet_fasync: HPET_EPI failed\n"); + goto out; + } + + if (ioctl(fd, HPET_IE_ON, 0) < 0) { + fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n"); + goto out; + } + + for (i = 0; i < iterations; i++) { + (void) pause(); + fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count); + } + +out: + signal(SIGIO, oldsig); + + if (fd >= 0) + close(fd); + + return; +} diff --git a/samples/watchdog/.gitignore b/samples/watchdog/.gitignore new file mode 100644 index 000000000000..ff0ebb540333 --- /dev/null +++ b/samples/watchdog/.gitignore @@ -0,0 +1 @@ +watchdog-simple diff --git a/samples/watchdog/Makefile b/samples/watchdog/Makefile new file mode 100644 index 000000000000..9b53d89b1ccf --- /dev/null +++ b/samples/watchdog/Makefile @@ -0,0 +1,8 @@ +CC := $(CROSS_COMPILE)gcc +PROGS := watchdog-simple + +all: $(PROGS) + +clean: + rm -fr $(PROGS) + diff --git a/samples/watchdog/watchdog-simple.c b/samples/watchdog/watchdog-simple.c new file mode 100644 index 000000000000..ba45803a2216 --- /dev/null +++ b/samples/watchdog/watchdog-simple.c @@ -0,0 +1,24 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> + +int main(void) +{ + int fd = open("/dev/watchdog", O_WRONLY); + int ret = 0; + if (fd == -1) { + perror("watchdog"); + exit(EXIT_FAILURE); + } + while (1) { + ret = write(fd, "\0", 1); + if (ret != 1) { + ret = -1; + break; + } + sleep(10); + } + close(fd); + return ret; +} |