diff options
Diffstat (limited to 'tools')
197 files changed, 14298 insertions, 1715 deletions
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..833ed9a16adf 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info { __u32 ap_encodings[8]; }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. + */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -607,6 +632,8 @@ struct kvm_ppc_rmmu_info { #define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) +#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 38535a57fef8..4cdaa55fabfe 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -224,6 +224,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) +#define KVM_SYNC_BPBC (1UL << 10) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -247,7 +248,9 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding1[52]; /* riccb needs to be 64byte aligned */ + __u8 bpbc : 1; /* bp mode */ + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ union { diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h deleted file mode 100644 index 725120939051..000000000000 --- a/tools/arch/s390/include/uapi/asm/unistd.h +++ /dev/null @@ -1,412 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S390 version - * - * Derived from "include/asm-i386/unistd.h" - */ - -#ifndef _UAPI_ASM_S390_UNISTD_H_ -#define _UAPI_ASM_S390_UNISTD_H_ - -/* - * This file contains the system call numbers. - */ - -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_restart_syscall 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_brk 45 -#define __NR_signal 48 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_setpgid 57 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_symlink 83 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_lookup_dcookie 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_getdents 141 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 -#define __NR_putpmsg 189 -#define __NR_vfork 190 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_getdents64 220 -#define __NR_readahead 222 -#define __NR_setxattr 224 -#define __NR_lsetxattr 225 -#define __NR_fsetxattr 226 -#define __NR_getxattr 227 -#define __NR_lgetxattr 228 -#define __NR_fgetxattr 229 -#define __NR_listxattr 230 -#define __NR_llistxattr 231 -#define __NR_flistxattr 232 -#define __NR_removexattr 233 -#define __NR_lremovexattr 234 -#define __NR_fremovexattr 235 -#define __NR_gettid 236 -#define __NR_tkill 237 -#define __NR_futex 238 -#define __NR_sched_setaffinity 239 -#define __NR_sched_getaffinity 240 -#define __NR_tgkill 241 -/* Number 242 is reserved for tux */ -#define __NR_io_setup 243 -#define __NR_io_destroy 244 -#define __NR_io_getevents 245 -#define __NR_io_submit 246 -#define __NR_io_cancel 247 -#define __NR_exit_group 248 -#define __NR_epoll_create 249 -#define __NR_epoll_ctl 250 -#define __NR_epoll_wait 251 -#define __NR_set_tid_address 252 -#define __NR_fadvise64 253 -#define __NR_timer_create 254 -#define __NR_timer_settime 255 -#define __NR_timer_gettime 256 -#define __NR_timer_getoverrun 257 -#define __NR_timer_delete 258 -#define __NR_clock_settime 259 -#define __NR_clock_gettime 260 -#define __NR_clock_getres 261 -#define __NR_clock_nanosleep 262 -/* Number 263 is reserved for vserver */ -#define __NR_statfs64 265 -#define __NR_fstatfs64 266 -#define __NR_remap_file_pages 267 -#define __NR_mbind 268 -#define __NR_get_mempolicy 269 -#define __NR_set_mempolicy 270 -#define __NR_mq_open 271 -#define __NR_mq_unlink 272 -#define __NR_mq_timedsend 273 -#define __NR_mq_timedreceive 274 -#define __NR_mq_notify 275 -#define __NR_mq_getsetattr 276 -#define __NR_kexec_load 277 -#define __NR_add_key 278 -#define __NR_request_key 279 -#define __NR_keyctl 280 -#define __NR_waitid 281 -#define __NR_ioprio_set 282 -#define __NR_ioprio_get 283 -#define __NR_inotify_init 284 -#define __NR_inotify_add_watch 285 -#define __NR_inotify_rm_watch 286 -#define __NR_migrate_pages 287 -#define __NR_openat 288 -#define __NR_mkdirat 289 -#define __NR_mknodat 290 -#define __NR_fchownat 291 -#define __NR_futimesat 292 -#define __NR_unlinkat 294 -#define __NR_renameat 295 -#define __NR_linkat 296 -#define __NR_symlinkat 297 -#define __NR_readlinkat 298 -#define __NR_fchmodat 299 -#define __NR_faccessat 300 -#define __NR_pselect6 301 -#define __NR_ppoll 302 -#define __NR_unshare 303 -#define __NR_set_robust_list 304 -#define __NR_get_robust_list 305 -#define __NR_splice 306 -#define __NR_sync_file_range 307 -#define __NR_tee 308 -#define __NR_vmsplice 309 -#define __NR_move_pages 310 -#define __NR_getcpu 311 -#define __NR_epoll_pwait 312 -#define __NR_utimes 313 -#define __NR_fallocate 314 -#define __NR_utimensat 315 -#define __NR_signalfd 316 -#define __NR_timerfd 317 -#define __NR_eventfd 318 -#define __NR_timerfd_create 319 -#define __NR_timerfd_settime 320 -#define __NR_timerfd_gettime 321 -#define __NR_signalfd4 322 -#define __NR_eventfd2 323 -#define __NR_inotify_init1 324 -#define __NR_pipe2 325 -#define __NR_dup3 326 -#define __NR_epoll_create1 327 -#define __NR_preadv 328 -#define __NR_pwritev 329 -#define __NR_rt_tgsigqueueinfo 330 -#define __NR_perf_event_open 331 -#define __NR_fanotify_init 332 -#define __NR_fanotify_mark 333 -#define __NR_prlimit64 334 -#define __NR_name_to_handle_at 335 -#define __NR_open_by_handle_at 336 -#define __NR_clock_adjtime 337 -#define __NR_syncfs 338 -#define __NR_setns 339 -#define __NR_process_vm_readv 340 -#define __NR_process_vm_writev 341 -#define __NR_s390_runtime_instr 342 -#define __NR_kcmp 343 -#define __NR_finit_module 344 -#define __NR_sched_setattr 345 -#define __NR_sched_getattr 346 -#define __NR_renameat2 347 -#define __NR_seccomp 348 -#define __NR_getrandom 349 -#define __NR_memfd_create 350 -#define __NR_bpf 351 -#define __NR_s390_pci_mmio_write 352 -#define __NR_s390_pci_mmio_read 353 -#define __NR_execveat 354 -#define __NR_userfaultfd 355 -#define __NR_membarrier 356 -#define __NR_recvmmsg 357 -#define __NR_sendmmsg 358 -#define __NR_socket 359 -#define __NR_socketpair 360 -#define __NR_bind 361 -#define __NR_connect 362 -#define __NR_listen 363 -#define __NR_accept4 364 -#define __NR_getsockopt 365 -#define __NR_setsockopt 366 -#define __NR_getsockname 367 -#define __NR_getpeername 368 -#define __NR_sendto 369 -#define __NR_sendmsg 370 -#define __NR_recvfrom 371 -#define __NR_recvmsg 372 -#define __NR_shutdown 373 -#define __NR_mlock2 374 -#define __NR_copy_file_range 375 -#define __NR_preadv2 376 -#define __NR_pwritev2 377 -#define __NR_s390_guarded_storage 378 -#define __NR_statx 379 -#define __NR_s390_sthyi 380 -#define NR_syscalls 381 - -/* - * There are some system calls that are not present on 64 bit, some - * have a different name although they do the same (e.g. __NR_chown32 - * is __NR_chown on 64 bit). - */ -#ifndef __s390x__ - -#define __NR_time 13 -#define __NR_lchown 16 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_getrlimit 76 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_fchown 95 -#define __NR_ioperm 101 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR__newselect 142 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_chown 182 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_fcntl64 221 -#define __NR_sendfile64 223 -#define __NR_fadvise64_64 264 -#define __NR_fstatat64 293 - -#else - -#define __NR_select 142 -#define __NR_getrlimit 191 /* SuS compliant getrlimit */ -#define __NR_lchown 198 -#define __NR_getuid 199 -#define __NR_getgid 200 -#define __NR_geteuid 201 -#define __NR_getegid 202 -#define __NR_setreuid 203 -#define __NR_setregid 204 -#define __NR_getgroups 205 -#define __NR_setgroups 206 -#define __NR_fchown 207 -#define __NR_setresuid 208 -#define __NR_getresuid 209 -#define __NR_setresgid 210 -#define __NR_getresgid 211 -#define __NR_chown 212 -#define __NR_setuid 213 -#define __NR_setgid 214 -#define __NR_setfsuid 215 -#define __NR_setfsgid 216 -#define __NR_newfstatat 293 - -#endif - -#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 21ac898df2d8..0dfe4d3f74e2 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -203,12 +203,16 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ +#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ + +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -243,6 +247,7 @@ #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ @@ -268,6 +273,9 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -316,6 +324,13 @@ #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ + /* * BUG word(s) */ @@ -342,5 +357,7 @@ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index b027633e7300..33833d1909af 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -77,6 +77,7 @@ #define DISABLED_MASK15 0 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index d91ba04dd007..fb3a6de7440b 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -106,6 +106,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 07a6697466ef..c8ec0ae16bf0 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -9,6 +9,35 @@ MAKE = make CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +FEATURE_USER = .bpf +FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_DISPLAY = libbfd disassembler-four-args + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean bpftool_clean +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +ifeq ($(feature-disassembler-four-args), 1) +CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + %.yacc.c: %.y $(YACC) -o $@ -d $< diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 75bf526a0168..58c2bab4ef6e 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -72,7 +72,14 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes) disassemble_init_for_target(&info); +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else disassemble = disassembler(bfdf); +#endif assert(disassemble); do { @@ -165,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, { char *ptr, *pptr, *tmp; off_t off = 0; - int ret, flen, proglen, pass, ulen = 0; + unsigned int proglen; + int ret, flen, pass, ulen = 0; regmatch_t pmatch[1]; unsigned long base; regex_t regex; @@ -192,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); - ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", + ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx", &flen, &proglen, &pass, &base); if (ret != 4) { regfree(®ex); @@ -232,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, } assert(ulen == proglen); - printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", + printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n", proglen, pass, flen); printf("%lx + <x>:\n", base); diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 37292bb5ce60..a9d47c1558bb 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -3,12 +3,16 @@ include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix ?= /usr/local +ifeq ($(V),1) + Q = +else + Q = @ endif -mandir ?= $(prefix)/share/man + +prefix ?= /usr/local +mandir ?= $(prefix)/man man8dir = $(mandir)/man8 MAN8_RST = $(wildcard *.rst) @@ -19,16 +23,27 @@ DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8)) man: man8 man8: $(DOC_MAN8) +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) + $(OUTPUT)%.8: %.rst - rst2man $< > $@ +ifndef RST2MAN_DEP + $(error "rst2man not found, but required to generate man pages") +endif + $(QUIET_GEN)rst2man $< > $@ clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8) + $(call QUIET_CLEAN, Documentation) + $(Q)$(RM) $(DOC_MAN8) install: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \ - $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir); + $(call QUIET_INSTALL, Documentation-man) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) + $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir) + +uninstall: + $(call QUIET_UNINST, Documentation-man) + $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8)) + $(Q)$(RMDIR) $(DESTDIR)$(man8dir) -.PHONY: man man8 clean install +.PHONY: man man8 clean install uninstall .DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst new file mode 100644 index 000000000000..0e4e923235b6 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -0,0 +1,118 @@ +================ +bpftool-cgroup +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF progs +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **cgroup** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + + *COMMANDS* := + { **show** | **list** | **attach** | **detach** | **help** } + +MAP COMMANDS +============= + +| **bpftool** **cgroup { show | list }** *CGROUP* +| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] +| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* +| **bpftool** **cgroup help** +| +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** } +| *ATTACH_FLAGS* := { **multi** | **override** } + +DESCRIPTION +=========== + **bpftool cgroup { show | list }** *CGROUP* + List all programs attached to the cgroup *CGROUP*. + + Output will start with program ID followed by attach type, + attach flags and program name. + + **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] + Attach program *PROG* to the cgroup *CGROUP* with attach type + *ATTACH_TYPE* and optional *ATTACH_FLAGS*. + + *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs + some bpf program, the program in this cgroup yields to sub-cgroup + program; **multi** if a sub-cgroup installs some bpf program, + that cgroup program gets run in addition to the program in this + cgroup. + + Only one program is allowed to be attached to a cgroup with + no attach flags or the **override** flag. Attaching another + program will release old program and attach the new one. + + Multiple programs are allowed to be attached to a cgroup with + **multi**. They are executed in FIFO order (those that were + attached first, run first). + + Non-default *ATTACH_FLAGS* are supported by kernel version 4.14 + and later. + + *ATTACH_TYPE* can be on of: + **ingress** ingress path of the inet socket (since 4.10); + **egress** egress path of the inet socket (since 4.10); + **sock_create** opening of an inet socket (since 4.10); + **sock_ops** various socket operations (since 4.12); + **device** device access (since 4.15). + + **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* + Detach *PROG* from the cgroup *CGROUP* and attach type + *ATTACH_TYPE*. + + **bpftool prog help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -f, --bpffs + Show file names of pinned programs. + +EXAMPLES +======== +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# mkdir /sys/fs/cgroup/test.slice** +| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog** +| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi** + +**# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + 1 device allow_multi bpf_prog1 + +| +| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1** +| **# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 9f51a268eb06..457e868bd32f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -15,13 +15,13 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } MAP COMMANDS ============= -| **bpftool** **map show** [*MAP*] +| **bpftool** **map { show | list }** [*MAP*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* **key** *BYTES* @@ -31,12 +31,13 @@ MAP COMMANDS | **bpftool** **map help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *VALUE* := { *BYTES* | *MAP* | *PROGRAM* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *VALUE* := { *BYTES* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } DESCRIPTION =========== - **bpftool map show** [*MAP*] + **bpftool map { show | list }** [*MAP*] Show information about loaded maps. If *MAP* is specified show information only about given map, otherwise list all maps currently loaded on the system. @@ -128,4 +129,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8) + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 36e8d1c3c40d..e4ceee7f2dff 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,22 +15,23 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump xlated** | **dump jited** | **pin** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } MAP COMMANDS ============= -| **bpftool** **prog show** [*PROG*] +| **bpftool** **prog { show | list }** [*PROG*] | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* +| **bpftool** **prog load** *OBJ* *FILE* | **bpftool** **prog help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } DESCRIPTION =========== - **bpftool prog show** [*PROG*] + **bpftool prog { show | list }** [*PROG*] Show information about loaded programs. If *PROG* is specified show information only about given program, otherwise list all programs currently loaded on the system. @@ -57,6 +58,11 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. + **bpftool prog load** *OBJ* *FILE* + Load bpf program from binary *OBJ* and pin as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + **bpftool prog help** Print short help message. @@ -126,8 +132,10 @@ EXAMPLES | | **# mount -t bpf none /sys/fs/bpf/** | **# bpftool prog pin id 10 /sys/fs/bpf/prog** +| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2** | **# ls -l /sys/fs/bpf/** | -rw------- 1 root root 0 Jul 22 01:43 prog +| -rw------- 1 root root 0 Jul 22 01:44 prog2 **# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** @@ -147,4 +155,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-map**\ (8) + **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 926c03d5a8da..20689a321ffe 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,17 +16,19 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** } + *OBJECT* := { **map** | **program** | **cgroup** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } *MAP-COMMANDS* := - { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** + { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** | **pin** | **help** } - *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** - | **help** } + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** + | **load** | **help** } + + *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } DESCRIPTION =========== @@ -53,4 +55,4 @@ OPTIONS SEE ALSO ======== - **bpftool-map**\ (8), **bpftool-prog**\ (8) + **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ec3052c0b004..26901ec87361 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,25 +1,10 @@ include ../../scripts/Makefile.include - include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -ifneq ($(objtree),) -#$(info Determined 'objtree' to be $(objtree)) -endif - -ifneq ($(OUTPUT),) -#$(info Determined 'OUTPUT' to be $(OUTPUT)) -# Adding $(OUTPUT) as a directory to look for source files, -# because use generated output files as sources dependency -# for flex/bison parsers. -VPATH += $(OUTPUT) -export VPATH endif ifeq ($(V),1) @@ -28,16 +13,18 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - BPF_PATH=$(OUTPUT) + BPF_PATH = $(OUTPUT) else - BPF_PATH=$(BPF_DIR) + BPF_PATH = $(BPF_DIR) endif LIBBPF = $(BPF_PATH)libbpf.a +BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) + $(LIBBPF): FORCE $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) @@ -45,22 +32,50 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null -prefix = /usr/local +prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions CC = gcc CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ +CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +INSTALL ?= install +RM ?= rm -f + +FEATURE_USER = .bpftool +FEATURE_TESTS = libbfd disassembler-four-args +FEATURE_DISPLAY = libbfd disassembler-four-args + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +ifeq ($(feature-disassembler-four-args), 1) +CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + include $(wildcard *.d) all: $(OUTPUT)bpftool -SRCS=$(wildcard *.c) -OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -73,21 +88,34 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) - $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + +install: $(OUTPUT)bpftool + $(call QUIET_INSTALL, bpftool) + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin + $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) + $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) -install: - install -m 0755 -d $(prefix)/sbin - install $(OUTPUT)bpftool $(prefix)/sbin/bpftool - install -m 0755 -d $(bash_compdir) - install -m 0644 bash-completion/bpftool $(bash_compdir) +uninstall: + $(call QUIET_UNINST, bpftool) + $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool doc: - $(Q)$(MAKE) -C Documentation/ + $(call descend,Documentation) + +doc-clean: + $(call descend,Documentation,clean) doc-install: - $(Q)$(MAKE) -C Documentation/ install + $(call descend,Documentation,install) + +doc-uninstall: + $(call descend,Documentation,uninstall) FORCE: -.PHONY: all clean FORCE install doc doc-install +.PHONY: all FORCE clean install uninstall +.PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 7febee05c8e7..08719c54a614 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -52,16 +52,24 @@ _bpftool_once_attr() done } -# Takes a list of words in argument; adds them all to COMPREPLY if none of them -# is already present on the command line. Returns no value. -_bpftool_one_of_list() +# Takes a list of words as argument; if any of those words is present on the +# command line, return 0. Otherwise, return 1. +_bpftool_search_list() { local w idx for w in $*; do for (( idx=3; idx < ${#words[@]}-1; idx++ )); do - [[ $w == ${words[idx]} ]] && return 1 + [[ $w == ${words[idx]} ]] && return 0 done done + return 1 +} + +# Takes a list of words in argument; adds them all to COMPREPLY if none of them +# is already present on the command line. Returns no value. +_bpftool_one_of_list() +{ + _bpftool_search_list $* && return 1 COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) ) } @@ -197,7 +205,7 @@ _bpftool() local PROG_TYPE='id pinned tag' case $command in - show) + show|list) [[ $prev != "$command" ]] && return 0 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) return 0 @@ -230,17 +238,21 @@ _bpftool() fi return 0 ;; + load) + _filedir + return 0 + ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help pin show' -- \ - "$cur" ) ) + COMPREPLY=( $( compgen -W 'dump help pin load \ + show list' -- "$cur" ) ) ;; esac ;; map) local MAP_TYPE='id pinned' case $command in - show|dump) + show|list|dump) case $prev in $command) COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) @@ -343,7 +355,55 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ - lookup pin show update' -- "$cur" ) ) + lookup pin show list update' -- "$cur" ) ) + ;; + esac + ;; + cgroup) + case $command in + show|list) + _filedir + return 0 + ;; + attach|detach) + local ATTACH_TYPES='ingress egress sock_create sock_ops \ + device' + local ATTACH_FLAGS='multi override' + local PROG_TYPE='id pinned tag' + case $prev in + $command) + _filedir + return 0 + ;; + ingress|egress|sock_create|sock_ops|device) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + id) + _bpftool_get_prog_ids + return 0 + ;; + *) + if ! _bpftool_search_list "$ATTACH_TYPES"; then + COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \ + "$cur" ) ) + elif [[ "$command" == "attach" ]]; then + # We have an attach type on the command line, + # but it is not the previous word, or + # "id|pinned|tag" (we already checked for + # that). This should only leave the case when + # we need attach flags for "attach" commamnd. + _bpftool_one_of_list "$ATTACH_FLAGS" + fi + return 0 + ;; + esac + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help attach detach \ + show list' -- "$cur" ) ) ;; esac ;; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c new file mode 100644 index 000000000000..cae32a61cb18 --- /dev/null +++ b/tools/bpf/bpftool/cgroup.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2017 Facebook +// Author: Roman Gushchin <guro@fb.com> + +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <bpf.h> + +#include "main.h" + +#define HELP_SPEC_ATTACH_FLAGS \ + "ATTACH_FLAGS := { multi | override }" + +#define HELP_SPEC_ATTACH_TYPES \ + "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" + +static const char * const attach_type_strings[] = { + [BPF_CGROUP_INET_INGRESS] = "ingress", + [BPF_CGROUP_INET_EGRESS] = "egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", + [BPF_CGROUP_SOCK_OPS] = "sock_ops", + [BPF_CGROUP_DEVICE] = "device", + [__MAX_BPF_ATTACH_TYPE] = NULL, +}; + +static enum bpf_attach_type parse_attach_type(const char *str) +{ + enum bpf_attach_type type; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_type_strings[type] && + is_prefix(str, attach_type_strings[type])) + return type; + } + + return __MAX_BPF_ATTACH_TYPE; +} + +static int show_bpf_prog(int id, const char *attach_type_str, + const char *attach_flags_str) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int prog_fd; + + prog_fd = bpf_prog_get_fd_by_id(id); + if (prog_fd < 0) + return -1; + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + close(prog_fd); + return -1; + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "id", info.id); + jsonw_string_field(json_wtr, "attach_type", + attach_type_str); + jsonw_string_field(json_wtr, "attach_flags", + attach_flags_str); + jsonw_string_field(json_wtr, "name", info.name); + jsonw_end_object(json_wtr); + } else { + printf("%-8u %-15s %-15s %-15s\n", info.id, + attach_type_str, + attach_flags_str, + info.name); + } + + close(prog_fd); + return 0; +} + +static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +{ + __u32 prog_ids[1024] = {0}; + char *attach_flags_str; + __u32 prog_cnt, iter; + __u32 attach_flags; + char buf[32]; + int ret; + + prog_cnt = ARRAY_SIZE(prog_ids); + ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids, + &prog_cnt); + if (ret) + return ret; + + if (prog_cnt == 0) + return 0; + + switch (attach_flags) { + case BPF_F_ALLOW_MULTI: + attach_flags_str = "multi"; + break; + case BPF_F_ALLOW_OVERRIDE: + attach_flags_str = "override"; + break; + case 0: + attach_flags_str = ""; + break; + default: + snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags); + attach_flags_str = buf; + } + + for (iter = 0; iter < prog_cnt; iter++) + show_bpf_prog(prog_ids[iter], attach_type_strings[type], + attach_flags_str); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + enum bpf_attach_type type; + int cgroup_fd; + int ret = -1; + + if (argc < 1) { + p_err("too few parameters for cgroup show"); + goto exit; + } else if (argc > 1) { + p_err("too many parameters for cgroup show"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s", argv[1]); + goto exit; + } + + if (json_output) + jsonw_start_array(json_wtr); + else + printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType", + "AttachFlags", "Name"); + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + /* + * Not all attach types may be supported, so it's expected, + * that some requests will fail. + * If we were able to get the show for at least one + * attach type, let's return 0. + */ + if (show_attached_bpf_progs(cgroup_fd, type) == 0) + ret = 0; + } + + if (json_output) + jsonw_end_array(json_wtr); + + close(cgroup_fd); +exit: + return ret; +} + +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int cgroup_fd, prog_fd; + int attach_flags = 0; + int ret = -1; + int i; + + if (argc < 4) { + p_err("too few parameters for cgroup attach"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + for (i = 0; i < argc; i++) { + if (is_prefix(argv[i], "multi")) { + attach_flags |= BPF_F_ALLOW_MULTI; + } else if (is_prefix(argv[i], "override")) { + attach_flags |= BPF_F_ALLOW_OVERRIDE; + } else { + p_err("unknown option: %s", argv[i]); + goto exit_cgroup; + } + } + + if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) { + p_err("failed to attach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_detach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int prog_fd, cgroup_fd; + int ret = -1; + + if (argc < 4) { + p_err("too few parameters for cgroup detach"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) { + p_err("failed to detach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } CGROUP\n" + " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" + " %s %s detach CGROUP ATTACH_TYPE PROG\n" + " %s %s help\n" + "\n" + " " HELP_SPEC_ATTACH_TYPES "\n" + " " HELP_SPEC_ATTACH_FLAGS "\n" + " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "attach", do_attach }, + { "detach", do_detach }, + { "help", do_help }, + { 0 } +}; + +int do_cgroup(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 2bd3b280e6dd..0b482c0070e0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -34,6 +34,7 @@ /* Author: Jakub Kicinski <kubakici@wp.pl> */ #include <errno.h> +#include <fcntl.h> #include <fts.h> #include <libgen.h> #include <mntent.h> @@ -44,7 +45,9 @@ #include <unistd.h> #include <linux/limits.h> #include <linux/magic.h> +#include <net/if.h> #include <sys/mount.h> +#include <sys/stat.h> #include <sys/types.h> #include <sys/vfs.h> @@ -163,13 +166,49 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_fd(int fd, const char *name) { char err_str[ERR_MAX_LEN]; - unsigned int id; - char *endptr; char *file; char *dir; + int err = 0; + + err = bpf_obj_pin(fd, name); + if (!err) + goto out; + + file = malloc(strlen(name) + 1); + strcpy(file, name); + dir = dirname(file); + + if (errno != EPERM || is_bpffs(dir)) { + p_err("can't pin the object (%s): %s", name, strerror(errno)); + goto out_free; + } + + /* Attempt to mount bpffs, then retry pinning. */ + err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); + if (!err) { + err = bpf_obj_pin(fd, name); + if (err) + p_err("can't pin the object (%s): %s", name, + strerror(errno)); + } else { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system to pin the object (%s): %s", + name, err_str); + } + +out_free: + free(file); +out: + return err; +} + +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +{ + unsigned int id; + char *endptr; int err; int fd; @@ -195,35 +234,8 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) return -1; } - err = bpf_obj_pin(fd, *argv); - if (!err) - goto out_close; - - file = malloc(strlen(*argv) + 1); - strcpy(file, *argv); - dir = dirname(file); - - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", *argv, strerror(errno)); - goto out_free; - } - - /* Attempt to mount bpffs, then retry pinning. */ - err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, *argv); - if (err) - p_err("can't pin the object (%s): %s", *argv, - strerror(errno)); - } else { - err_str[ERR_MAX_LEN - 1] = '\0'; - p_err("can't mount BPF file system to pin the object (%s): %s", - *argv, err_str); - } + err = do_pin_fd(fd, *argv); -out_free: - free(file); -out_close: close(fd); return err; } @@ -403,3 +415,124 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) free(obj); } } + +static char * +ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) +{ + struct stat st; + int err; + + err = stat("/proc/self/ns/net", &st); + if (err) { + p_err("Can't stat /proc/self: %s", strerror(errno)); + return NULL; + } + + if (st.st_dev != ns_dev || st.st_ino != ns_ino) + return NULL; + + return if_indextoname(ifindex, buf); +} + +static int read_sysfs_hex_int(char *path) +{ + char vendor_id_buf[8]; + int len; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + p_err("Can't open %s: %s", path, strerror(errno)); + return -1; + } + + len = read(fd, vendor_id_buf, sizeof(vendor_id_buf)); + close(fd); + if (len < 0) { + p_err("Can't read %s: %s", path, strerror(errno)); + return -1; + } + if (len >= (int)sizeof(vendor_id_buf)) { + p_err("Value in %s too long", path); + return -1; + } + + vendor_id_buf[len] = 0; + + return strtol(vendor_id_buf, NULL, 0); +} + +static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) +{ + char full_path[64]; + + snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s", + devname, entry_name); + + return read_sysfs_hex_int(full_path); +} + +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) +{ + char devname[IF_NAMESIZE]; + int vendor_id; + int device_id; + + if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { + p_err("Can't get net device name for ifindex %d: %s", ifindex, + strerror(errno)); + return NULL; + } + + vendor_id = read_sysfs_netdev_hex_int(devname, "vendor"); + if (vendor_id < 0) { + p_err("Can't get device vendor id for %s", devname); + return NULL; + } + + switch (vendor_id) { + case 0x19ee: + device_id = read_sysfs_netdev_hex_int(devname, "device"); + if (device_id != 0x4000 && + device_id != 0x6000 && + device_id != 0x6003) + p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); + return "NFP-6xxx"; + default: + p_err("Can't get bfd arch name for device vendor id 0x%04x", + vendor_id); + return NULL; + } +} + +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + printf(" dev "); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + printf("%s", name); + else + printf("ifindex %u ns_dev %llu ns_ino %llu", + ifindex, ns_dev, ns_inode); +} + +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + jsonw_name(json_wtr, "dev"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ifindex", ifindex); + jsonw_uint_field(json_wtr, "ns_dev", ns_dev); + jsonw_uint_field(json_wtr, "ns_inode", ns_inode); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_end_object(json_wtr); +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 1551d3918d4c..87439320ef70 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...) return 0; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch) { disassembler_ftype disassemble; struct disassemble_info info; @@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) else init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf); + + /* Update architecture info for offload. */ + if (arch) { + const bfd_arch_info_type *inf = bfd_scan_arch(arch); + + if (inf) { + bfdf->arch_info = inf; + } else { + p_err("No libfd support for %s", arch); + return; + } + } + info.arch = bfd_get_arch(bfdf); info.mach = bfd_get_mach(bfdf); info.buffer = image; @@ -107,7 +121,14 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) disassemble_init_for_target(&info); +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else disassemble = disassembler(bfdf); +#endif assert(disassemble); if (json_output) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d294bc8168be..3a0396d87c42 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -38,7 +38,6 @@ #include <errno.h> #include <getopt.h> #include <linux/bpf.h> -#include <linux/version.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -85,7 +84,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map }\n" + " OBJECT := { prog | map | cgroup }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -95,21 +94,13 @@ static int do_help(int argc, char **argv) static int do_version(int argc, char **argv) { - unsigned int version[3]; - - version[0] = LINUX_VERSION_CODE >> 16; - version[1] = LINUX_VERSION_CODE >> 8 & 0xf; - version[2] = LINUX_VERSION_CODE & 0xf; - if (json_output) { jsonw_start_object(json_wtr); jsonw_name(json_wtr, "version"); - jsonw_printf(json_wtr, "\"%u.%u.%u\"", - version[0], version[1], version[2]); + jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION); jsonw_end_object(json_wtr); } else { - printf("%s v%u.%u.%u\n", bin_name, - version[0], version[1], version[2]); + printf("%s v%s\n", bin_name, BPFTOOL_VERSION); } return 0; } @@ -173,6 +164,7 @@ static const struct cmd cmds[] = { { "batch", do_batch }, { "prog", do_prog }, { "map", do_map }, + { "cgroup", do_cgroup }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index bff330b49791..b8e9584d6246 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -96,6 +96,8 @@ struct pinned_obj { int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); struct cmd { const char *cmd; @@ -111,13 +113,18 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); int do_map(int argc, char **arg); +int do_cgroup(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch); void print_hex_data_json(uint8_t *data, size_t len); +const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); + #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index a8c3a33dd185..f95fa67bb498 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -66,6 +66,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", [BPF_MAP_TYPE_DEVMAP] = "devmap", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", }; static unsigned int get_possible_cpus(void) @@ -428,6 +429,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_name(json_wtr, "flags"); jsonw_printf(json_wtr, "%#x", info->map_flags); + + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + jsonw_uint_field(json_wtr, "bytes_key", info->key_size); jsonw_uint_field(json_wtr, "bytes_value", info->value_size); jsonw_uint_field(json_wtr, "max_entries", info->max_entries); @@ -469,7 +473,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (*info->name) printf("name %s ", info->name); - printf("flags 0x%x\n", info->map_flags); + printf("flags 0x%x", info->map_flags); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); + printf("\n"); printf("\tkey %uB value %uB max_entries %u", info->key_size, info->value_size, info->max_entries); @@ -861,7 +867,7 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s show [MAP]\n" + "Usage: %s %s { show | list } [MAP]\n" " %s %s dump MAP\n" " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" " %s %s lookup MAP key BYTES\n" @@ -885,6 +891,7 @@ static int do_help(int argc, char **argv) static const struct cmd cmds[] = { { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { "update", do_update }, diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index dded77345bfb..e8e2baaf93c2 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -45,6 +45,7 @@ #include <sys/stat.h> #include <bpf.h> +#include <libbpf.h> #include "main.h" #include "disasm.h" @@ -65,6 +66,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", }; static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) @@ -229,6 +231,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + if (info->load_time) { char buf[32]; @@ -286,6 +290,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("\n"); if (info->load_time) { @@ -402,6 +407,88 @@ static int do_show(int argc, char **argv) return err; } +#define SYM_MAX_NAME 256 + +struct kernel_sym { + unsigned long address; + char name[SYM_MAX_NAME]; +}; + +struct dump_data { + unsigned long address_call_base; + struct kernel_sym *sym_mapping; + __u32 sym_count; + char scratch_buff[SYM_MAX_NAME]; +}; + +static int kernel_syms_cmp(const void *sym_a, const void *sym_b) +{ + return ((struct kernel_sym *)sym_a)->address - + ((struct kernel_sym *)sym_b)->address; +} + +static void kernel_syms_load(struct dump_data *dd) +{ + struct kernel_sym *sym; + char buff[256]; + void *tmp, *address; + FILE *fp; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) + return; + + while (!feof(fp)) { + if (!fgets(buff, sizeof(buff), fp)) + break; + tmp = realloc(dd->sym_mapping, + (dd->sym_count + 1) * + sizeof(*dd->sym_mapping)); + if (!tmp) { +out: + free(dd->sym_mapping); + dd->sym_mapping = NULL; + fclose(fp); + return; + } + dd->sym_mapping = tmp; + sym = &dd->sym_mapping[dd->sym_count]; + if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2) + continue; + sym->address = (unsigned long)address; + if (!strcmp(sym->name, "__bpf_call_base")) { + dd->address_call_base = sym->address; + /* sysctl kernel.kptr_restrict was set */ + if (!sym->address) + goto out; + } + if (sym->address) + dd->sym_count++; + } + + fclose(fp); + + qsort(dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp); +} + +static void kernel_syms_destroy(struct dump_data *dd) +{ + free(dd->sym_mapping); +} + +static struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) +{ + struct kernel_sym sym = { + .address = key, + }; + + return dd->sym_mapping ? + bsearch(&sym, dd->sym_mapping, dd->sym_count, + sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; +} + static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) { va_list args; @@ -411,8 +498,71 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) +static const char *print_call_pcrel(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address, + const struct bpf_insn *insn) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#%s", insn->off, sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d#0x%lx", insn->off, address); + return dd->scratch_buff; +} + +static const char *print_call_helper(struct dump_data *dd, + struct kernel_sym *sym, + unsigned long address) +{ + if (sym) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%s", sym->name); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%lx", address); + return dd->scratch_buff; +} + +static const char *print_call(void *private_data, + const struct bpf_insn *insn) +{ + struct dump_data *dd = private_data; + unsigned long address = dd->address_call_base + insn->imm; + struct kernel_sym *sym; + + sym = kernel_syms_search(dd, address); + if (insn->src_reg == BPF_PSEUDO_CALL) + return print_call_pcrel(dd, sym, address, insn); + else + return print_call_helper(dd, sym, address); +} + +static const char *print_imm(void *private_data, + const struct bpf_insn *insn, + __u64 full_imm) +{ + struct dump_data *dd = private_data; + + if (insn->src_reg == BPF_PSEUDO_MAP_FD) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u]", insn->imm); + else + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "0x%llx", (unsigned long long)full_imm); + return dd->scratch_buff; +} + +static void dump_xlated_plain(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -426,7 +576,7 @@ static void dump_xlated_plain(void *buf, unsigned int len, bool opcodes) double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); - print_bpf_insn(print_insn, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { printf(" "); @@ -455,8 +605,15 @@ static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...) va_end(args); } -static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) +static void dump_xlated_json(struct dump_data *dd, void *buf, + unsigned int len, bool opcodes) { + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn_json, + .cb_call = print_call, + .cb_imm = print_imm, + .private_data = dd, + }; struct bpf_insn *insn = buf; bool double_insn = false; unsigned int i; @@ -471,7 +628,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) jsonw_start_object(json_wtr); jsonw_name(json_wtr, "disasm"); - print_bpf_insn(print_insn_json, NULL, insn + i, true); + print_bpf_insn(&cbs, NULL, insn + i, true); if (opcodes) { jsonw_name(json_wtr, "opcodes"); @@ -506,6 +663,7 @@ static void dump_xlated_json(void *buf, unsigned int len, bool opcodes) static int do_dump(int argc, char **argv) { struct bpf_prog_info info = {}; + struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; char *filepath = NULL; @@ -593,6 +751,14 @@ static int do_dump(int argc, char **argv) goto err_free; } + if ((member_len == &info.jited_prog_len && + info.jited_prog_insns == 0) || + (member_len == &info.xlated_prog_len && + info.xlated_prog_insns == 0)) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); + goto err_free; + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -609,17 +775,29 @@ static int do_dump(int argc, char **argv) goto err_free; } } else { - if (member_len == &info.jited_prog_len) - disasm_print_insn(buf, *member_len, opcodes); - else + if (member_len == &info.jited_prog_len) { + const char *name = NULL; + + if (info.ifindex) { + name = ifindex_to_bfd_name_ns(info.ifindex, + info.netns_dev, + info.netns_ino); + if (!name) + goto err_free; + } + + disasm_print_insn(buf, *member_len, opcodes, name); + } else { + kernel_syms_load(&dd); if (json_output) - dump_xlated_json(buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes); else - dump_xlated_plain(buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes); + kernel_syms_destroy(&dd); + } } free(buf); - return 0; err_free: @@ -637,6 +815,30 @@ static int do_pin(int argc, char **argv) return err; } +static int do_load(int argc, char **argv) +{ + struct bpf_object *obj; + int prog_fd; + + if (argc != 2) + usage(); + + if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { + p_err("failed to load program"); + return -1; + } + + if (do_pin_fd(prog_fd, argv[1])) { + p_err("failed to pin program"); + return -1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -645,26 +847,29 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s %s show [PROG]\n" + "Usage: %s %s { show | list } [PROG]\n" " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" + " %s %s load OBJ FILE\n" " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } static const struct cmd cmds[] = { { "show", do_show }, + { "list", do_show }, { "help", do_help }, { "dump", do_dump }, { "pin", do_pin }, + { "load", do_load }, { 0 } }; diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 59585fe20221..0a490cb15149 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -13,6 +13,7 @@ FILES= \ test-hello.bin \ test-libaudit.bin \ test-libbfd.bin \ + test-disassembler-four-args.bin \ test-liberty.bin \ test-liberty-z.bin \ test-cplus-demangle.bin \ @@ -197,6 +198,9 @@ $(OUTPUT)test-libpython-version.bin: $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl +$(OUTPUT)test-disassembler-four-args.bin: + $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes + $(OUTPUT)test-liberty.bin: $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c new file mode 100644 index 000000000000..45ce65cfddf0 --- /dev/null +++ b/tools/build/feature/test-disassembler-four-args.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bfd.h> +#include <dis-asm.h> + +int main(void) +{ + bfd *abfd = bfd_openr(NULL, NULL); + + disassembler(bfd_get_arch(abfd), + bfd_big_endian(abfd), + bfd_get_mach(abfd), + abfd); + + return 0; +} diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 1c14c2595158..dac4d4131d9b 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -14,6 +14,7 @@ #include <unistd.h> #include <stdlib.h> #include <stdbool.h> +#include <stdint.h> #include <stdio.h> #include <dirent.h> #include <errno.h> @@ -23,12 +24,13 @@ #include <getopt.h> #include <inttypes.h> #include <sys/ioctl.h> +#include <sys/types.h> #include <linux/gpio.h> int monitor_device(const char *device_name, unsigned int line, - u_int32_t handleflags, - u_int32_t eventflags, + uint32_t handleflags, + uint32_t eventflags, unsigned int loops) { struct gpioevent_request req; @@ -145,8 +147,8 @@ int main(int argc, char **argv) const char *device_name = NULL; unsigned int line = -1; unsigned int loops = 0; - u_int32_t handleflags = GPIOHANDLE_REQUEST_INPUT; - u_int32_t eventflags = 0; + uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT; + uint32_t eventflags = 0; int c; while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 31503819454d..1139d71fa0cf 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -7,9 +7,30 @@ CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include -all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon +sbindir ?= /usr/sbin +libexecdir ?= /usr/libexec +sharedstatedir ?= /var/lib + +ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon + +ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh + +all: $(ALL_PROGRAMS) + %: %.c $(CC) $(CFLAGS) -o $@ $^ clean: $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon + +install: all + install -d -m 755 $(DESTDIR)$(sbindir); \ + install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \ + install -d -m 755 $(DESTDIR)$(sharedstatedir); \ + for program in $(ALL_PROGRAMS); do \ + install $$program -m 755 $(DESTDIR)$(sbindir); \ + done; \ + install -m 755 lsvmbus $(DESTDIR)$(sbindir); \ + for script in $(ALL_SCRIPTS); do \ + install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \ + done diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h index 9311fadaaab2..16ed1982cb34 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index ac3c6503ca27..536ee4febd74 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -86,6 +86,62 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; +/* + * Different engines serve different roles, and there may be more than one + * engine serving each role. enum drm_i915_gem_engine_class provides a + * classification of the role of the engine, which may be used when requesting + * operations to be performed on a certain subset of engines, or for providing + * information about that group. + */ +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_RENDER = 0, + I915_ENGINE_CLASS_COPY = 1, + I915_ENGINE_CLASS_VIDEO = 2, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + I915_ENGINE_CLASS_INVALID = -1 +}; + +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) + +#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use @@ -450,6 +506,27 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 + typedef struct drm_i915_getparam { __s32 param; /* diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c223ab30293..db6bdc375126 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -17,7 +17,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ @@ -239,6 +245,7 @@ union bpf_attr { * BPF_F_NUMA_NODE is set). */ char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -635,6 +642,14 @@ union bpf_attr { * @optlen: length of optval in bytes * Return: 0 or negative error * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) * Grow or shrink room in sk_buff. * @skb: pointer to skb @@ -677,6 +692,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +755,9 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), \ + FN(sock_ops_cb_flags_set), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -888,6 +909,9 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ }; enum sk_action { @@ -910,6 +934,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { @@ -920,6 +947,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops @@ -931,8 +961,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -941,8 +972,45 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -976,6 +1044,43 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ @@ -995,7 +1100,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h index 18be90725ab0..ee97668bdadb 100644 --- a/tools/include/uapi/linux/bpf_common.h +++ b/tools/include/uapi/linux/bpf_common.h @@ -15,9 +15,10 @@ /* ld/ldx fields */ #define BPF_SIZE(code) ((code) & 0x18) -#define BPF_W 0x00 -#define BPF_H 0x08 -#define BPF_B 0x10 +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ #define BPF_MODE(code) ((code) & 0xe0) #define BPF_IMM 0x00 #define BPF_ABS 0x20 diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h new file mode 100644 index 000000000000..6d9447700e18 --- /dev/null +++ b/tools/include/uapi/linux/if_link.h @@ -0,0 +1,944 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IF_LINK_H +#define _UAPI_LINUX_IF_LINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +/* This struct should be in sync with struct rtnl_link_stats64 */ +struct rtnl_link_stats { + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ +}; + +/* The main device statistics structure */ +struct rtnl_link_stats64 { + __u64 rx_packets; /* total packets received */ + __u64 tx_packets; /* total packets transmitted */ + __u64 rx_bytes; /* total bytes received */ + __u64 tx_bytes; /* total bytes transmitted */ + __u64 rx_errors; /* bad packets received */ + __u64 tx_errors; /* packet transmit problems */ + __u64 rx_dropped; /* no space in linux buffers */ + __u64 tx_dropped; /* no space available in linux */ + __u64 multicast; /* multicast packets received */ + __u64 collisions; + + /* detailed rx_errors: */ + __u64 rx_length_errors; + __u64 rx_over_errors; /* receiver ring buff overflow */ + __u64 rx_crc_errors; /* recved pkt with crc error */ + __u64 rx_frame_errors; /* recv'd frame alignment error */ + __u64 rx_fifo_errors; /* recv'r fifo overrun */ + __u64 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u64 tx_aborted_errors; + __u64 tx_carrier_errors; + __u64 tx_fifo_errors; + __u64 tx_heartbeat_errors; + __u64 tx_window_errors; + + /* for cslip etc */ + __u64 rx_compressed; + __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap { + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create a attribute with the address family + * number as type and create its own attribute structure in it. + * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + +enum { + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO + IFLA_NET_NS_PID, + IFLA_IFALIAS, + IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ + IFLA_VFINFO_LIST, + IFLA_STATS64, + IFLA_VF_PORTS, + IFLA_PORT_SELF, + IFLA_AF_SPEC, + IFLA_GROUP, /* Group the device belongs to */ + IFLA_NET_NS_FD, + IFLA_EXT_MASK, /* Extended info mask, VFs, etc */ + IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */ +#define IFLA_PROMISCUITY IFLA_PROMISCUITY + IFLA_NUM_TX_QUEUES, + IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, + IFLA_PHYS_PORT_ID, + IFLA_CARRIER_CHANGES, + IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, + IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, + IFLA_PAD, + IFLA_XDP, + IFLA_EVENT, + IFLA_NEW_NETNSID, + IFLA_IF_NETNSID, + IFLA_CARRIER_UP_COUNT, + IFLA_CARRIER_DOWN_COUNT, + IFLA_NEW_IFINDEX, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) +#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) +#endif + +enum { + IFLA_INET_UNSPEC, + IFLA_INET_CONF, + __IFLA_INET_MAX, +}; + +#define IFLA_INET_MAX (__IFLA_INET_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum { + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ + IFLA_INET6_TOKEN, /* device token */ + IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +enum in6_addr_gen_mode { + IN6_ADDR_GEN_MODE_EUI64, + IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, +}; + +/* Bridge section */ + +enum { + IFLA_BR_UNSPEC, + IFLA_BR_FORWARD_DELAY, + IFLA_BR_HELLO_TIME, + IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, + IFLA_BR_GROUP_FWD_MASK, + IFLA_BR_ROOT_ID, + IFLA_BR_BRIDGE_ID, + IFLA_BR_ROOT_PORT, + IFLA_BR_ROOT_PATH_COST, + IFLA_BR_TOPOLOGY_CHANGE, + IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + IFLA_BR_HELLO_TIMER, + IFLA_BR_TCN_TIMER, + IFLA_BR_TOPOLOGY_CHANGE_TIMER, + IFLA_BR_GC_TIMER, + IFLA_BR_GROUP_ADDR, + IFLA_BR_FDB_FLUSH, + IFLA_BR_MCAST_ROUTER, + IFLA_BR_MCAST_SNOOPING, + IFLA_BR_MCAST_QUERY_USE_IFADDR, + IFLA_BR_MCAST_QUERIER, + IFLA_BR_MCAST_HASH_ELASTICITY, + IFLA_BR_MCAST_HASH_MAX, + IFLA_BR_MCAST_LAST_MEMBER_CNT, + IFLA_BR_MCAST_STARTUP_QUERY_CNT, + IFLA_BR_MCAST_LAST_MEMBER_INTVL, + IFLA_BR_MCAST_MEMBERSHIP_INTVL, + IFLA_BR_MCAST_QUERIER_INTVL, + IFLA_BR_MCAST_QUERY_INTVL, + IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, + IFLA_BR_MCAST_STARTUP_QUERY_INTVL, + IFLA_BR_NF_CALL_IPTABLES, + IFLA_BR_NF_CALL_IP6TABLES, + IFLA_BR_NF_CALL_ARPTABLES, + IFLA_BR_VLAN_DEFAULT_PVID, + IFLA_BR_PAD, + IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, + __IFLA_BR_MAX, +}; + +#define IFLA_BR_MAX (__IFLA_BR_MAX - 1) + +struct ifla_bridge_id { + __u8 prio[2]; + __u8 addr[6]; /* ETH_ALEN */ +}; + +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost */ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ + IFLA_BRPORT_LEARNING, /* mac learning */ + IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ + IFLA_BRPORT_PROXYARP, /* proxy ARP */ + IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ + IFLA_BRPORT_ROOT_ID, /* designated root */ + IFLA_BRPORT_BRIDGE_ID, /* designated bridge */ + IFLA_BRPORT_DESIGNATED_PORT, + IFLA_BRPORT_DESIGNATED_COST, + IFLA_BRPORT_ID, + IFLA_BRPORT_NO, + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, + IFLA_BRPORT_CONFIG_PENDING, + IFLA_BRPORT_MESSAGE_AGE_TIMER, + IFLA_BRPORT_FORWARD_DELAY_TIMER, + IFLA_BRPORT_HOLD_TIMER, + IFLA_BRPORT_FLUSH, + IFLA_BRPORT_MULTICAST_ROUTER, + IFLA_BRPORT_PAD, + IFLA_BRPORT_MCAST_FLOOD, + IFLA_BRPORT_MCAST_TO_UCAST, + IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_GROUP_FWD_MASK, + IFLA_BRPORT_NEIGH_SUPPRESS, + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + +struct ifla_cacheinfo { + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; + +enum { + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + IFLA_INFO_SLAVE_KIND, + IFLA_INFO_SLAVE_DATA, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + +/* VLAN section */ + +enum { + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + IFLA_VLAN_PROTOCOL, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum { + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping { + __u32 from; + __u32 to; +}; + +/* MACVLAN section */ +enum { + IFLA_MACVLAN_UNSPEC, + IFLA_MACVLAN_MODE, + IFLA_MACVLAN_FLAGS, + IFLA_MACVLAN_MACADDR_MODE, + IFLA_MACVLAN_MACADDR, + IFLA_MACVLAN_MACADDR_DATA, + IFLA_MACVLAN_MACADDR_COUNT, + __IFLA_MACVLAN_MAX, +}; + +#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1) + +enum macvlan_mode { + MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */ + MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */ + MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */ + MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */ + MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */ +}; + +enum macvlan_macaddr_mode { + MACVLAN_MACADDR_ADD, + MACVLAN_MACADDR_DEL, + MACVLAN_MACADDR_FLUSH, + MACVLAN_MACADDR_SET, +}; + +#define MACVLAN_FLAG_NOPROMISC 1 + +/* VRF section */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; + +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) + +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + +/* MACSEC section */ +enum { + IFLA_MACSEC_UNSPEC, + IFLA_MACSEC_SCI, + IFLA_MACSEC_PORT, + IFLA_MACSEC_ICV_LEN, + IFLA_MACSEC_CIPHER_SUITE, + IFLA_MACSEC_WINDOW, + IFLA_MACSEC_ENCODING_SA, + IFLA_MACSEC_ENCRYPT, + IFLA_MACSEC_PROTECT, + IFLA_MACSEC_INC_SCI, + IFLA_MACSEC_ES, + IFLA_MACSEC_SCB, + IFLA_MACSEC_REPLAY_PROTECT, + IFLA_MACSEC_VALIDATION, + IFLA_MACSEC_PAD, + __IFLA_MACSEC_MAX, +}; + +#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) + +enum macsec_validation_type { + MACSEC_VALIDATE_DISABLED = 0, + MACSEC_VALIDATE_CHECK = 1, + MACSEC_VALIDATE_STRICT = 2, + __MACSEC_VALIDATE_END, + MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, +}; + +/* IPVLAN section */ +enum { + IFLA_IPVLAN_UNSPEC, + IFLA_IPVLAN_MODE, + IFLA_IPVLAN_FLAGS, + __IFLA_IPVLAN_MAX +}; + +#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1) + +enum ipvlan_mode { + IPVLAN_MODE_L2 = 0, + IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, + IPVLAN_MODE_MAX +}; + +#define IPVLAN_F_PRIVATE 0x01 +#define IPVLAN_F_VEPA 0x02 + +/* VXLAN section */ +enum { + IFLA_VXLAN_UNSPEC, + IFLA_VXLAN_ID, + IFLA_VXLAN_GROUP, /* group or remote address */ + IFLA_VXLAN_LINK, + IFLA_VXLAN_LOCAL, + IFLA_VXLAN_TTL, + IFLA_VXLAN_TOS, + IFLA_VXLAN_LEARNING, + IFLA_VXLAN_AGEING, + IFLA_VXLAN_LIMIT, + IFLA_VXLAN_PORT_RANGE, /* source port */ + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, + IFLA_VXLAN_PORT, /* destination port */ + IFLA_VXLAN_GROUP6, + IFLA_VXLAN_LOCAL6, + IFLA_VXLAN_UDP_CSUM, + IFLA_VXLAN_UDP_ZERO_CSUM6_TX, + IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, + IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, + __IFLA_VXLAN_MAX +}; +#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) + +struct ifla_vxlan_port_range { + __be16 low; + __be16 high; +}; + +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, + IFLA_GENEVE_REMOTE6, + IFLA_GENEVE_UDP_CSUM, + IFLA_GENEVE_UDP_ZERO_CSUM6_TX, + IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + +/* PPP section */ +enum { + IFLA_PPP_UNSPEC, + IFLA_PPP_DEV_FD, + __IFLA_PPP_MAX +}; +#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) + +/* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + +enum { + IFLA_GTP_UNSPEC, + IFLA_GTP_FD0, + IFLA_GTP_FD1, + IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, + __IFLA_GTP_MAX, +}; +#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) + +/* Bonding section */ + +enum { + IFLA_BOND_UNSPEC, + IFLA_BOND_MODE, + IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, + IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, + IFLA_BOND_TLB_DYNAMIC_LB, + __IFLA_BOND_MAX, +}; + +#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) + +enum { + IFLA_BOND_AD_INFO_UNSPEC, + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + +enum { + IFLA_BOND_SLAVE_UNSPEC, + IFLA_BOND_SLAVE_STATE, + IFLA_BOND_SLAVE_MII_STATUS, + IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, + IFLA_BOND_SLAVE_PERM_HWADDR, + IFLA_BOND_SLAVE_QUEUE_ID, + IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + __IFLA_BOND_SLAVE_MAX, +}; + +#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) + +/* SR-IOV virtual function management section */ + +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, /* VLAN ID and QoS */ + IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ + IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ + IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ + IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query + * on/off switch + */ + IFLA_VF_STATS, /* network device statistics */ + IFLA_VF_TRUST, /* Trust VF */ + IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + +struct ifla_vf_mac { + __u32 vf; + __u8 mac[32]; /* MAX_ADDR_LEN */ +}; + +struct ifla_vf_vlan { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; +}; + +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + +struct ifla_vf_tx_rate { + __u32 vf; + __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ +}; + +struct ifla_vf_rate { + __u32 vf; + __u32 min_tx_rate; /* Min Bandwidth in Mbps */ + __u32 max_tx_rate; /* Max Bandwidth in Mbps */ +}; + +struct ifla_vf_spoofchk { + __u32 vf; + __u32 setting; +}; + +struct ifla_vf_guid { + __u32 vf; + __u64 guid; +}; + +enum { + IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */ + IFLA_VF_LINK_STATE_ENABLE, /* link always up */ + IFLA_VF_LINK_STATE_DISABLE, /* link always down */ + __IFLA_VF_LINK_STATE_MAX, +}; + +struct ifla_vf_link_state { + __u32 vf; + __u32 link_state; +}; + +struct ifla_vf_rss_query_en { + __u32 vf; + __u32 setting; +}; + +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + IFLA_VF_STATS_PAD, + IFLA_VF_STATS_RX_DROPPED, + IFLA_VF_STATS_TX_DROPPED, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + +struct ifla_vf_trust { + __u32 vf; + __u32 setting; +}; + +/* VF ports management section + * + * Nested layout of set/get msg is: + * + * [IFLA_NUM_VF] + * [IFLA_VF_PORTS] + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * [IFLA_VF_PORT] + * [IFLA_PORT_*], ... + * ... + * [IFLA_PORT_SELF] + * [IFLA_PORT_*], ... + */ + +enum { + IFLA_VF_PORT_UNSPEC, + IFLA_VF_PORT, /* nest */ + __IFLA_VF_PORT_MAX, +}; + +#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1) + +enum { + IFLA_PORT_UNSPEC, + IFLA_PORT_VF, /* __u32 */ + IFLA_PORT_PROFILE, /* string */ + IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */ + IFLA_PORT_INSTANCE_UUID, /* binary UUID */ + IFLA_PORT_HOST_UUID, /* binary UUID */ + IFLA_PORT_REQUEST, /* __u8 */ + IFLA_PORT_RESPONSE, /* __u16, output only */ + __IFLA_PORT_MAX, +}; + +#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1) + +#define PORT_PROFILE_MAX 40 +#define PORT_UUID_MAX 16 +#define PORT_SELF_VF -1 + +enum { + PORT_REQUEST_PREASSOCIATE = 0, + PORT_REQUEST_PREASSOCIATE_RR, + PORT_REQUEST_ASSOCIATE, + PORT_REQUEST_DISASSOCIATE, +}; + +enum { + PORT_VDP_RESPONSE_SUCCESS = 0, + PORT_VDP_RESPONSE_INVALID_FORMAT, + PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_VDP_RESPONSE_UNUSED_VTID, + PORT_VDP_RESPONSE_VTID_VIOLATION, + PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, + PORT_VDP_RESPONSE_OUT_OF_SYNC, + /* 0x08-0xFF reserved for future VDP use */ + PORT_PROFILE_RESPONSE_SUCCESS = 0x100, + PORT_PROFILE_RESPONSE_INPROGRESS, + PORT_PROFILE_RESPONSE_INVALID, + PORT_PROFILE_RESPONSE_BADSTATE, + PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES, + PORT_PROFILE_RESPONSE_ERROR, +}; + +struct ifla_port_vsi { + __u8 vsi_mgr_id; + __u8 vsi_type_id[3]; + __u8 vsi_type_version; + __u8 pad[3]; +}; + + +/* IPoIB section */ + +enum { + IFLA_IPOIB_UNSPEC, + IFLA_IPOIB_PKEY, + IFLA_IPOIB_MODE, + IFLA_IPOIB_UMCAST, + __IFLA_IPOIB_MAX +}; + +enum { + IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */ + IPOIB_MODE_CONNECTED = 1, /* using connected QPs */ +}; + +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + + +/* HSR section */ + +enum { + IFLA_HSR_UNSPEC, + IFLA_HSR_SLAVE1, + IFLA_HSR_SLAVE2, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, + IFLA_HSR_VERSION, /* HSR version */ + __IFLA_HSR_MAX, +}; + +#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) + +/* STATS section */ + +struct if_stats_msg { + __u8 family; + __u8 pad1; + __u16 pad2; + __u32 ifindex; + __u32 filter_mask; +}; + +/* A stats attribute can be netdev specific or a global stat. + * For netdev stats, lets use the prefix IFLA_STATS_LINK_* + */ +enum { + IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ + IFLA_STATS_LINK_64, + IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, + IFLA_STATS_AF_SPEC, + __IFLA_STATS_MAX, +}; + +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1) + +#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) + +/* These are embedded into IFLA_STATS_LINK_XSTATS: + * [IFLA_STATS_LINK_XSTATS] + * -> [LINK_XSTATS_TYPE_xxx] + * -> [rtnl link type specific attributes] + */ +enum { + LINK_XSTATS_TYPE_UNSPEC, + LINK_XSTATS_TYPE_BRIDGE, + __LINK_XSTATS_TYPE_MAX +}; +#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) + +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + +/* XDP section */ + +#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) +#define XDP_FLAGS_HW_MODE (1U << 3) +#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE | \ + XDP_FLAGS_HW_MODE) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_MODES) + +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, + XDP_ATTACHED_HW, +}; + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, + IFLA_XDP_PROG_ID, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + +enum { + IFLA_EVENT_NONE, + IFLA_EVENT_REBOOT, /* internal reset / reboot */ + IFLA_EVENT_FEATURES, /* change in offload features */ + IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */ + IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */ + IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */ + IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ +}; + +#endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 496e59a2738b..0fb5ef939732 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -932,6 +932,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 +#define KVM_CAP_S390_BPB 152 #ifdef KVM_CAP_IRQ_ROUTING @@ -1261,6 +1263,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) @@ -1358,6 +1362,96 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_S390_CMMA_MIGRATION */ #define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) +/* Memory Encryption Commands */ +#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long) + +struct kvm_enc_region { + __u64 addr; + __u64 size; +}; + +#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) +#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; +}; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h new file mode 100644 index 000000000000..776bc92e9118 --- /dev/null +++ b/tools/include/uapi/linux/netlink.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETLINK_H +#define _UAPI__LINUX_NETLINK_H + +#include <linux/kernel.h> +#include <linux/socket.h> /* for __kernel_sa_family_t */ +#include <linux/types.h> + +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_UNUSED 1 /* Unused number */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ +#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ +#define NETLINK_SOCK_DIAG 4 /* socket monitoring */ +#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ +#define NETLINK_XFRM 6 /* ipsec */ +#define NETLINK_SELINUX 7 /* SELinux event notifications */ +#define NETLINK_ISCSI 8 /* Open-iSCSI */ +#define NETLINK_AUDIT 9 /* auditing */ +#define NETLINK_FIB_LOOKUP 10 +#define NETLINK_CONNECTOR 11 +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ +#define NETLINK_IP6_FW 13 +#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ +#define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ +#define NETLINK_ECRYPTFS 19 +#define NETLINK_RDMA 20 +#define NETLINK_CRYPTO 21 /* Crypto layer */ +#define NETLINK_SMC 22 /* SMC monitoring */ + +#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG + +#define MAX_LINKS 32 + +struct sockaddr_nl { + __kernel_sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* zero */ + __u32 nl_pid; /* port ID */ + __u32 nl_groups; /* multicast groups mask */ +}; + +struct nlmsghdr { + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __u32 nlmsg_pid; /* Sending process port ID */ +}; + +/* Flags values */ + +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ + +/* Modifiers to GET request */ +#define NLM_F_ROOT 0x100 /* specify tree root */ +#define NLM_F_MATCH 0x200 /* return all matching */ +#define NLM_F_ATOMIC 0x400 /* atomic GET */ +#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) + +/* Modifiers to NEW request */ +#define NLM_F_REPLACE 0x100 /* Override existing */ +#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ +#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ +#define NLM_F_APPEND 0x800 /* Add to end of list */ + +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ + +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + +/* + 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL + 4.4BSD CHANGE NLM_F_REPLACE + + True CHANGE NLM_F_CREATE|NLM_F_REPLACE + Append NLM_F_CREATE + Check NLM_F_EXCL + */ + +#define NLMSG_ALIGNTO 4U +#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) +#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) +#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ + (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) +#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len <= (len)) +#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) + +#define NLMSG_NOOP 0x1 /* Nothing. */ +#define NLMSG_ERROR 0x2 /* Error */ +#define NLMSG_DONE 0x3 /* End of a dump */ +#define NLMSG_OVERRUN 0x4 /* Data lost */ + +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + +struct nlmsgerr { + int error; + struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 +}; + +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#endif +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 +#define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 + +struct nl_pktinfo { + __u32 group; +}; + +struct nl_mmap_req { + unsigned int nm_block_size; + unsigned int nm_block_nr; + unsigned int nm_frame_size; + unsigned int nm_frame_nr; +}; + +struct nl_mmap_hdr { + unsigned int nm_status; + unsigned int nm_len; + __u32 nm_group; + /* credentials */ + __u32 nm_pid; + __u32 nm_uid; + __u32 nm_gid; +}; + +#ifndef __KERNEL__ +enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, + NL_MMAP_STATUS_VALID, + NL_MMAP_STATUS_COPY, + NL_MMAP_STATUS_SKIP, +}; + +#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO +#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) +#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif + +#define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +enum { + NETLINK_UNCONNECTED = 0, + NETLINK_CONNECTED, +}; + +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr { + __u16 nla_len; + __u16 nla_type; +}; + +/* + * nla_type (16 bits) + * +---+---+-------------------------------+ + * | N | O | Attribute Type | + * +---+---+-------------------------------+ + * N := Carries nested attributes + * O := Payload stored in network byte order + * + * Note: The N and O flag are mutually exclusive. + */ +#define NLA_F_NESTED (1 << 15) +#define NLA_F_NET_BYTEORDER (1 << 14) +#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; + +#endif /* _UAPI__LINUX_NETLINK_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c77c9a2ebbbb..e0739a1aa4b2 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -418,6 +418,27 @@ struct perf_event_attr { __u16 __reserved_2; /* align to __u64 */ }; +/* + * Structure used by below PERF_EVENT_IOC_QUERY_BPF command + * to query bpf programs attached to the same perf tracepoint + * as the given perf event. + */ +struct perf_event_query_bpf { + /* + * The below ids array length + */ + __u32 ids_len; + /* + * Set by the kernel to indicate the number of + * available programs + */ + __u32 prog_cnt; + /* + * User provided buffer to store program ids + */ + __u32 ids[0]; +}; + #define perf_flags(attr) (*(&(attr)->read_format + 1)) /* @@ -433,6 +454,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index 30a9e51bbb1e..22627f80063e 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -49,5 +49,10 @@ */ #define SCHED_FLAG_RESET_ON_FORK 0x01 #define SCHED_FLAG_RECLAIM 0x02 +#define SCHED_FLAG_DL_OVERRUN 0x04 + +#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \ + SCHED_FLAG_RECLAIM | \ + SCHED_FLAG_DL_OVERRUN) #endif /* _UAPI_LINUX_SCHED_H */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index c227ccba60ae..07d61583fd02 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -214,6 +214,11 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_IMA_ADPCM ((__force snd_pcm_format_t) 22) #define SNDRV_PCM_FORMAT_MPEG ((__force snd_pcm_format_t) 23) #define SNDRV_PCM_FORMAT_GSM ((__force snd_pcm_format_t) 24) +#define SNDRV_PCM_FORMAT_S20_LE ((__force snd_pcm_format_t) 25) /* in four bytes, LSB justified */ +#define SNDRV_PCM_FORMAT_S20_BE ((__force snd_pcm_format_t) 26) /* in four bytes, LSB justified */ +#define SNDRV_PCM_FORMAT_U20_LE ((__force snd_pcm_format_t) 27) /* in four bytes, LSB justified */ +#define SNDRV_PCM_FORMAT_U20_BE ((__force snd_pcm_format_t) 28) /* in four bytes, LSB justified */ +/* gap in the numbering for a future standard linear format */ #define SNDRV_PCM_FORMAT_SPECIAL ((__force snd_pcm_format_t) 31) #define SNDRV_PCM_FORMAT_S24_3LE ((__force snd_pcm_format_t) 32) /* in three bytes */ #define SNDRV_PCM_FORMAT_S24_3BE ((__force snd_pcm_format_t) 33) /* in three bytes */ @@ -248,6 +253,8 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_LE #define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_LE #define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE +#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_LE +#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_LE #endif #ifdef SNDRV_BIG_ENDIAN #define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_BE @@ -259,6 +266,8 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_BE #define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_BE #define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE +#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_BE +#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_BE #endif typedef int __bitwise snd_pcm_subformat_t; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index d8749756352d..64c679d67109 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o +libbpf-y := libbpf.o bpf.o nlattr.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4555304dc18e..e6d5f8d1477f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -93,7 +93,6 @@ export prefix libdir src obj # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) LIB_FILE = libbpf.a libbpf.so @@ -150,7 +149,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: fixdep $(VERSION_FILES) all_cmd +all: fixdep all_cmd all_cmd: $(CMD_TARGETS) @@ -161,6 +160,12 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ + (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ + (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) @@ -169,21 +174,11 @@ $(OUTPUT)libbpf.so: $(BPF_IN) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -define update_dir - (echo $1 > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - echo ' UPDATE $@'; \ - mv -f $@.tmp $@; \ - fi); -endef - define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 '$(DESTDIR_SQ)$2' + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' endef install_lib: all_cmd @@ -192,7 +187,8 @@ install_lib: all_cmd install_headers: $(call QUIET_INSTALL, headers) \ - $(call do_install,bpf.h,$(prefix)/include/bpf,644) + $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); install: install_lib @@ -203,7 +199,7 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ + $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ $(RM) LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf @@ -213,10 +209,10 @@ PHONY += force elfdep bpfdep force: elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi + @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi + @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5128677e4117..592a58a2b681 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: LGPL-2.1 + /* * common eBPF ELF operations. * @@ -25,6 +27,16 @@ #include <asm/unistd.h> #include <linux/bpf.h> #include "bpf.h" +#include "libbpf.h" +#include "nlattr.h" +#include <linux/rtnetlink.h> +#include <linux/if_link.h> +#include <sys/socket.h> +#include <errno.h> + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif /* * When building perf, unistd.h is overridden. __NR_bpf is @@ -46,7 +58,9 @@ # endif #endif +#ifndef min #define min(x, y) ((x) < (y) ? (x) : (y)) +#endif static inline __u64 ptr_to_u64(const void *ptr) { @@ -413,3 +427,124 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) return err; } + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + socklen_t addrlen; + int one = 1; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + return -errno; + } + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + fprintf(stderr, "Netlink error reporting not supported\n"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != sa.nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + nla_dump_errormsg(nh); + goto cleanup; + case NLMSG_DONE: + break; + default: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6534889e2b2f..8d18fb73d7fb 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + /* * common eBPF ELF operations. * @@ -40,7 +42,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, __u32 map_flags); /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE 65536 +#define BPF_LOG_BUF_SIZE (256 * 1024) int bpf_load_program_name(enum bpf_prog_type type, const char *name, const struct bpf_insn *insns, size_t insns_cnt, const char *license, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5aa45f89da93..97073d649c1a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: LGPL-2.1 + /* * Common eBPF ELF object loading operations. * @@ -106,6 +108,8 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", }; int libbpf_strerror(int err, char *buf, size_t size) @@ -174,12 +178,19 @@ struct bpf_program { char *name; char *section_name; struct bpf_insn *insns; - size_t insns_cnt; + size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct { + struct reloc_desc { + enum { + RELO_LD64, + RELO_CALL, + } type; int insn_idx; - int map_idx; + union { + int map_idx; + int text_off; + }; } *reloc_desc; int nr_reloc; @@ -234,6 +245,7 @@ struct bpf_object { } *reloc; int nr_reloc; int maps_shndx; + int text_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -307,8 +319,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog under section %s\n", - section_name); + pr_warning("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); goto errout; } @@ -375,9 +387,13 @@ bpf_object__init_prog_names(struct bpf_object *obj) size_t pi, si; for (pi = 0; pi < obj->nr_programs; pi++) { - char *name = NULL; + const char *name = NULL; prog = &obj->programs[pi]; + if (prog->idx == obj->efile.text_shndx) { + name = ".text"; + goto skip_search; + } for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { @@ -387,6 +403,8 @@ bpf_object__init_prog_names(struct bpf_object *obj) continue; if (sym.st_shndx != prog->idx) continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, @@ -403,7 +421,7 @@ bpf_object__init_prog_names(struct bpf_object *obj) prog->section_name); return -EINVAL; } - +skip_search: prog->name = strdup(name); if (!prog->name) { pr_warning("failed to allocate memory for prog sym %s\n", @@ -724,6 +742,24 @@ bpf_object__init_maps(struct bpf_object *obj) return 0; } +static bool section_have_execinstr(struct bpf_object *obj, int idx) +{ + Elf_Scn *scn; + GElf_Shdr sh; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) + return false; + + if (gelf_getshdr(scn, &sh) != &sh) + return false; + + if (sh.sh_flags & SHF_EXECINSTR) + return true; + + return false; +} + static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; @@ -745,29 +781,29 @@ static int bpf_object__elf_collect(struct bpf_object *obj) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section header from %s\n", - obj->path); + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section name from %s\n", - obj->path); + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section data from %s(%s)\n", - name, obj->path); + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); err = -LIBBPF_ERRNO__FORMAT; goto out; } - pr_debug("section %s, size %ld, link %d, flags %lx, type=%d\n", - name, (unsigned long)data->d_size, + pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + idx, name, (unsigned long)data->d_size, (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); @@ -793,6 +829,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if ((sh.sh_type == SHT_PROGBITS) && (sh.sh_flags & SHF_EXECINSTR) && (data->d_size > 0)) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; err = bpf_object__add_program(obj, data->d_buf, data->d_size, name, idx); if (err) { @@ -805,6 +843,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; int nr_reloc = obj->efile.nr_reloc + 1; + int sec = sh.sh_info; /* points to other section */ + + /* Only do relo for section with exec instructions */ + if (!section_have_execinstr(obj, sec)) { + pr_debug("skip relo %s(%d) for section(%d)\n", + name, idx, sec); + continue; + } reloc = realloc(reloc, sizeof(*obj->efile.reloc) * nr_reloc); @@ -820,6 +866,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) obj->efile.reloc[n].shdr = sh; obj->efile.reloc[n].data = data; } + } else { + pr_debug("skip section(%d) %s\n", idx, name); } if (err) goto out; @@ -854,11 +902,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) } static int -bpf_program__collect_reloc(struct bpf_program *prog, - size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols, - int maps_shndx, struct bpf_map *maps) +bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, + Elf_Data *data, struct bpf_object *obj) { + Elf_Data *symbols = obj->efile.symbols; + int text_shndx = obj->efile.text_shndx; + int maps_shndx = obj->efile.maps_shndx; + struct bpf_map *maps = obj->maps; + size_t nr_maps = obj->nr_maps; int i, nrels; pr_debug("collecting relocating info for: '%s'\n", @@ -891,8 +942,11 @@ bpf_program__collect_reloc(struct bpf_program *prog, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } + pr_debug("relo for %lld value %lld name %d\n", + (long long) (rel.r_info >> 32), + (long long) sym.st_value, sym.st_name); - if (sym.st_shndx != maps_shndx) { + if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", prog->section_name, sym.st_shndx); return -LIBBPF_ERRNO__RELOC; @@ -901,6 +955,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, insn_idx = rel.r_offset / sizeof(struct bpf_insn); pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { + if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { + pr_warning("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + prog->reloc_desc[i].type = RELO_CALL; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].text_off = sym.st_value; + continue; + } + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -922,6 +987,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } + prog->reloc_desc[i].type = RELO_LD64; prog->reloc_desc[i].insn_idx = insn_idx; prog->reloc_desc[i].map_idx = map_idx; } @@ -961,27 +1027,76 @@ bpf_object__create_maps(struct bpf_object *obj) } static int +bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, + struct reloc_desc *relo) +{ + struct bpf_insn *insn, *new_insn; + struct bpf_program *text; + size_t new_cnt; + + if (relo->type != RELO_CALL) + return -LIBBPF_ERRNO__RELOC; + + if (prog->idx == obj->efile.text_shndx) { + pr_warning("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); + return -LIBBPF_ERRNO__RELOC; + } + + if (prog->main_prog_cnt == 0) { + text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); + if (!text) { + pr_warning("no .text section found yet relo into text exist\n"); + return -LIBBPF_ERRNO__RELOC; + } + new_cnt = prog->insns_cnt + text->insns_cnt; + new_insn = realloc(prog->insns, new_cnt * sizeof(*insn)); + if (!new_insn) { + pr_warning("oom in prog realloc\n"); + return -ENOMEM; + } + memcpy(new_insn + prog->insns_cnt, text->insns, + text->insns_cnt * sizeof(*insn)); + prog->insns = new_insn; + prog->main_prog_cnt = prog->insns_cnt; + prog->insns_cnt = new_cnt; + } + insn = &prog->insns[relo->insn_idx]; + insn->imm += prog->main_prog_cnt - relo->insn_idx; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, prog->section_name); + return 0; +} + +static int bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { - int i; + int i, err; if (!prog || !prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { - int insn_idx, map_idx; - struct bpf_insn *insns = prog->insns; + if (prog->reloc_desc[i].type == RELO_LD64) { + struct bpf_insn *insns = prog->insns; + int insn_idx, map_idx; - insn_idx = prog->reloc_desc[i].insn_idx; - map_idx = prog->reloc_desc[i].map_idx; + insn_idx = prog->reloc_desc[i].insn_idx; + map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { - pr_warning("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; + if (insn_idx >= (int)prog->insns_cnt) { + pr_warning("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insns[insn_idx].imm = obj->maps[map_idx].fd; + } else { + err = bpf_program__reloc_text(prog, obj, + &prog->reloc_desc[i]); + if (err) + return err; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - insns[insn_idx].imm = obj->maps[map_idx].fd; } zfree(&prog->reloc_desc); @@ -1024,7 +1139,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) Elf_Data *data = obj->efile.reloc[i].data; int idx = shdr->sh_info; struct bpf_program *prog; - size_t nr_maps = obj->nr_maps; if (shdr->sh_type != SHT_REL) { pr_warning("internal error at %d\n", __LINE__); @@ -1033,16 +1147,13 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { - pr_warning("relocation failed: no %d section\n", - idx); + pr_warning("relocation failed: no section(%d)\n", idx); return -LIBBPF_ERRNO__RELOC; } - err = bpf_program__collect_reloc(prog, nr_maps, + err = bpf_program__collect_reloc(prog, shdr, data, - obj->efile.symbols, - obj->efile.maps_shndx, - obj->maps); + obj); if (err) return err; } @@ -1195,6 +1306,8 @@ bpf_object__load_progs(struct bpf_object *obj) int err; for (i = 0; i < obj->nr_programs; i++) { + if (obj->programs[i].idx == obj->efile.text_shndx) + continue; err = bpf_program__load(&obj->programs[i], obj->license, obj->kern_version); @@ -1721,6 +1834,50 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type } +static const struct { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; +} section_names[] = { + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), + BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), + BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), + BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), +}; +#undef BPF_PROG_SEC + +static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog) +{ + int i; + + if (!prog->section_name) + goto err; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) + if (strncmp(prog->section_name, section_names[i].sec, + section_names[i].len) == 0) + return section_names[i].prog_type; + +err: + pr_warning("failed to guess program type based on section name %s\n", + prog->section_name); + + return BPF_PROG_TYPE_UNSPEC; +} + int bpf_map__fd(struct bpf_map *map) { return map ? map->fd : -EINVAL; @@ -1818,7 +1975,7 @@ long libbpf_get_error(const void *ptr) int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { - struct bpf_program *prog; + struct bpf_program *prog, *first_prog = NULL; struct bpf_object *obj; int err; @@ -1826,13 +1983,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, if (IS_ERR(obj)) return -ENOENT; - prog = bpf_program__next(NULL, obj); - if (!prog) { + bpf_object__for_each_program(prog, obj) { + /* + * If type is not specified, try to guess it based on + * section name. + */ + if (type == BPF_PROG_TYPE_UNSPEC) { + type = bpf_program__guess_type(prog); + if (type == BPF_PROG_TYPE_UNSPEC) { + bpf_object__close(obj); + return -EINVAL; + } + } + + bpf_program__set_type(prog, type); + if (prog->idx != obj->efile.text_shndx && !first_prog) + first_prog = prog; + } + + if (!first_prog) { + pr_warning("object file doesn't contain bpf program\n"); bpf_object__close(obj); return -ENOENT; } - bpf_program__set_type(prog, type); err = bpf_object__load(obj); if (err) { bpf_object__close(obj); @@ -1840,6 +2014,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, } *pobj = obj; - *prog_fd = bpf_program__fd(prog); + *prog_fd = bpf_program__fd(first_prog); return 0; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6e20003109e0..f85906533cdd 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + /* * Common eBPF ELF object loading operations. * @@ -42,6 +44,8 @@ enum libbpf_errno { LIBBPF_ERRNO__PROG2BIG, /* Program too big */ LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ + LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ + LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ __LIBBPF_ERRNO__END, }; @@ -246,4 +250,6 @@ long libbpf_get_error(const void *ptr); int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c new file mode 100644 index 000000000000..4719434278b2 --- /dev/null +++ b/tools/lib/bpf/nlattr.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * NETLINK Netlink attributes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> + */ + +#include <errno.h> +#include "nlattr.h" +#include <linux/rtnetlink.h> +#include <string.h> +#include <stdio.h> + +static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_U8] = sizeof(uint8_t), + [NLA_U16] = sizeof(uint16_t), + [NLA_U32] = sizeof(uint32_t), + [NLA_U64] = sizeof(uint64_t), + [NLA_STRING] = 1, + [NLA_FLAG] = 0, +}; + +static int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) +{ + int totlen = NLA_ALIGN(nla->nla_len); + + *remaining -= totlen; + return (struct nlattr *) ((char *) nla + totlen); +} + +static int nla_ok(const struct nlattr *nla, int remaining) +{ + return remaining >= sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && + nla->nla_len <= remaining; +} + +static void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static int nla_type(const struct nlattr *nla) +{ + return nla->nla_type & NLA_TYPE_MASK; +} + +static int validate_nla(struct nlattr *nla, int maxtype, + struct nla_policy *policy) +{ + struct nla_policy *pt; + unsigned int minlen = 0; + int type = nla_type(nla); + + if (type < 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + if (pt->type > NLA_TYPE_MAX) + return 0; + + if (pt->minlen) + minlen = pt->minlen; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (nla_len(nla) < minlen) + return -1; + + if (pt->maxlen && nla_len(nla) > pt->maxlen) + return -1; + + if (pt->type == NLA_STRING) { + char *data = nla_data(nla); + if (data[nla_len(nla) - 1] != '\0') + return -1; + } + + return 0; +} + +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * Create attribute index based on a stream of attributes. + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg head Head of attribute stream. + * @arg len Length of attribute stream. + * @arg policy Attribute validation policy. + * + * Iterates over the stream of attributes and stores a pointer to each + * attribute in the index array using the attribute type as index to + * the array. Attribute with a type greater than the maximum type + * specified will be silently ignored in order to maintain backwards + * compatibility. If \a policy is not NULL, the attribute will be + * validated using the specified policy. + * + * @see nla_validate + * @return 0 on success or a negative error code. + */ +static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + int type = nla_type(nla); + + if (type > maxtype) + continue; + + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + if (tb[type]) + fprintf(stderr, "Attribute of type %#x found multiple times in message, " + "previous attribute is being ignored.\n", type); + + tb[type] = nla; + } + + err = 0; +errout: + return err; +} + +/* dump netlink extended ack error message */ +int nla_dump_errormsg(struct nlmsghdr *nlh) +{ + struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { + [NLMSGERR_ATTR_MSG] = { .type = NLA_STRING }, + [NLMSGERR_ATTR_OFFS] = { .type = NLA_U32 }, + }; + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; + struct nlmsgerr *err; + char *errmsg = NULL; + int hlen, alen; + + /* no TLVs, nothing to do here */ + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 0; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + hlen = sizeof(*err); + + /* if NLM_F_CAPPED is set then the inner err msg was capped */ + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + hlen += nlmsg_len(&err->msg); + + attr = (struct nlattr *) ((void *) err + hlen); + alen = nlh->nlmsg_len - hlen; + + if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { + fprintf(stderr, + "Failed to parse extended error attributes\n"); + return 0; + } + + if (tb[NLMSGERR_ATTR_MSG]) + errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]); + + fprintf(stderr, "Kernel error message: %s\n", errmsg); + + return 0; +} diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h new file mode 100644 index 000000000000..931a71f68f93 --- /dev/null +++ b/tools/lib/bpf/nlattr.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +/* + * NETLINK Netlink attributes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> + */ + +#ifndef __NLATTR_H +#define __NLATTR_H + +#include <stdint.h> +#include <linux/netlink.h> +/* avoid multiple definition of netlink features */ +#define __LINUX_NETLINK_H + +/** + * Standard attribute types to specify validation policy + */ +enum { + NLA_UNSPEC, /**< Unspecified type, binary data chunk */ + NLA_U8, /**< 8 bit integer */ + NLA_U16, /**< 16 bit integer */ + NLA_U32, /**< 32 bit integer */ + NLA_U64, /**< 64 bit integer */ + NLA_STRING, /**< NUL terminated character string */ + NLA_FLAG, /**< Flag */ + NLA_MSECS, /**< Micro seconds (64bit) */ + NLA_NESTED, /**< Nested attributes */ + __NLA_TYPE_MAX, +}; + +#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) + +/** + * @ingroup attr + * Attribute validation policy. + * + * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. + */ +struct nla_policy { + /** Type of attribute or NLA_UNSPEC */ + uint16_t type; + + /** Minimal length of payload required */ + uint16_t minlen; + + /** Maximal length of payload allowed */ + uint16_t maxlen; +}; + +/** + * @ingroup attr + * Iterate over a stream of attributes + * @arg pos loop counter, set to current attribute + * @arg head head of attribute stream + * @arg len length of attribute stream + * @arg rem initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +int nla_dump_errormsg(struct nlmsghdr *nlh); + +#endif /* __NLATTR_H */ diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 42c15f906aac..a88bd507091e 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -22,22 +22,29 @@ #include <linux/bitmap.h> #include <linux/kernel.h> -#if !defined(find_next_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -48,7 +55,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -62,7 +72,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } #endif @@ -104,6 +114,15 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); +} +#endif + +#ifndef find_next_and_bit +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); } #endif diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c index 5ba754d17952..9997a8805a82 100644 --- a/tools/lib/subcmd/pager.c +++ b/tools/lib/subcmd/pager.c @@ -30,10 +30,13 @@ static void pager_preexec(void) * have real input */ fd_set in; + fd_set exception; FD_ZERO(&in); + FD_ZERO(&exception); FD_SET(0, &in); - select(1, &in, NULL, &in, NULL); + FD_SET(0, &exception); + select(1, &in, NULL, &exception, NULL); setenv("LESS", "FRSX", 0); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f40d46e24bcc..a8cb69a26576 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -138,6 +138,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "__reiserfs_panic", "lbug_with_loc", "fortify_panic", + "usercopy_abort", }; if (func->bind == STB_WEAK) @@ -543,18 +544,14 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); - /* - * FIXME: Thanks to retpolines, it's now considered - * normal for a function to call within itself. So - * disable this warning for now. - */ -#if 0 - if (!insn->call_dest) { - WARN_FUNC("can't find call dest symbol at offset 0x%lx", - insn->sec, insn->offset, dest_off); + + if (!insn->call_dest && !insn->ignore) { + WARN_FUNC("unsupported intra-function call", + insn->sec, insn->offset); + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); return -1; } -#endif + } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -598,7 +595,7 @@ static int handle_group_alt(struct objtool_file *file, struct instruction *orig_insn, struct instruction **new_insn) { - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; unsigned long dest_off; last_orig_insn = NULL; @@ -614,28 +611,30 @@ static int handle_group_alt(struct objtool_file *file, last_orig_insn = insn; } - if (!next_insn_same_sec(file, last_orig_insn)) { - WARN("%s: don't know how to handle alternatives at end of section", - special_alt->orig_sec->name); - return -1; - } - - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { - WARN("malloc failed"); - return -1; + if (next_insn_same_sec(file, last_orig_insn)) { + fake_jump = malloc(sizeof(*fake_jump)); + if (!fake_jump) { + WARN("malloc failed"); + return -1; + } + memset(fake_jump, 0, sizeof(*fake_jump)); + INIT_LIST_HEAD(&fake_jump->alts); + clear_insn_state(&fake_jump->state); + + fake_jump->sec = special_alt->new_sec; + fake_jump->offset = -1; + fake_jump->type = INSN_JUMP_UNCONDITIONAL; + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); + fake_jump->ignore = true; } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - clear_insn_state(&fake_jump->state); - - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = -1; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - fake_jump->ignore = true; if (!special_alt->new_len) { + if (!fake_jump) { + WARN("%s: empty alternative at end of section", + special_alt->orig_sec->name); + return -1; + } + *new_insn = fake_jump; return 0; } @@ -648,6 +647,8 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; + insn->ignore = orig_insn->ignore_alts; + if (insn->type != INSN_JUMP_CONDITIONAL && insn->type != INSN_JUMP_UNCONDITIONAL) continue; @@ -656,8 +657,14 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = insn->offset + insn->len + insn->immediate; - if (dest_off == special_alt->new_off + special_alt->new_len) + if (dest_off == special_alt->new_off + special_alt->new_len) { + if (!fake_jump) { + WARN("%s: alternative jump to end of section", + special_alt->orig_sec->name); + return -1; + } insn->jump_dest = fake_jump; + } if (!insn->jump_dest) { WARN_FUNC("can't find alternative jump destination", @@ -672,7 +679,8 @@ static int handle_group_alt(struct objtool_file *file, return -1; } - list_add(&fake_jump->list, &last_new_insn->list); + if (fake_jump) + list_add(&fake_jump->list, &last_new_insn->list); return 0; } @@ -729,10 +737,6 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } - /* Ignore retpoline alternatives. */ - if (orig_insn->ignore_alts) - continue; - new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -848,8 +852,14 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, * This is a fairly uncommon pattern which is new for GCC 6. As of this * writing, there are 11 occurrences of it in the allmodconfig kernel. * + * As of GCC 7 there are quite a few more of these and the 'in between' code + * is significant. Esp. with KASAN enabled some of the code between the mov + * and jmpq uses .rodata itself, which can confuse things. + * * TODO: Once we have DWARF CFI and smarter instruction decoding logic, * ensure the same register is used in the mov and jump instructions. + * + * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ static struct rela *find_switch_table(struct objtool_file *file, struct symbol *func, @@ -871,12 +881,25 @@ static struct rela *find_switch_table(struct objtool_file *file, text_rela->addend + 4); if (!rodata_rela) return NULL; + file->ignore_unreachables = true; return rodata_rela; } /* case 3 */ - func_for_each_insn_continue_reverse(file, func, insn) { + /* + * Backward search using the @first_jump_src links, these help avoid + * much of the 'in between' code. Which avoids us getting confused by + * it. + */ + for (insn = list_prev_entry(insn, list); + + &insn->list != &file->insn_list && + insn->sec == func->sec && + insn->offset >= func->offset; + + insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { + if (insn->type == INSN_JUMP_DYNAMIC) break; @@ -906,14 +929,32 @@ static struct rela *find_switch_table(struct objtool_file *file, return NULL; } + static int add_func_switch_tables(struct objtool_file *file, struct symbol *func) { - struct instruction *insn, *prev_jump = NULL; + struct instruction *insn, *last = NULL, *prev_jump = NULL; struct rela *rela, *prev_rela = NULL; int ret; func_for_each_insn(file, func, insn) { + if (!last) + last = insn; + + /* + * Store back-pointers for unconditional forward jumps such + * that find_switch_table() can back-track using those and + * avoid some potentially confusing code. + */ + if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && + insn->offset > last->offset && + insn->jump_dest->offset > insn->offset && + !insn->jump_dest->first_jump_src) { + + insn->jump_dest->first_jump_src = insn; + last = insn->jump_dest; + } + if (insn->type != INSN_JUMP_DYNAMIC) continue; @@ -1089,11 +1130,11 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_call_destinations(file); + ret = add_special_section_alts(file); if (ret) return ret; - ret = add_special_section_alts(file); + ret = add_call_destinations(file); if (ret) return ret; @@ -1720,10 +1761,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, insn->visited = true; - list_for_each_entry(alt, &insn->alts, list) { - ret = validate_branch(file, alt->insn, state); - if (ret) - return 1; + if (!insn->ignore_alts) { + list_for_each_entry(alt, &insn->alts, list) { + ret = validate_branch(file, alt->insn, state); + if (ret) + return 1; + } } switch (insn->type) { @@ -1893,13 +1936,19 @@ static bool ignore_unreachable_insn(struct instruction *insn) if (is_kasan_insn(insn) || is_ubsan_insn(insn)) return true; - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) { - insn = insn->jump_dest; - continue; + if (insn->type == INSN_JUMP_UNCONDITIONAL) { + if (insn->jump_dest && + insn->jump_dest->func == insn->func) { + insn = insn->jump_dest; + continue; + } + + break; } if (insn->offset + insn->len >= insn->func->offset + insn->func->len) break; + insn = list_next_entry(insn, list); } diff --git a/tools/objtool/check.h b/tools/objtool/check.h index dbadb304a410..23a1d065cae1 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -47,6 +47,7 @@ struct instruction { bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; + struct instruction *first_jump_src; struct list_head alts; struct symbol *func; struct stack_op stack_op; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e61fe703197b..18384d9be4e1 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, struct orc_entry *orc; struct rela *rela; + if (!insn_sec->sym) { + WARN("missing symbol for section %s", insn_sec->name); + return -1; + } + /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 643cc4ba6872..3e5135dded16 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -31,5 +31,6 @@ config.mak.autogen .config-detected util/intel-pt-decoder/inat-tables.c arch/*/include/generated/ +trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index f0796a47dfa3..90bb4aabe4f8 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -30,6 +30,10 @@ OPTIONS for 'convert' -i:: Specify input perf data file path. +-f:: +--force:: + Don't complain, do it. + -v:: --verbose:: Be more verbose (show counter open errors, etc). diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 48228de415d0..dfa6e3103437 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -10,15 +10,19 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 out := $(OUTPUT)arch/s390/include/generated/asm header := $(out)/syscalls_64.c -sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h -sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/ +syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl +sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls +sysdef := $(sysprf)/syscall.tbl systbl := $(sysprf)/mksyscalltbl # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@ + @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + (diff -B $(sysdef) $(syskrn) >/dev/null) \ + || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true + $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ clean:: $(call QUIET_CLEAN, s390) $(RM) $(header) diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl index 7fa0d0abd419..72ecbb676370 100755 --- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl @@ -3,25 +3,23 @@ # # Generate system call table for perf # -# -# Copyright IBM Corp. 2017 +# Copyright IBM Corp. 2017, 2018 # Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> # -gcc=$1 -input=$2 +SYSCALL_TBL=$1 -if ! test -r $input; then +if ! test -r $SYSCALL_TBL; then echo "Could not read input file" >&2 exit 1 fi create_table() { - local max_nr + local max_nr nr abi sc discard echo 'static const char *syscalltbl_s390_64[] = {' - while read sc nr; do + while read nr abi sc discard; do printf '\t[%d] = "%s",\n' $nr $sc max_nr=$nr done @@ -29,8 +27,6 @@ create_table() echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr" } - -$gcc -m64 -E -dM -x c $input \ - |sed -ne 's/^#define __NR_//p' \ - |sort -t' ' -k2 -nu \ +grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL \ + |sort -k1 -n \ |create_table diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl new file mode 100644 index 000000000000..b38d48464368 --- /dev/null +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -0,0 +1,390 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# System call table for s390 +# +# Format: +# +# <nr> <abi> <syscall> <entry-64bit> <compat-entry> +# +# where <abi> can be common, 64, or 32 + +1 common exit sys_exit sys_exit +2 common fork sys_fork sys_fork +3 common read sys_read compat_sys_s390_read +4 common write sys_write compat_sys_s390_write +5 common open sys_open compat_sys_open +6 common close sys_close sys_close +7 common restart_syscall sys_restart_syscall sys_restart_syscall +8 common creat sys_creat compat_sys_creat +9 common link sys_link compat_sys_link +10 common unlink sys_unlink compat_sys_unlink +11 common execve sys_execve compat_sys_execve +12 common chdir sys_chdir compat_sys_chdir +13 32 time - compat_sys_time +14 common mknod sys_mknod compat_sys_mknod +15 common chmod sys_chmod compat_sys_chmod +16 32 lchown - compat_sys_s390_lchown16 +19 common lseek sys_lseek compat_sys_lseek +20 common getpid sys_getpid sys_getpid +21 common mount sys_mount compat_sys_mount +22 common umount sys_oldumount compat_sys_oldumount +23 32 setuid - compat_sys_s390_setuid16 +24 32 getuid - compat_sys_s390_getuid16 +25 32 stime - compat_sys_stime +26 common ptrace sys_ptrace compat_sys_ptrace +27 common alarm sys_alarm sys_alarm +29 common pause sys_pause sys_pause +30 common utime sys_utime compat_sys_utime +33 common access sys_access compat_sys_access +34 common nice sys_nice sys_nice +36 common sync sys_sync sys_sync +37 common kill sys_kill sys_kill +38 common rename sys_rename compat_sys_rename +39 common mkdir sys_mkdir compat_sys_mkdir +40 common rmdir sys_rmdir compat_sys_rmdir +41 common dup sys_dup sys_dup +42 common pipe sys_pipe compat_sys_pipe +43 common times sys_times compat_sys_times +45 common brk sys_brk compat_sys_brk +46 32 setgid - compat_sys_s390_setgid16 +47 32 getgid - compat_sys_s390_getgid16 +48 common signal sys_signal compat_sys_signal +49 32 geteuid - compat_sys_s390_geteuid16 +50 32 getegid - compat_sys_s390_getegid16 +51 common acct sys_acct compat_sys_acct +52 common umount2 sys_umount compat_sys_umount +54 common ioctl sys_ioctl compat_sys_ioctl +55 common fcntl sys_fcntl compat_sys_fcntl +57 common setpgid sys_setpgid sys_setpgid +60 common umask sys_umask sys_umask +61 common chroot sys_chroot compat_sys_chroot +62 common ustat sys_ustat compat_sys_ustat +63 common dup2 sys_dup2 sys_dup2 +64 common getppid sys_getppid sys_getppid +65 common getpgrp sys_getpgrp sys_getpgrp +66 common setsid sys_setsid sys_setsid +67 common sigaction sys_sigaction compat_sys_sigaction +70 32 setreuid - compat_sys_s390_setreuid16 +71 32 setregid - compat_sys_s390_setregid16 +72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend +73 common sigpending sys_sigpending compat_sys_sigpending +74 common sethostname sys_sethostname compat_sys_sethostname +75 common setrlimit sys_setrlimit compat_sys_setrlimit +76 32 getrlimit - compat_sys_old_getrlimit +77 common getrusage sys_getrusage compat_sys_getrusage +78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday +79 common settimeofday sys_settimeofday compat_sys_settimeofday +80 32 getgroups - compat_sys_s390_getgroups16 +81 32 setgroups - compat_sys_s390_setgroups16 +83 common symlink sys_symlink compat_sys_symlink +85 common readlink sys_readlink compat_sys_readlink +86 common uselib sys_uselib compat_sys_uselib +87 common swapon sys_swapon compat_sys_swapon +88 common reboot sys_reboot compat_sys_reboot +89 common readdir - compat_sys_old_readdir +90 common mmap sys_old_mmap compat_sys_s390_old_mmap +91 common munmap sys_munmap compat_sys_munmap +92 common truncate sys_truncate compat_sys_truncate +93 common ftruncate sys_ftruncate compat_sys_ftruncate +94 common fchmod sys_fchmod sys_fchmod +95 32 fchown - compat_sys_s390_fchown16 +96 common getpriority sys_getpriority sys_getpriority +97 common setpriority sys_setpriority sys_setpriority +99 common statfs sys_statfs compat_sys_statfs +100 common fstatfs sys_fstatfs compat_sys_fstatfs +101 32 ioperm - - +102 common socketcall sys_socketcall compat_sys_socketcall +103 common syslog sys_syslog compat_sys_syslog +104 common setitimer sys_setitimer compat_sys_setitimer +105 common getitimer sys_getitimer compat_sys_getitimer +106 common stat sys_newstat compat_sys_newstat +107 common lstat sys_newlstat compat_sys_newlstat +108 common fstat sys_newfstat compat_sys_newfstat +110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +111 common vhangup sys_vhangup sys_vhangup +112 common idle - - +114 common wait4 sys_wait4 compat_sys_wait4 +115 common swapoff sys_swapoff compat_sys_swapoff +116 common sysinfo sys_sysinfo compat_sys_sysinfo +117 common ipc sys_s390_ipc compat_sys_s390_ipc +118 common fsync sys_fsync sys_fsync +119 common sigreturn sys_sigreturn compat_sys_sigreturn +120 common clone sys_clone compat_sys_clone +121 common setdomainname sys_setdomainname compat_sys_setdomainname +122 common uname sys_newuname compat_sys_newuname +124 common adjtimex sys_adjtimex compat_sys_adjtimex +125 common mprotect sys_mprotect compat_sys_mprotect +126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask +127 common create_module - - +128 common init_module sys_init_module compat_sys_init_module +129 common delete_module sys_delete_module compat_sys_delete_module +130 common get_kernel_syms - - +131 common quotactl sys_quotactl compat_sys_quotactl +132 common getpgid sys_getpgid sys_getpgid +133 common fchdir sys_fchdir sys_fchdir +134 common bdflush sys_bdflush compat_sys_bdflush +135 common sysfs sys_sysfs compat_sys_sysfs +136 common personality sys_s390_personality sys_s390_personality +137 common afs_syscall - - +138 32 setfsuid - compat_sys_s390_setfsuid16 +139 32 setfsgid - compat_sys_s390_setfsgid16 +140 32 _llseek - compat_sys_llseek +141 common getdents sys_getdents compat_sys_getdents +142 32 _newselect - compat_sys_select +142 64 select sys_select - +143 common flock sys_flock sys_flock +144 common msync sys_msync compat_sys_msync +145 common readv sys_readv compat_sys_readv +146 common writev sys_writev compat_sys_writev +147 common getsid sys_getsid sys_getsid +148 common fdatasync sys_fdatasync sys_fdatasync +149 common _sysctl sys_sysctl compat_sys_sysctl +150 common mlock sys_mlock compat_sys_mlock +151 common munlock sys_munlock compat_sys_munlock +152 common mlockall sys_mlockall sys_mlockall +153 common munlockall sys_munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam +155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval +162 common nanosleep sys_nanosleep compat_sys_nanosleep +163 common mremap sys_mremap compat_sys_mremap +164 32 setresuid - compat_sys_s390_setresuid16 +165 32 getresuid - compat_sys_s390_getresuid16 +167 common query_module - - +168 common poll sys_poll compat_sys_poll +169 common nfsservctl - - +170 32 setresgid - compat_sys_s390_setresgid16 +171 32 getresgid - compat_sys_s390_getresgid16 +172 common prctl sys_prctl compat_sys_prctl +173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait +178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend +180 common pread64 sys_pread64 compat_sys_s390_pread64 +181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 +182 32 chown - compat_sys_s390_chown16 +183 common getcwd sys_getcwd compat_sys_getcwd +184 common capget sys_capget compat_sys_capget +185 common capset sys_capset compat_sys_capset +186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack +187 common sendfile sys_sendfile64 compat_sys_sendfile +188 common getpmsg - - +189 common putpmsg - - +190 common vfork sys_vfork sys_vfork +191 32 ugetrlimit - compat_sys_getrlimit +191 64 getrlimit sys_getrlimit - +192 32 mmap2 - compat_sys_s390_mmap2 +193 32 truncate64 - compat_sys_s390_truncate64 +194 32 ftruncate64 - compat_sys_s390_ftruncate64 +195 32 stat64 - compat_sys_s390_stat64 +196 32 lstat64 - compat_sys_s390_lstat64 +197 32 fstat64 - compat_sys_s390_fstat64 +198 32 lchown32 - compat_sys_lchown +198 64 lchown sys_lchown - +199 32 getuid32 - sys_getuid +199 64 getuid sys_getuid - +200 32 getgid32 - sys_getgid +200 64 getgid sys_getgid - +201 32 geteuid32 - sys_geteuid +201 64 geteuid sys_geteuid - +202 32 getegid32 - sys_getegid +202 64 getegid sys_getegid - +203 32 setreuid32 - sys_setreuid +203 64 setreuid sys_setreuid - +204 32 setregid32 - sys_setregid +204 64 setregid sys_setregid - +205 32 getgroups32 - compat_sys_getgroups +205 64 getgroups sys_getgroups - +206 32 setgroups32 - compat_sys_setgroups +206 64 setgroups sys_setgroups - +207 32 fchown32 - sys_fchown +207 64 fchown sys_fchown - +208 32 setresuid32 - sys_setresuid +208 64 setresuid sys_setresuid - +209 32 getresuid32 - compat_sys_getresuid +209 64 getresuid sys_getresuid - +210 32 setresgid32 - sys_setresgid +210 64 setresgid sys_setresgid - +211 32 getresgid32 - compat_sys_getresgid +211 64 getresgid sys_getresgid - +212 32 chown32 - compat_sys_chown +212 64 chown sys_chown - +213 32 setuid32 - sys_setuid +213 64 setuid sys_setuid - +214 32 setgid32 - sys_setgid +214 64 setgid sys_setgid - +215 32 setfsuid32 - sys_setfsuid +215 64 setfsuid sys_setfsuid - +216 32 setfsgid32 - sys_setfsgid +216 64 setfsgid sys_setfsgid - +217 common pivot_root sys_pivot_root compat_sys_pivot_root +218 common mincore sys_mincore compat_sys_mincore +219 common madvise sys_madvise compat_sys_madvise +220 common getdents64 sys_getdents64 compat_sys_getdents64 +221 32 fcntl64 - compat_sys_fcntl64 +222 common readahead sys_readahead compat_sys_s390_readahead +223 32 sendfile64 - compat_sys_sendfile64 +224 common setxattr sys_setxattr compat_sys_setxattr +225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr +226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr +227 common getxattr sys_getxattr compat_sys_getxattr +228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr +229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr +230 common listxattr sys_listxattr compat_sys_listxattr +231 common llistxattr sys_llistxattr compat_sys_llistxattr +232 common flistxattr sys_flistxattr compat_sys_flistxattr +233 common removexattr sys_removexattr compat_sys_removexattr +234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr +235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr +236 common gettid sys_gettid sys_gettid +237 common tkill sys_tkill sys_tkill +238 common futex sys_futex compat_sys_futex +239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity +241 common tgkill sys_tgkill sys_tgkill +243 common io_setup sys_io_setup compat_sys_io_setup +244 common io_destroy sys_io_destroy compat_sys_io_destroy +245 common io_getevents sys_io_getevents compat_sys_io_getevents +246 common io_submit sys_io_submit compat_sys_io_submit +247 common io_cancel sys_io_cancel compat_sys_io_cancel +248 common exit_group sys_exit_group sys_exit_group +249 common epoll_create sys_epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl +251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait +252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 +254 common timer_create sys_timer_create compat_sys_timer_create +255 common timer_settime sys_timer_settime compat_sys_timer_settime +256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime +257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime compat_sys_clock_settime +260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime +261 common clock_getres sys_clock_getres compat_sys_clock_getres +262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep +264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 +265 common statfs64 sys_statfs64 compat_sys_statfs64 +266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages +268 common mbind sys_mbind compat_sys_mbind +269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +271 common mq_open sys_mq_open compat_sys_mq_open +272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend +274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive +275 common mq_notify sys_mq_notify compat_sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr +277 common kexec_load sys_kexec_load compat_sys_kexec_load +278 common add_key sys_add_key compat_sys_add_key +279 common request_key sys_request_key compat_sys_request_key +280 common keyctl sys_keyctl compat_sys_keyctl +281 common waitid sys_waitid compat_sys_waitid +282 common ioprio_set sys_ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages +288 common openat sys_openat compat_sys_openat +289 common mkdirat sys_mkdirat compat_sys_mkdirat +290 common mknodat sys_mknodat compat_sys_mknodat +291 common fchownat sys_fchownat compat_sys_fchownat +292 common futimesat sys_futimesat compat_sys_futimesat +293 32 fstatat64 - compat_sys_s390_fstatat64 +293 64 newfstatat sys_newfstatat - +294 common unlinkat sys_unlinkat compat_sys_unlinkat +295 common renameat sys_renameat compat_sys_renameat +296 common linkat sys_linkat compat_sys_linkat +297 common symlinkat sys_symlinkat compat_sys_symlinkat +298 common readlinkat sys_readlinkat compat_sys_readlinkat +299 common fchmodat sys_fchmodat compat_sys_fchmodat +300 common faccessat sys_faccessat compat_sys_faccessat +301 common pselect6 sys_pselect6 compat_sys_pselect6 +302 common ppoll sys_ppoll compat_sys_ppoll +303 common unshare sys_unshare compat_sys_unshare +304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list +305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list +306 common splice sys_splice compat_sys_splice +307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range +308 common tee sys_tee compat_sys_tee +309 common vmsplice sys_vmsplice compat_sys_vmsplice +310 common move_pages sys_move_pages compat_sys_move_pages +311 common getcpu sys_getcpu compat_sys_getcpu +312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait +313 common utimes sys_utimes compat_sys_utimes +314 common fallocate sys_fallocate compat_sys_s390_fallocate +315 common utimensat sys_utimensat compat_sys_utimensat +316 common signalfd sys_signalfd compat_sys_signalfd +317 common timerfd - - +318 common eventfd sys_eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime +321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime +322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 +323 common eventfd2 sys_eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 compat_sys_pipe2 +326 common dup3 sys_dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv compat_sys_preadv +329 common pwritev sys_pwritev compat_sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open +332 common fanotify_init sys_fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark +334 common prlimit64 sys_prlimit64 compat_sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime +338 common syncfs sys_syncfs sys_syncfs +339 common setns sys_setns sys_setns +340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp compat_sys_kcmp +344 common finit_module sys_finit_module compat_sys_finit_module +345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr +346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr +347 common renameat2 sys_renameat2 compat_sys_renameat2 +348 common seccomp sys_seccomp compat_sys_seccomp +349 common getrandom sys_getrandom compat_sys_getrandom +350 common memfd_create sys_memfd_create compat_sys_memfd_create +351 common bpf sys_bpf compat_sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read +354 common execveat sys_execveat compat_sys_execveat +355 common userfaultfd sys_userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg +358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg +359 common socket sys_socket sys_socket +360 common socketpair sys_socketpair compat_sys_socketpair +361 common bind sys_bind compat_sys_bind +362 common connect sys_connect compat_sys_connect +363 common listen sys_listen sys_listen +364 common accept4 sys_accept4 compat_sys_accept4 +365 common getsockopt sys_getsockopt compat_sys_getsockopt +366 common setsockopt sys_setsockopt compat_sys_setsockopt +367 common getsockname sys_getsockname compat_sys_getsockname +368 common getpeername sys_getpeername compat_sys_getpeername +369 common sendto sys_sendto compat_sys_sendto +370 common sendmsg sys_sendmsg compat_sys_sendmsg +371 common recvfrom sys_recvfrom compat_sys_recvfrom +372 common recvmsg sys_recvmsg compat_sys_recvmsg +373 common shutdown sys_shutdown sys_shutdown +374 common mlock2 sys_mlock2 compat_sys_mlock2 +375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range +376 common preadv2 sys_preadv2 compat_sys_preadv2 +377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage +379 common statx sys_statx compat_sys_statx +380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c0815a37fdb5..539c3d460158 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2245,7 +2245,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "? - help"); + key = hist_browser__run(browser, "? - help", true); switch (key) { case 's': @@ -2314,7 +2314,7 @@ static int perf_c2c__hists_browse(struct hists *hists) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "? - help"); + key = hist_browser__run(browser, "? - help", true); switch (key) { case 'q': diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 65681a1a292a..bf4ca749d1ac 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1566,7 +1566,8 @@ static struct option __record_options[] = { OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, "Record the sample timestamps"), - OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), + OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, + "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 42a52dcc41cd..4ad5dc649716 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -530,7 +530,8 @@ static int report__browse_hists(struct report *rep) case 1: ret = perf_evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env); + &session->header.env, + true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c6ccda52117d..b7c823ba8374 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -283,8 +283,9 @@ static void perf_top__print_sym_table(struct perf_top *top) printf("%-*.*s\n", win_width, win_width, graph_dotted_line); - if (hists->stats.nr_lost_warned != - hists->stats.nr_events[PERF_RECORD_LOST]) { + if (!top->record_opts.overwrite && + (hists->stats.nr_lost_warned != + hists->stats.nr_events[PERF_RECORD_LOST])) { hists->stats.nr_lost_warned = hists->stats.nr_events[PERF_RECORD_LOST]; color_fprintf(stdout, PERF_COLOR_RED, @@ -611,7 +612,8 @@ static void *display_thread_tui(void *arg) perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env); + &top->session->header.env, + !top->record_opts.overwrite); done = 1; return NULL; @@ -807,15 +809,23 @@ static void perf_event__process_sample(struct perf_tool *tool, static void perf_top__mmap_read_idx(struct perf_top *top, int idx) { + struct record_opts *opts = &top->record_opts; + struct perf_evlist *evlist = top->evlist; struct perf_sample sample; struct perf_evsel *evsel; + struct perf_mmap *md; struct perf_session *session = top->session; union perf_event *event; struct machine *machine; + u64 end, start; int ret; - while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { - ret = perf_evlist__parse_sample(top->evlist, event, &sample); + md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; + if (perf_mmap__read_init(md, opts->overwrite, &start, &end) < 0) + return; + + while ((event = perf_mmap__read_event(md, opts->overwrite, &start, end)) != NULL) { + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); goto next_event; @@ -869,16 +879,120 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) } else ++session->evlist->stats.nr_unknown_events; next_event: - perf_evlist__mmap_consume(top->evlist, idx); + perf_mmap__consume(md, opts->overwrite); } + + perf_mmap__read_done(md); } static void perf_top__mmap_read(struct perf_top *top) { + bool overwrite = top->record_opts.overwrite; + struct perf_evlist *evlist = top->evlist; + unsigned long long start, end; int i; + start = rdclock(); + if (overwrite) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); + for (i = 0; i < top->evlist->nr_mmaps; i++) perf_top__mmap_read_idx(top, i); + + if (overwrite) { + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); + } + end = rdclock(); + + if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC) + ui__warning("Too slow to read ring buffer.\n" + "Please try increasing the period (-c) or\n" + "decreasing the freq (-F) or\n" + "limiting the number of CPUs (-C)\n"); +} + +/* + * Check per-event overwrite term. + * perf top should support consistent term for all events. + * - All events don't have per-event term + * E.g. "cpu/cpu-cycles/,cpu/instructions/" + * Nothing change, return 0. + * - All events have same per-event term + * E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/ + * Using the per-event setting to replace the opts->overwrite if + * they are different, then return 0. + * - Events have different per-event term + * E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/" + * Return -1 + * - Some of the event set per-event term, but some not. + * E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/" + * Return -1 + */ +static int perf_top__overwrite_check(struct perf_top *top) +{ + struct record_opts *opts = &top->record_opts; + struct perf_evlist *evlist = top->evlist; + struct perf_evsel_config_term *term; + struct list_head *config_terms; + struct perf_evsel *evsel; + int set, overwrite = -1; + + evlist__for_each_entry(evlist, evsel) { + set = -1; + config_terms = &evsel->config_terms; + list_for_each_entry(term, config_terms, list) { + if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE) + set = term->val.overwrite ? 1 : 0; + } + + /* no term for current and previous event (likely) */ + if ((overwrite < 0) && (set < 0)) + continue; + + /* has term for both current and previous event, compare */ + if ((overwrite >= 0) && (set >= 0) && (overwrite != set)) + return -1; + + /* no term for current event but has term for previous one */ + if ((overwrite >= 0) && (set < 0)) + return -1; + + /* has term for current event */ + if ((overwrite < 0) && (set >= 0)) { + /* if it's first event, set overwrite */ + if (evsel == perf_evlist__first(evlist)) + overwrite = set; + else + return -1; + } + } + + if ((overwrite >= 0) && (opts->overwrite != overwrite)) + opts->overwrite = overwrite; + + return 0; +} + +static int perf_top_overwrite_fallback(struct perf_top *top, + struct perf_evsel *evsel) +{ + struct record_opts *opts = &top->record_opts; + struct perf_evlist *evlist = top->evlist; + struct perf_evsel *counter; + + if (!opts->overwrite) + return 0; + + /* only fall back when first event fails */ + if (evsel != perf_evlist__first(evlist)) + return 0; + + evlist__for_each_entry(evlist, counter) + counter->attr.write_backward = false; + opts->overwrite = false; + ui__warning("fall back to non-overwrite mode\n"); + return 1; } static int perf_top__start_counters(struct perf_top *top) @@ -888,12 +1002,33 @@ static int perf_top__start_counters(struct perf_top *top) struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; + if (perf_top__overwrite_check(top)) { + ui__error("perf top only support consistent per-event " + "overwrite setting for all events\n"); + goto out_err; + } + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, counter) { try_again: if (perf_evsel__open(counter, top->evlist->cpus, top->evlist->threads) < 0) { + + /* + * Specially handle overwrite fall back. + * Because perf top is the only tool which has + * overwrite mode by default, support + * both overwrite and non-overwrite mode, and + * require consistent mode for all events. + * + * May move it to generic code with more tools + * have similar attribute. + */ + if (perf_missing_features.write_backward && + perf_top_overwrite_fallback(top, counter)) + goto try_again; + if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); @@ -1033,7 +1168,7 @@ static int __cmd_top(struct perf_top *top) perf_top__mmap_read(top); - if (hits == top->samples) + if (opts->overwrite || (hits == top->samples)) ret = perf_evlist__poll(top->evlist, 100); if (resize) { @@ -1127,6 +1262,7 @@ int cmd_top(int argc, const char **argv) .uses_mmap = true, }, .proc_map_timeout = 500, + .overwrite = 1, }, .max_stack = sysctl_perf_event_max_stack, .sym_pcnt_filter = 5, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 17d11deeb88d..e7f1b182fc15 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1661,9 +1661,12 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse struct callchain_cursor *cursor) { struct addr_location al; + int max_stack = evsel->attr.sample_max_stack ? + evsel->attr.sample_max_stack : + trace->max_stack; if (machine__resolve(trace->host, &al, sample) < 0 || - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack)) + thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack)) return -1; return 0; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 51abdb0a4047..790ec25919a0 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -33,7 +33,6 @@ arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h -arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h arch/alpha/include/uapi/asm/errno.h diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2357f4ccc9c7..cfe46236a5e5 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -50,6 +50,7 @@ struct record_opts { bool sample_time_set; bool sample_cpu; bool period; + bool period_set; bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json new file mode 100644 index 000000000000..3b6208763e50 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json @@ -0,0 +1,27 @@ +[ + {, + "EventCode": "0x7A", + "EventName": "BR_INDIRECT_SPEC", + "BriefDescription": "Branch speculatively executed - Indirect branch" + }, + {, + "EventCode": "0xC9", + "EventName": "BR_COND", + "BriefDescription": "Conditional branch executed" + }, + {, + "EventCode": "0xCA", + "EventName": "BR_INDIRECT_MISPRED", + "BriefDescription": "Indirect branch mispredicted" + }, + {, + "EventCode": "0xCB", + "EventName": "BR_INDIRECT_MISPRED_ADDR", + "BriefDescription": "Indirect branch mispredicted because of address miscompare" + }, + {, + "EventCode": "0xCC", + "EventName": "BR_COND_MISPRED", + "BriefDescription": "Conditional branch mispredicted" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json new file mode 100644 index 000000000000..480d9f7460ab --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json @@ -0,0 +1,22 @@ +[ + {, + "EventCode": "0x60", + "EventName": "BUS_ACCESS_LD", + "BriefDescription": "Bus access - Read" + }, + {, + "EventCode": "0x61", + "EventName": "BUS_ACCESS_ST", + "BriefDescription": "Bus access - Write" + }, + {, + "EventCode": "0xC0", + "EventName": "EXT_MEM_REQ", + "BriefDescription": "External memory request" + }, + {, + "EventCode": "0xC1", + "EventName": "EXT_MEM_REQ_NC", + "BriefDescription": "Non-cacheable external memory request" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json new file mode 100644 index 000000000000..11baad6344b9 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json @@ -0,0 +1,27 @@ +[ + {, + "EventCode": "0xC2", + "EventName": "PREFETCH_LINEFILL", + "BriefDescription": "Linefill because of prefetch" + }, + {, + "EventCode": "0xC3", + "EventName": "PREFETCH_LINEFILL_DROP", + "BriefDescription": "Instruction Cache Throttle occurred" + }, + {, + "EventCode": "0xC4", + "EventName": "READ_ALLOC_ENTER", + "BriefDescription": "Entering read allocate mode" + }, + {, + "EventCode": "0xC5", + "EventName": "READ_ALLOC", + "BriefDescription": "Read allocate mode" + }, + {, + "EventCode": "0xC8", + "EventName": "EXT_SNOOP", + "BriefDescription": "SCU Snooped data from another CPU for this CPU" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json new file mode 100644 index 000000000000..480d9f7460ab --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json @@ -0,0 +1,22 @@ +[ + {, + "EventCode": "0x60", + "EventName": "BUS_ACCESS_LD", + "BriefDescription": "Bus access - Read" + }, + {, + "EventCode": "0x61", + "EventName": "BUS_ACCESS_ST", + "BriefDescription": "Bus access - Write" + }, + {, + "EventCode": "0xC0", + "EventName": "EXT_MEM_REQ", + "BriefDescription": "External memory request" + }, + {, + "EventCode": "0xC1", + "EventName": "EXT_MEM_REQ_NC", + "BriefDescription": "Non-cacheable external memory request" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json new file mode 100644 index 000000000000..73a22402d003 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json @@ -0,0 +1,32 @@ +[ + {, + "EventCode": "0x86", + "EventName": "EXC_IRQ", + "BriefDescription": "Exception taken, IRQ" + }, + {, + "EventCode": "0x87", + "EventName": "EXC_FIQ", + "BriefDescription": "Exception taken, FIQ" + }, + {, + "EventCode": "0xC6", + "EventName": "PRE_DECODE_ERR", + "BriefDescription": "Pre-decode error" + }, + {, + "EventCode": "0xD0", + "EventName": "L1I_CACHE_ERR", + "BriefDescription": "L1 Instruction Cache (data or tag) memory error" + }, + {, + "EventCode": "0xD1", + "EventName": "L1D_CACHE_ERR", + "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable" + }, + {, + "EventCode": "0xD2", + "EventName": "TLB_ERR", + "BriefDescription": "TLB memory error" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json new file mode 100644 index 000000000000..3149fb90555a --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json @@ -0,0 +1,52 @@ +[ + {, + "EventCode": "0xC7", + "EventName": "STALL_SB_FULL", + "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full" + }, + {, + "EventCode": "0xE0", + "EventName": "OTHER_IQ_DEP_STALL", + "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error" + }, + {, + "EventCode": "0xE1", + "EventName": "IC_DEP_STALL", + "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed" + }, + {, + "EventCode": "0xE2", + "EventName": "IUTLB_DEP_STALL", + "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed" + }, + {, + "EventCode": "0xE3", + "EventName": "DECODE_DEP_STALL", + "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed" + }, + {, + "EventCode": "0xE4", + "EventName": "OTHER_INTERLOCK_STALL", + "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction" + }, + {, + "EventCode": "0xE5", + "EventName": "AGU_DEP_STALL", + "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU" + }, + {, + "EventCode": "0xE6", + "EventName": "SIMD_DEP_STALL", + "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation." + }, + {, + "EventCode": "0xE7", + "EventName": "LD_DEP_STALL", + "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss" + }, + {, + "EventCode": "0xE8", + "EventName": "ST_DEP_STALL", + "BriefDescription": "Cycles there is a stall in the Wr stage because of a store" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 219d6756134e..e61c9ca6cf9e 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -13,3 +13,4 @@ # #Family-model,Version,Filename,EventType 0x00000000420f5160,v1,cavium,core +0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 4035d43523c3..e0b1b414d466 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -31,10 +31,12 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, int i; for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; + u64 start, end; - perf_mmap__read_catchup(&evlist->overwrite_mmap[i]); - while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) { + perf_mmap__read_init(map, true, &start, &end); + while ((event = perf_mmap__read_event(map, true, &start, end)) != NULL) { const u32 type = event->header.type; switch (type) { @@ -49,6 +51,7 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, return TEST_FAIL; } } + perf_mmap__read_done(map); } return TEST_OK; } diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 8b3da21a08f1..c446c894b297 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -22,10 +22,23 @@ trace_libc_inet_pton_backtrace() { expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" expected[6]=".*inet_pton[[:space:]]\($libc\)$" - expected[7]="getaddrinfo[[:space:]]\($libc\)$" - expected[8]=".*\(.*/bin/ping.*\)$" - - perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do + case "$(uname -m)" in + s390x) + eventattr='call-graph=dwarf' + expected[7]="gaih_inet[[:space:]]\(inlined\)$" + expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$" + expected[9]="main[[:space:]]\(.*/bin/ping.*\)$" + expected[10]="__libc_start_main[[:space:]]\($libc\)$" + expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$" + ;; + *) + eventattr='max-stack=3' + expected[7]="getaddrinfo[[:space:]]\($libc\)$" + expected[8]=".*\(.*/bin/ping.*\)$" + ;; + esac + + perf trace --no-syscalls -e probe_libc:inet_pton/$eventattr/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do echo $line echo "$line" | egrep -q "${expected[$idx]}" if [ $? -ne 0 ] ; then @@ -33,7 +46,7 @@ trace_libc_inet_pton_backtrace() { exit 1 fi let idx+=1 - [ $idx -eq 9 ] && break + [ -z "${expected[$idx]}" ] && break done } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 68146f4620a5..6495ee55d9c3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -608,7 +608,8 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si return browser->title ? browser->title(browser, bf, size) : 0; } -int hist_browser__run(struct hist_browser *browser, const char *help) +int hist_browser__run(struct hist_browser *browser, const char *help, + bool warn_lost_event) { int key; char title[160]; @@ -638,8 +639,9 @@ int hist_browser__run(struct hist_browser *browser, const char *help) nr_entries = hist_browser__nr_entries(browser); ui_browser__update_nr_entries(&browser->b, nr_entries); - if (browser->hists->stats.nr_lost_warned != - browser->hists->stats.nr_events[PERF_RECORD_LOST]) { + if (warn_lost_event && + (browser->hists->stats.nr_lost_warned != + browser->hists->stats.nr_events[PERF_RECORD_LOST])) { browser->hists->stats.nr_lost_warned = browser->hists->stats.nr_events[PERF_RECORD_LOST]; ui_browser__warn_lost_events(&browser->b); @@ -2763,7 +2765,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env) + struct perf_env *env, + bool warn_lost_event) { struct hists *hists = evsel__hists(evsel); struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); @@ -2844,7 +2847,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, nr_options = 0; - key = hist_browser__run(browser, helpline); + key = hist_browser__run(browser, helpline, + warn_lost_event); if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); @@ -3184,7 +3188,8 @@ static void perf_evsel_menu__write(struct ui_browser *browser, static int perf_evsel_menu__run(struct perf_evsel_menu *menu, int nr_events, const char *help, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + bool warn_lost_event) { struct perf_evlist *evlist = menu->b.priv; struct perf_evsel *pos; @@ -3203,7 +3208,9 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu, case K_TIMER: hbt->timer(hbt->arg); - if (!menu->lost_events_warned && menu->lost_events) { + if (!menu->lost_events_warned && + menu->lost_events && + warn_lost_event) { ui_browser__warn_lost_events(&menu->b); menu->lost_events_warned = true; } @@ -3224,7 +3231,8 @@ browse_hists: key = perf_evsel__hists_browse(pos, nr_events, help, true, hbt, menu->min_pcnt, - menu->env); + menu->env, + warn_lost_event); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3282,7 +3290,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env) + struct perf_env *env, + bool warn_lost_event) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -3309,13 +3318,15 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, menu.b.width = line_len; } - return perf_evsel_menu__run(&menu, nr_entries, help, hbt); + return perf_evsel_menu__run(&menu, nr_entries, help, + hbt, warn_lost_event); } int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env) + struct perf_env *env, + bool warn_lost_event) { int nr_entries = evlist->nr_entries; @@ -3325,7 +3336,7 @@ single_entry: return perf_evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env); + env, warn_lost_event); } if (symbol_conf.event_group) { @@ -3342,5 +3353,6 @@ single_entry: } return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, - hbt, min_pcnt, env); + hbt, min_pcnt, env, + warn_lost_event); } diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index ba431777f559..9428bee076f2 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -28,7 +28,8 @@ struct hist_browser { struct hist_browser *hist_browser__new(struct hists *hists); void hist_browser__delete(struct hist_browser *browser); -int hist_browser__run(struct hist_browser *browser, const char *help); +int hist_browser__run(struct hist_browser *browser, const char *help, + bool warn_lost_event); void hist_browser__init(struct hist_browser *browser, struct hists *hists); #endif /* _PERF_UI_BROWSER_HISTS_H_ */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ac35cd214feb..e5fc14e53c05 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -715,28 +715,11 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int return perf_mmap__read_forward(md); } -union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) -{ - struct perf_mmap *md = &evlist->mmap[idx]; - - /* - * No need to check messup for backward ring buffer: - * We can always read arbitrary long data from a backward - * ring buffer unless we forget to pause it before reading. - */ - return perf_mmap__read_backward(md); -} - union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { return perf_evlist__mmap_read_forward(evlist, idx); } -void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) -{ - perf_mmap__read_catchup(&evlist->mmap[idx]); -} - void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { perf_mmap__consume(&evlist->mmap[idx], false); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 75f8e0ad5d76..336b838e6957 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -133,10 +133,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx); -union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, - int idx); -void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); - void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); int perf_evlist__open(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 66fa45198a11..ef351688b797 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -41,17 +41,7 @@ #include "sane_ctype.h" -static struct { - bool sample_id_all; - bool exclude_guest; - bool mmap2; - bool cloexec; - bool clockid; - bool clockid_wrong; - bool lbr_flags; - bool write_backward; - bool group_read; -} perf_missing_features; +struct perf_missing_features perf_missing_features; static clockid_t clockid; @@ -745,12 +735,14 @@ static void apply_config_terms(struct perf_evsel *evsel, if (!(term->weak && opts->user_interval != ULLONG_MAX)) { attr->sample_period = term->val.period; attr->freq = 0; + perf_evsel__reset_sample_bit(evsel, PERIOD); } break; case PERF_EVSEL__CONFIG_TERM_FREQ: if (!(term->weak && opts->user_freq != UINT_MAX)) { attr->sample_freq = term->val.freq; attr->freq = 1; + perf_evsel__set_sample_bit(evsel, PERIOD); } break; case PERF_EVSEL__CONFIG_TERM_TIME: @@ -969,9 +961,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, if (target__has_cpu(&opts->target) || opts->sample_cpu) perf_evsel__set_sample_bit(evsel, CPU); - if (opts->period) - perf_evsel__set_sample_bit(evsel, PERIOD); - /* * When the user explicitly disabled time don't force it here. */ @@ -1073,6 +1062,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, apply_config_terms(evsel, opts, track); evsel->ignore_missing_thread = opts->ignore_missing_thread; + + /* The --period option takes the precedence. */ + if (opts->period_set) { + if (opts->period) + perf_evsel__set_sample_bit(evsel, PERIOD); + else + perf_evsel__reset_sample_bit(evsel, PERIOD); + } } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 846e41644525..a7487c6d1866 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -149,6 +149,20 @@ union u64_swap { u32 val32[2]; }; +struct perf_missing_features { + bool sample_id_all; + bool exclude_guest; + bool mmap2; + bool cloexec; + bool clockid; + bool clockid_wrong; + bool lbr_flags; + bool write_backward; + bool group_read; +}; + +extern struct perf_missing_features perf_missing_features; + struct cpu_map; struct target; struct thread_map; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f6630cb95eff..02721b579746 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -430,7 +430,8 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env); + struct perf_env *env, + bool warn_lost_event); int script_browse(const char *script_opt); #else static inline @@ -438,7 +439,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, const char *help __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, - struct perf_env *env __maybe_unused) + struct perf_env *env __maybe_unused, + bool warn_lost_event __maybe_unused) { return 0; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 05076e683938..91531a7c8fbf 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -22,29 +22,27 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) /* When check_messup is true, 'end' must points to a good entry */ static union perf_event *perf_mmap__read(struct perf_mmap *map, - u64 start, u64 end, u64 *prev) + u64 *startp, u64 end) { unsigned char *data = map->base + page_size; union perf_event *event = NULL; - int diff = end - start; + int diff = end - *startp; if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[start & map->mask]; + event = (union perf_event *)&data[*startp & map->mask]; size = event->header.size; - if (size < sizeof(event->header) || diff < (int)size) { - event = NULL; - goto broken_event; - } + if (size < sizeof(event->header) || diff < (int)size) + return NULL; /* * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((start & map->mask) + size != ((start + size) & map->mask)) { - unsigned int offset = start; + if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + unsigned int offset = *startp; unsigned int len = min(sizeof(*event), size), cpy; void *dst = map->event_copy; @@ -59,20 +57,19 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, event = (union perf_event *)map->event_copy; } - start += size; + *startp += size; } -broken_event: - if (prev) - *prev = start; - return event; } +/* + * legacy interface for mmap read. + * Don't use it. Use perf_mmap__read_event(). + */ union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; - u64 old = map->prev; /* * Check if event was unmapped due to a POLLHUP/POLLERR. @@ -82,13 +79,26 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map) head = perf_mmap__read_head(map); - return perf_mmap__read(map, old, head, &map->prev); + return perf_mmap__read(map, &map->prev, head); } -union perf_event *perf_mmap__read_backward(struct perf_mmap *map) +/* + * Read event from ring buffer one by one. + * Return one event for each call. + * + * Usage: + * perf_mmap__read_init() + * while(event = perf_mmap__read_event()) { + * //process the event + * perf_mmap__consume() + * } + * perf_mmap__read_done() + */ +union perf_event *perf_mmap__read_event(struct perf_mmap *map, + bool overwrite, + u64 *startp, u64 end) { - u64 head, end; - u64 start = map->prev; + union perf_event *event; /* * Check if event was unmapped due to a POLLHUP/POLLERR. @@ -96,40 +106,19 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) if (!refcount_read(&map->refcnt)) return NULL; - head = perf_mmap__read_head(map); - if (!head) + if (startp == NULL) return NULL; - /* - * 'head' pointer starts from 0. Kernel minus sizeof(record) form - * it each time when kernel writes to it, so in fact 'head' is - * negative. 'end' pointer is made manually by adding the size of - * the ring buffer to 'head' pointer, means the validate data can - * read is the whole ring buffer. If 'end' is positive, the ring - * buffer has not fully filled, so we must adjust 'end' to 0. - * - * However, since both 'head' and 'end' is unsigned, we can't - * simply compare 'end' against 0. Here we compare '-head' and - * the size of the ring buffer, where -head is the number of bytes - * kernel write to the ring buffer. - */ - if (-head < (u64)(map->mask + 1)) - end = 0; - else - end = head + map->mask + 1; - - return perf_mmap__read(map, start, end, &map->prev); -} + /* non-overwirte doesn't pause the ringbuffer */ + if (!overwrite) + end = perf_mmap__read_head(map); -void perf_mmap__read_catchup(struct perf_mmap *map) -{ - u64 head; + event = perf_mmap__read(map, startp, end); - if (!refcount_read(&map->refcnt)) - return; + if (!overwrite) + map->prev = *startp; - head = perf_mmap__read_head(map); - map->prev = head; + return event; } static bool perf_mmap__empty(struct perf_mmap *map) @@ -267,41 +256,60 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u6 return -1; } -int perf_mmap__push(struct perf_mmap *md, bool overwrite, - void *to, int push(void *to, void *buf, size_t size)) +/* + * Report the start and end of the available data in ringbuffer + */ +int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, + u64 *startp, u64 *endp) { u64 head = perf_mmap__read_head(md); u64 old = md->prev; - u64 end = head, start = old; unsigned char *data = md->base + page_size; unsigned long size; - void *buf; - int rc = 0; - start = overwrite ? head : old; - end = overwrite ? old : head; + *startp = overwrite ? head : old; + *endp = overwrite ? old : head; - if (start == end) - return 0; + if (*startp == *endp) + return -EAGAIN; - size = end - start; + size = *endp - *startp; if (size > (unsigned long)(md->mask) + 1) { if (!overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); md->prev = head; perf_mmap__consume(md, overwrite); - return 0; + return -EAGAIN; } /* * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) - return -1; + if (overwrite_rb_find_range(data, md->mask, head, startp, endp)) + return -EINVAL; } + return 0; +} + +int perf_mmap__push(struct perf_mmap *md, bool overwrite, + void *to, int push(void *to, void *buf, size_t size)) +{ + u64 head = perf_mmap__read_head(md); + u64 end, start; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int rc = 0; + + rc = perf_mmap__read_init(md, overwrite, &start, &end); + if (rc < 0) + return (rc == -EAGAIN) ? 0 : -1; + + size = end - start; + if ((start & md->mask) + size != (end & md->mask)) { buf = &data[start & md->mask]; size = md->mask + 1 - (start & md->mask); @@ -327,3 +335,14 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, out: return rc; } + +/* + * Mandatory for overwrite mode + * The direction of overwrite mode is backward. + * The last perf_mmap__read() will set tail to map->prev. + * Need to correct the map->prev to head which is the end of next read. + */ +void perf_mmap__read_done(struct perf_mmap *map) +{ + map->prev = perf_mmap__read_head(map); +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e43d7b55a55f..ec7d3a24e276 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -65,8 +65,6 @@ void perf_mmap__put(struct perf_mmap *map); void perf_mmap__consume(struct perf_mmap *map, bool overwrite); -void perf_mmap__read_catchup(struct perf_mmap *md); - static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; @@ -87,11 +85,17 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) } union perf_event *perf_mmap__read_forward(struct perf_mmap *map); -union perf_event *perf_mmap__read_backward(struct perf_mmap *map); + +union perf_event *perf_mmap__read_event(struct perf_mmap *map, + bool overwrite, + u64 *startp, u64 end); int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); +int perf_mmap__read_init(struct perf_mmap *md, bool overwrite, + u64 *startp, u64 *endp); +void perf_mmap__read_done(struct perf_mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 443892dabedb..1019bbc5dbd8 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -340,35 +340,15 @@ size_t hex_width(u64 v) return n; } -static int hex(char ch) -{ - if ((ch >= '0') && (ch <= '9')) - return ch - '0'; - if ((ch >= 'a') && (ch <= 'f')) - return ch - 'a' + 10; - if ((ch >= 'A') && (ch <= 'F')) - return ch - 'A' + 10; - return -1; -} - /* * While we find nice hex chars, build a long_val. * Return number of chars processed. */ int hex2u64(const char *ptr, u64 *long_val) { - const char *p = ptr; - *long_val = 0; - - while (*p) { - const int hex_val = hex(*p); + char *p; - if (hex_val < 0) - break; - - *long_val = (*long_val << 4) | hex_val; - p++; - } + *long_val = strtoull(ptr, &p, 16); return p - ptr; } diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index 5b38dc2fec4f..dfa6fee1b915 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 6e78413bb2cb..f7032c9ab35e 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 52a39ecf5ca1..e7347edfd4f9 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index ea14eaeb268f..9b5e61b636d9 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index cf9b5a54df92..8b26924e2602 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 025c1b07049d..34c044da433c 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index d6aa40fce2b1..3c679607d1c3 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 0634449156d8..9ad1712e4cf9 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index d686e11936c4..856e1b83407b 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index be418fba9441..f4ef826800cf 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2017, Intel Corp. + * Copyright (C) 2000 - 2018, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 3fab179b1aba..fcb3ed0be5f8 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -99,5 +99,6 @@ ifneq ($(silent),1) QUIET_CLEAN = @printf ' CLEAN %s\n' $1; QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index db33b28c5ef3..0392153a0009 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o +nfit-y += acpi_nfit_test.o nfit-y += config_check.o nd_pmem-y := $(NVDIMM_SRC)/pmem.o nd_pmem-y += pmem-dax.o +nd_pmem-y += pmem_test.o nd_pmem-y += config_check.o nd_btt-y := $(NVDIMM_SRC)/btt.o @@ -57,6 +59,7 @@ dax-y += config_check.o device_dax-y := $(DAX_SRC)/device.o device_dax-y += dax-dev.o +device_dax-y += device_dax_test.o device_dax-y += config_check.o dax_pmem-y := $(DAX_SRC)/pmem.o @@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o +libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o obj-m += test/ diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c new file mode 100644 index 000000000000..43521512e577 --- /dev/null +++ b/tools/testing/nvdimm/acpi_nfit_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(acpi_nfit); diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c new file mode 100644 index 000000000000..24b17bf42429 --- /dev/null +++ b/tools/testing/nvdimm/device_dax_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(device_dax); diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c new file mode 100644 index 000000000000..00ca30b23932 --- /dev/null +++ b/tools/testing/nvdimm/libnvdimm_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(libnvdimm); diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c new file mode 100644 index 000000000000..fd38f92275cf --- /dev/null +++ b/tools/testing/nvdimm/pmem_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(pmem); diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e1f75a1914a1..ff9d3a5825e1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { - resource_size_t offset = res->start; + resource_size_t offset = pgmap->res.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) return nfit_res->buf + offset - nfit_res->res.start; - return devm_memremap_pages(dev, res, ref, altmap); + return devm_memremap_pages(dev, pgmap); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 7217b2b953b5..620fa78b3b1b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -27,6 +27,7 @@ #include <nfit.h> #include <nd.h> #include "nfit_test.h" +#include "../watermark.h" /* * Generate an NFIT table to describe the following topology: @@ -137,6 +138,14 @@ static u32 handle[] = { static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +struct nfit_test_fw { + enum intel_fw_update_state state; + u32 context; + u64 version; + u32 size_received; + u64 end_time; +}; + struct nfit_test { struct acpi_nfit_desc acpi_desc; struct platform_device pdev; @@ -168,8 +177,11 @@ struct nfit_test { spinlock_t lock; } ars_state; struct device *dimm_dev[NUM_DCR]; + struct nd_intel_smart *smart; + struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; struct work_struct work; + struct nfit_test_fw *fw; }; static struct workqueue_struct *nfit_wq; @@ -181,6 +193,226 @@ static struct nfit_test *to_nfit_test(struct device *dev) return container_of(pdev, struct nfit_test, pdev); } +static int nd_intel_test_get_fw_info(struct nfit_test *t, + struct nd_intel_fw_info *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE; + nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN; + nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL; + nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME; + nd_cmd->update_cap = 0; + nd_cmd->fis_version = INTEL_FW_FIS_VERSION; + nd_cmd->run_version = 0; + nd_cmd->updated_version = fw->version; + + return 0; +} + +static int nd_intel_test_start_update(struct nfit_test *t, + struct nd_intel_fw_start *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (fw->state != FW_STATE_NEW) { + /* extended status, FW update in progress */ + nd_cmd->status = 0x10007; + return 0; + } + + fw->state = FW_STATE_IN_PROGRESS; + fw->context++; + fw->size_received = 0; + nd_cmd->status = 0; + nd_cmd->context = fw->context; + + dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context); + + return 0; +} + +static int nd_intel_test_send_data(struct nfit_test *t, + struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + + dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status); + dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]); + dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1, + nd_cmd->data[nd_cmd->length-1]); + + if (fw->state != FW_STATE_IN_PROGRESS) { + dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__); + *status = 0x5; + return 0; + } + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + *status = 0x10007; + return 0; + } + + /* + * check offset + len > size of fw storage + * check length is > max send length + */ + if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE || + nd_cmd->length > INTEL_FW_MAX_SEND_LEN) { + *status = 0x3; + dev_dbg(dev, "%s: buffer boundary violation\n", __func__); + return 0; + } + + fw->size_received += nd_cmd->length; + dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n", + __func__, nd_cmd->length, fw->size_received); + *status = 0; + return 0; +} + +static int nd_intel_test_finish_fw(struct nfit_test *t, + struct nd_intel_fw_finish_update *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (fw->state == FW_STATE_UPDATED) { + /* update already done, need cold boot */ + nd_cmd->status = 0x20007; + return 0; + } + + dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n", + __func__, nd_cmd->context, nd_cmd->ctrl_flags); + + switch (nd_cmd->ctrl_flags) { + case 0: /* finish */ + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, + fw->context); + nd_cmd->status = 0x10007; + return 0; + } + nd_cmd->status = 0; + fw->state = FW_STATE_VERIFY; + /* set 1 second of time for firmware "update" */ + fw->end_time = jiffies + HZ; + break; + + case 1: /* abort */ + fw->size_received = 0; + /* successfully aborted status */ + nd_cmd->status = 0x40007; + fw->state = FW_STATE_NEW; + dev_dbg(dev, "%s: abort successful\n", __func__); + break; + + default: /* bad control flag */ + dev_warn(dev, "%s: unknown control flag: %#x\n", + __func__, nd_cmd->ctrl_flags); + return -EINVAL; + } + + return 0; +} + +static int nd_intel_test_finish_query(struct nfit_test *t, + struct nd_intel_fw_finish_query *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + nd_cmd->status = 0x10007; + return 0; + } + + dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context); + + switch (fw->state) { + case FW_STATE_NEW: + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0; + dev_dbg(dev, "%s: new state\n", __func__); + break; + + case FW_STATE_IN_PROGRESS: + /* sequencing error */ + nd_cmd->status = 0x40007; + nd_cmd->updated_fw_rev = 0; + dev_dbg(dev, "%s: sequence error\n", __func__); + break; + + case FW_STATE_VERIFY: + if (time_is_after_jiffies64(fw->end_time)) { + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0x20007; + dev_dbg(dev, "%s: still verifying\n", __func__); + break; + } + + dev_dbg(dev, "%s: transition out verify\n", __func__); + fw->state = FW_STATE_UPDATED; + /* we are going to fall through if it's "done" */ + case FW_STATE_UPDATED: + nd_cmd->status = 0; + /* bogus test version */ + fw->version = nd_cmd->updated_fw_rev = + INTEL_FW_FAKE_VERSION; + dev_dbg(dev, "%s: updated\n", __func__); + break; + + default: /* we should never get here */ + return -EINVAL; + } + + return 0; +} + static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, unsigned int buf_len) { @@ -440,39 +672,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus, return 0; } -static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) +static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len, + struct nd_intel_smart *smart_data) { - static const struct nd_smart_payload smart_data = { - .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID - | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID - | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, - .health = ND_SMART_NON_CRITICAL_HEALTH, - .temperature = 23 * 16, - .spares = 75, - .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .life_used = 5, - .shutdown_state = 0, - .vendor_size = 0, - }; - if (buf_len < sizeof(*smart)) return -EINVAL; - memcpy(smart->data, &smart_data, sizeof(smart_data)); + memcpy(smart, smart_data, sizeof(*smart)); return 0; } -static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, - unsigned int buf_len) +static int nfit_test_cmd_smart_threshold( + struct nd_intel_smart_threshold *out, + unsigned int buf_len, + struct nd_intel_smart_threshold *smart_t) { - static const struct nd_smart_threshold_payload smart_t_data = { - .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .temperature = 40 * 16, - .spares = 5, - }; - if (buf_len < sizeof(*smart_t)) return -EINVAL; - memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); + memcpy(out, smart_t, sizeof(*smart_t)); + return 0; +} + +static void smart_notify(struct device *bus_dev, + struct device *dimm_dev, struct nd_intel_smart *smart, + struct nd_intel_smart_threshold *thresh) +{ + dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n", + __func__, thresh->alarm_control, thresh->spares, + smart->spares, thresh->media_temperature, + smart->media_temperature, thresh->ctrl_temperature, + smart->ctrl_temperature); + if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP) + && smart->spares + <= thresh->spares) + || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP) + && smart->media_temperature + >= thresh->media_temperature) + || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) + && smart->ctrl_temperature + >= thresh->ctrl_temperature)) { + device_lock(bus_dev); + __acpi_nvdimm_notify(dimm_dev, 0x81); + device_unlock(bus_dev); + } +} + +static int nfit_test_cmd_smart_set_threshold( + struct nd_intel_smart_set_threshold *in, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + unsigned int size; + + size = sizeof(*in) - 4; + if (buf_len < size) + return -EINVAL; + memcpy(thresh->data, in, size); + in->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + return 0; } @@ -563,6 +822,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t, return 0; } +static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t, + struct nd_intel_lss *nd_cmd, unsigned int buf_len) +{ + struct device *dev = &t->pdev.dev; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + switch (nd_cmd->enable) { + case 0: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n", + __func__); + break; + case 1: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n", + __func__); + break; + default: + dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable); + nd_cmd->status = 0x3; + break; + } + + + return 0; +} + +static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) +{ + int i; + + /* lookup per-dimm data */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + if ((1 << func) & dimm_fail_cmd_flags[i]) + return -EIO; + + return i; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -591,22 +896,57 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, func = call_pkg->nd_command; if (call_pkg->nd_family != nfit_mem->family) return -ENOTTY; + + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; + + switch (func) { + case ND_INTEL_ENABLE_LSS_STATUS: + return nd_intel_test_cmd_set_lss_status(t, + buf, buf_len); + case ND_INTEL_FW_GET_INFO: + return nd_intel_test_get_fw_info(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_START_UPDATE: + return nd_intel_test_start_update(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_SEND_DATA: + return nd_intel_test_send_data(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_UPDATE: + return nd_intel_test_finish_fw(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_QUERY: + return nd_intel_test_finish_query(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_SMART: + return nfit_test_cmd_smart(buf, buf_len, + &t->smart[i - t->dcr_idx]); + case ND_INTEL_SMART_THRESHOLD: + return nfit_test_cmd_smart_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx]); + case ND_INTEL_SMART_SET_THRESHOLD: + return nfit_test_cmd_smart_set_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); + default: + return -ENOTTY; + } } if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; - /* lookup label space for the given dimm */ - for (i = 0; i < ARRAY_SIZE(handle); i++) - if (__to_nfit_memdev(nfit_mem)->device_handle == - handle[i]) - break; - if (i >= ARRAY_SIZE(handle)) - return -ENXIO; - - if ((1 << func) & dimm_fail_cmd_flags[i]) - return -EIO; + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; switch (func) { case ND_CMD_GET_CONFIG_SIZE: @@ -620,15 +960,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i - t->dcr_idx]); break; - case ND_CMD_SMART: - rc = nfit_test_cmd_smart(buf, buf_len); - break; - case ND_CMD_SMART_THRESHOLD: - rc = nfit_test_cmd_smart_threshold(buf, buf_len); - device_lock(&t->pdev.dev); - __acpi_nvdimm_notify(t->dimm_dev[i], 0x81); - device_unlock(&t->pdev.dev); - break; default: return -ENOTTY; } @@ -872,6 +1203,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static void smart_init(struct nfit_test *t) +{ + int i; + const struct nd_intel_smart_threshold smart_t_data = { + .alarm_control = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .media_temperature = 40 * 16, + .ctrl_temperature = 30 * 16, + .spares = 5, + }; + const struct nd_intel_smart smart_data = { + .flags = ND_INTEL_SMART_HEALTH_VALID + | ND_INTEL_SMART_SPARES_VALID + | ND_INTEL_SMART_ALARM_VALID + | ND_INTEL_SMART_USED_VALID + | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_MTEMP_VALID, + .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, + .media_temperature = 23 * 16, + .ctrl_temperature = 30 * 16, + .pmic_temperature = 40 * 16, + .spares = 75, + .alarm_flags = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .ait_status = 1, + .life_used = 5, + .shutdown_state = 0, + .vendor_size = 0, + .shutdown_count = 100, + }; + + for (i = 0; i < t->num_dcr; i++) { + memcpy(&t->smart[i], &smart_data, sizeof(smart_data)); + memcpy(&t->smart_threshold[i], &smart_t_data, + sizeof(smart_t_data)); + } +} + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA @@ -881,7 +1250,8 @@ static int nfit_test0_alloc(struct nfit_test *t) window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW + (sizeof(struct acpi_nfit_flush_address) - + sizeof(u64) * NUM_HINTS) * NUM_DCR; + + sizeof(u64) * NUM_HINTS) * NUM_DCR + + sizeof(struct acpi_nfit_capabilities); int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -939,6 +1309,7 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; } + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -969,6 +1340,7 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -993,6 +1365,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; + struct acpi_nfit_capabilities *pcap; unsigned int offset, i; /* @@ -1500,8 +1873,16 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + /* platform capabilities */ + pcap = nfit_buf + offset + flush_hint_size * 4; + pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; + pcap->header.length = sizeof(*pcap); + pcap->highest_capability = 1; + pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | + ACPI_NFIT_CAPABILITY_MEM_FLUSH; + if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4; + offset = offset + flush_hint_size * 4 + sizeof(*pcap); /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1642,17 +2023,24 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1750,6 +2138,7 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -2054,10 +2443,18 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(struct nfit_test_dcr *), GFP_KERNEL); nfit_test->dcr_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->smart = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart), GFP_KERNEL); + nfit_test->smart_threshold = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart_threshold), + GFP_KERNEL); + nfit_test->fw = devm_kcalloc(dev, num, + sizeof(struct nfit_test_fw), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr && nfit_test->dcr_dma && nfit_test->flush - && nfit_test->flush_dma) + && nfit_test->flush_dma + && nfit_test->fw) /* pass */; else return -ENOMEM; @@ -2159,6 +2556,11 @@ static __init int nfit_test_init(void) { int rc, i; + pmem_test(); + libnvdimm_test(); + acpi_nfit_test(); + device_dax_test(); + nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); nfit_wq = create_singlethread_workqueue("nfit"); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 113b44675a71..428344519cdf 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -84,6 +84,140 @@ struct nd_cmd_ars_err_inj_stat { } __packed record[0]; } __packed; +#define ND_INTEL_SMART 1 +#define ND_INTEL_SMART_THRESHOLD 2 +#define ND_INTEL_ENABLE_LSS_STATUS 10 +#define ND_INTEL_FW_GET_INFO 12 +#define ND_INTEL_FW_START_UPDATE 13 +#define ND_INTEL_FW_SEND_DATA 14 +#define ND_INTEL_FW_FINISH_UPDATE 15 +#define ND_INTEL_FW_FINISH_QUERY 16 +#define ND_INTEL_SMART_SET_THRESHOLD 17 + +#define ND_INTEL_SMART_HEALTH_VALID (1 << 0) +#define ND_INTEL_SMART_SPARES_VALID (1 << 1) +#define ND_INTEL_SMART_USED_VALID (1 << 2) +#define ND_INTEL_SMART_MTEMP_VALID (1 << 3) +#define ND_INTEL_SMART_CTEMP_VALID (1 << 4) +#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5) +#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6) +#define ND_INTEL_SMART_PTEMP_VALID (1 << 7) +#define ND_INTEL_SMART_ALARM_VALID (1 << 9) +#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10) +#define ND_INTEL_SMART_VENDOR_VALID (1 << 11) +#define ND_INTEL_SMART_SPARE_TRIP (1 << 0) +#define ND_INTEL_SMART_TEMP_TRIP (1 << 1) +#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2) +#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) +#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) +#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) + +struct nd_intel_smart { + __u32 status; + union { + struct { + __u32 flags; + __u8 reserved0[4]; + __u8 health; + __u8 spares; + __u8 life_used; + __u8 alarm_flags; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 shutdown_count; + __u8 ait_status; + __u16 pmic_temperature; + __u8 reserved1[8]; + __u8 shutdown_state; + __u32 vendor_size; + __u8 vendor_data[92]; + } __packed; + __u8 data[128]; + }; +} __packed; + +struct nd_intel_smart_threshold { + __u32 status; + union { + struct { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u8 reserved[1]; + } __packed; + __u8 data[8]; + }; +} __packed; + +struct nd_intel_smart_set_threshold { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 status; +} __packed; + +#define INTEL_FW_STORAGE_SIZE 0x100000 +#define INTEL_FW_MAX_SEND_LEN 0xFFEC +#define INTEL_FW_QUERY_INTERVAL 250000 +#define INTEL_FW_QUERY_MAX_TIME 3000000 +#define INTEL_FW_FIS_VERSION 0x0105 +#define INTEL_FW_FAKE_VERSION 0xffffffffabcd + +enum intel_fw_update_state { + FW_STATE_NEW = 0, + FW_STATE_IN_PROGRESS, + FW_STATE_VERIFY, + FW_STATE_UPDATED, +}; + +struct nd_intel_fw_info { + __u32 status; + __u32 storage_size; + __u32 max_send_len; + __u32 query_interval; + __u32 max_query_time; + __u8 update_cap; + __u8 reserved[3]; + __u32 fis_version; + __u64 run_version; + __u64 updated_version; +} __packed; + +struct nd_intel_fw_start { + __u32 status; + __u32 context; +} __packed; + +/* this one has the output first because the variable input data size */ +struct nd_intel_fw_send_data { + __u32 context; + __u32 offset; + __u32 length; + __u8 data[0]; +/* this field is not declared due ot variable data from input */ +/* __u32 status; */ +} __packed; + +struct nd_intel_fw_finish_update { + __u8 ctrl_flags; + __u8 reserved[3]; + __u32 context; + __u32 status; +} __packed; + +struct nd_intel_fw_finish_query { + __u32 context; + __u32 status; + __u64 updated_fw_rev; +} __packed; + +struct nd_intel_lss { + __u8 enable; + __u32 status; +} __packed; + union acpi_object; typedef void *acpi_handle; diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h new file mode 100644 index 000000000000..ed0528757bd4 --- /dev/null +++ b/tools/testing/nvdimm/watermark.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. +#ifndef _TEST_NVDIMM_WATERMARK_H_ +#define _TEST_NVDIMM_WATERMARK_H_ +int pmem_test(void); +int libnvdimm_test(void); +int acpi_nfit_test(void); +int device_dax_test(void); + +/* + * dummy routine for nfit_test to validate it is linking to the properly + * mocked module and not the standard one from the base tree. + */ +#define nfit_test_watermark(x) \ +int x##_test(void) \ +{ \ + pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \ + return 0; \ +} \ +EXPORT_SYMBOL(x##_test) +#endif /* _TEST_NVDIMM_WATERMARK_H_ */ diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 30cd0b296f1a..44ef9eba5a7a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,11 +153,12 @@ void idr_nowait_test(void) idr_destroy(&idr); } -void idr_get_next_test(void) +void idr_get_next_test(int base) { unsigned long i; int nextid; DEFINE_IDR(idr); + idr_init_base(&idr, base); int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; @@ -207,6 +208,7 @@ void idr_checks(void) assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); } assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC); + assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC); idr_for_each(&idr, item_idr_free, &idr); idr_destroy(&idr); @@ -214,6 +216,23 @@ void idr_checks(void) assert(idr_is_empty(&idr)); + idr_set_cursor(&idr, INT_MAX - 3UL); + for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) { + struct item *item; + unsigned int id; + if (i <= INT_MAX) + item = item_create(i, 0); + else + item = item_create(i - INT_MAX - 1, 0); + + id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL); + assert(id == item->index); + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + assert(idr_is_empty(&idr)); + for (i = 1; i < 10000; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i); @@ -226,7 +245,9 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); - idr_get_next_test(); + idr_get_next_test(0); + idr_get_next_test(1); + idr_get_next_test(4); } /* @@ -380,7 +401,7 @@ void ida_check_random(void) do { ida_pre_get(&ida, GFP_KERNEL); err = ida_get_new_above(&ida, bit, &id); - } while (err == -ENOMEM); + } while (err == -EAGAIN); assert(!err); assert(id == bit); } @@ -489,7 +510,7 @@ static void *ida_random_fn(void *arg) void ida_thread_tests(void) { - pthread_t threads[10]; + pthread_t threads[20]; int i; for (i = 0; i < ARRAY_SIZE(threads); i++) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index c3bc3f364f68..426f32f28547 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -17,6 +17,4 @@ #define pr_debug printk #define pr_cont printk -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - #endif /* _KERNEL_H */ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index eaf599dc2137..7442dfb73b7f 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -116,8 +116,15 @@ ifdef INSTALL_PATH @# Ask all targets to emit their test scripts echo "#!/bin/sh" > $(ALL_SCRIPT) - echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT) + echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT) echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT) + echo " OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT) + echo " cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT) + echo "else" >> $(ALL_SCRIPT) + echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT) + echo "fi" >> $(ALL_SCRIPT) for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 541d9d7fad5a..cc15af2e54fe 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -3,3 +3,11 @@ test_maps test_lru_map test_lpm_map test_tag +FEATURE-DUMP.libbpf +fixdep +test_align +test_dev_cgroup +test_progs +test_tcpbpf_user +test_verifier_log +feature diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9316e648a880..5c43c187f27c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,23 +11,36 @@ ifneq ($(wildcard $(GENHDR)),) endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include -LDLIBS += -lcap -lelf -lrt +LDLIBS += -lcap -lelf -lrt -lpthread +# Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup + test_align test_verifier_log test_dev_cgroup test_tcpbpf_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + sample_map_ret0.o test_tcpbpf_kern.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh +# Order correspond to 'make run_tests' order +TEST_PROGS := test_kmod.sh \ + test_libbpf.sh \ + test_xdp_redirect.sh \ + test_xdp_meta.sh \ + test_offload.py + +# Compile but not part of 'make run_tests' +TEST_GEN_PROGS_EXTENDED = test_libbpf_open include ../lib.mk -BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c +BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c $(TEST_GEN_PROGS): $(BPFOBJ) +$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a + .PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated @@ -49,8 +62,13 @@ else CPU ?= generic endif -%.o: %.c - $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ +CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline + +$(OUTPUT)/%.o: %.c + $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17fa8a8b..dde2c11d7771 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_getsockopt; +static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = + (void *) BPF_FUNC_sock_ops_cb_flags_set; static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, @@ -82,7 +84,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 52d53ed08769..983dd25d49f4 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -3,3 +3,5 @@ CONFIG_BPF_SYSCALL=y CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m +CONFIG_CGROUP_BPF=y +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c new file mode 100644 index 000000000000..0756303676ac --- /dev/null +++ b/tools/testing/selftests/bpf/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. */ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c new file mode 100644 index 000000000000..fec99750d6ea --- /dev/null +++ b/tools/testing/selftests/bpf/sample_ret0.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +/* Sample program which should always load for testing control paths. */ +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py new file mode 100755 index 000000000000..481dccdf140c --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +serverPort = int(sys.argv[1]) +HostName = socket.gethostname() + +# create active socket +sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +try: + sock.connect((HostName, serverPort)) +except socket.error as e: + sys.exit(1) + +buf = '' +n = 0 +while n < 1000: + buf += '+' + n += 1 + +sock.settimeout(1); +n = send(sock, buf) +n = read(sock, 500) +sys.exit(0) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py new file mode 100755 index 000000000000..bc454d7d0be2 --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +SERVER_PORT = 12877 +MAX_PORTS = 2 + +serverPort = SERVER_PORT +serverSocket = None + +HostName = socket.gethostname() + +# create passive socket +serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +host = socket.gethostname() + +try: serverSocket.bind((host, 0)) +except socket.error as msg: + print 'bind fails: ', msg + +sn = serverSocket.getsockname() +serverPort = sn[1] + +cmdStr = ("./tcp_client.py %d &") % (serverPort) +os.system(cmdStr) + +buf = '' +n = 0 +while n < 500: + buf += '.' + n += 1 + +serverSocket.listen(MAX_PORTS) +readList = [serverSocket] + +while True: + readyRead, readyWrite, inError = \ + select.select(readList, [], [], 2) + + if len(readyRead) > 0: + waitCount = 0 + for sock in readyRead: + if sock == serverSocket: + (clientSocket, address) = serverSocket.accept() + address = str(address[0]) + readList.append(clientSocket) + else: + sock.settimeout(1); + s = read(sock, 1000) + n = send(sock, buf) + sock.close() + serverSocket.close() + sys.exit(0) + else: + print 'Select timeout!' + sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 471bbbdb94db..ff8bd7e3e50c 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -64,11 +64,11 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv2"}, - {2, "R3=inv4"}, - {3, "R3=inv8"}, - {4, "R3=inv16"}, - {5, "R3=inv32"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@ -92,17 +92,17 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv1"}, - {2, "R3=inv2"}, - {3, "R3=inv4"}, - {4, "R3=inv8"}, - {5, "R3=inv16"}, - {6, "R3=inv1"}, - {7, "R4=inv32"}, - {8, "R4=inv16"}, - {9, "R4=inv8"}, - {10, "R4=inv4"}, - {11, "R4=inv2"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, "R4_w=inv2"}, }, }, { @@ -121,12 +121,12 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv4"}, - {2, "R3=inv8"}, - {3, "R3=inv10"}, - {4, "R4=inv8"}, - {5, "R4=inv12"}, - {6, "R4=inv14"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@ -143,10 +143,10 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv7"}, - {2, "R3=inv7"}, - {3, "R3=inv14"}, - {4, "R3=inv56"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, }, @@ -185,18 +185,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -217,16 +217,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -257,14 +257,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -320,11 +320,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -336,11 +336,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -352,18 +352,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -410,11 +410,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -426,11 +426,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -446,11 +446,9 @@ static struct bpf_align_test tests[] = { .insns = { PREP_PKT_POINTERS, BPF_MOV64_IMM(BPF_REG_0, 0), - /* ptr & const => unknown & const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40), - /* ptr << const => unknown << const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + /* (ptr - ptr) << 2 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2), /* We have a (4n) value. Let's make a packet offset * out of it. First add 14, to make it a (4n+2) @@ -473,8 +471,26 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - /* R5 bitwise operator &= on pointer prohibited */ + {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + /* (ptr - ptr) << 2 == unknown, (4n) */ + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + /* (4n) + 14 == (4n+2). We blow our bounds, because + * the add could overflow. + */ + {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + /* Checked s>=0 */ + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* packet pointer + nonnegative (4n+2) */ + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able + * to overflow if the packet pointer started in the + * upper half of the address space. + * So we did not get a 'range' on R6, and the access + * attempt will fail. + */ + {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, } }, { @@ -510,11 +526,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, /* Checked s>= 0 */ @@ -563,15 +579,15 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 02c85d6c89b0..3489cc283433 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -10,6 +10,8 @@ #include <string.h> #include <errno.h> #include <assert.h> +#include <sys/time.h> +#include <sys/resource.h> #include <linux/bpf.h> #include <bpf/bpf.h> @@ -19,19 +21,23 @@ #define DEV_CGROUP_PROG "./dev_cgroup.o" -#define TEST_CGROUP "test-bpf-based-device-cgroup/" +#define TEST_CGROUP "/test-bpf-based-device-cgroup/" int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; struct bpf_object *obj; int error = EXIT_FAILURE; int prog_fd, cgroup_fd; __u32 prog_cnt; + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); - goto err; + goto out; } if (setup_cgroup_environment()) { @@ -89,5 +95,6 @@ int main(int argc, char **argv) err: cleanup_cgroup_environment(); +out: return error; } diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index ed4774d8d6ed..35669ccd4d23 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -10,9 +10,21 @@ test_run() echo "[ JIT enabled:$1 hardened:$2 ]" dmesg -C - insmod $SRC_TREE/lib/test_bpf.ko 2> /dev/null - if [ $? -ne 0 ]; then - rc=1 + if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then + insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null + if [ $? -ne 0 ]; then + rc=1 + fi + else + # Use modprobe dry run to check for missing test_bpf module + if ! /sbin/modprobe -q -n test_bpf; then + echo "test_bpf: [SKIP]" + elif /sbin/modprobe -q test_bpf; then + echo "test_bpf: ok" + else + echo "test_bpf: [FAIL]" + rc=1 + fi fi rmmod test_bpf 2> /dev/null dmesg | grep FAIL diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c new file mode 100644 index 000000000000..ba44a14e6dc4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh new file mode 100755 index 000000000000..d97dc914cd49 --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +export TESTNAME=test_libbpf + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 + echo "selftests: $TESTNAME [FAILED]"; + fi +} + +libbpf_open_file() +{ + LAST_LOADED=$1 + if [ -n "$VERBOSE" ]; then + ./test_libbpf_open $1 + else + ./test_libbpf_open --quiet $1 + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +libbpf_open_file test_l4lb.o + +# TODO: fix libbpf to load noinline functions +# [warning] libbpf: incorrect bpf_call opcode +#libbpf_open_file test_l4lb_noinline.o + +# TODO: fix test_xdp_meta.c to load with libbpf +# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version +#libbpf_open_file test_xdp_meta.o + +# TODO: fix libbpf to handle .eh_frame +# [warning] libbpf: relocation failed: no section(10) +#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o + +# Success +exit 0 diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c new file mode 100644 index 000000000000..8fcd1c076add --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + */ +static const char *__doc__ = + "Libbpf test program for loading BPF ELF object files"; + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <bpf/libbpf.h> +#include <getopt.h> + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"debug", no_argument, NULL, 'D' }, + {"quiet", no_argument, NULL, 'q' }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + + printf("\nDOCUMENTATION:\n%s\n\n", __doc__); + printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + printf(" short-option: -%c", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +#define DEFINE_PRINT_FN(name, enabled) \ +static int libbpf_##name(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + if (enabled) { \ + fprintf(stderr, "[" #name "] "); \ + ret = vfprintf(stderr, fmt, args); \ + } \ + va_end(args); \ + return ret; \ +} +DEFINE_PRINT_FN(warning, 1) +DEFINE_PRINT_FN(info, 1) +DEFINE_PRINT_FN(debug, 1) + +#define EXIT_FAIL_LIBBPF EXIT_FAILURE +#define EXIT_FAIL_OPTION 2 + +int test_walk_progs(struct bpf_object *obj, bool verbose) +{ + struct bpf_program *prog; + int cnt = 0; + + bpf_object__for_each_program(prog, obj) { + cnt++; + if (verbose) + printf("Prog (count:%d) section_name: %s\n", cnt, + bpf_program__title(prog, false)); + } + return 0; +} + +int test_walk_maps(struct bpf_object *obj, bool verbose) +{ + struct bpf_map *map; + int cnt = 0; + + bpf_map__for_each(map, obj) { + cnt++; + if (verbose) + printf("Map (count:%d) name: %s\n", cnt, + bpf_map__name(map)); + } + return 0; +} + +int test_open_file(char *filename, bool verbose) +{ + struct bpf_object *bpfobj = NULL; + long err; + + if (verbose) + printf("Open BPF ELF-file with libbpf: %s\n", filename); + + /* Load BPF ELF object file and check for errors */ + bpfobj = bpf_object__open(filename); + err = libbpf_get_error(bpfobj); + if (err) { + char err_buf[128]; + libbpf_strerror(err, err_buf, sizeof(err_buf)); + if (verbose) + printf("Unable to load eBPF objects in file '%s': %s\n", + filename, err_buf); + return EXIT_FAIL_LIBBPF; + } + test_walk_progs(bpfobj, verbose); + test_walk_maps(bpfobj, verbose); + + if (verbose) + printf("Close BPF ELF-file with libbpf: %s\n", + bpf_object__name(bpfobj)); + bpf_object__close(bpfobj); + + return 0; +} + +int main(int argc, char **argv) +{ + char filename[1024] = { 0 }; + bool verbose = 1; + int longindex = 0; + int opt; + + libbpf_set_print(libbpf_warning, libbpf_info, NULL); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "hDq", + long_options, &longindex)) != -1) { + switch (opt) { + case 'D': + libbpf_set_print(libbpf_warning, libbpf_info, + libbpf_debug); + break; + case 'q': /* Use in scripting mode */ + verbose = 0; + break; + case 'h': + default: + usage(argv); + return EXIT_FAIL_OPTION; + } + } + if (optind >= argc) { + usage(argv); + printf("ERROR: Expected BPF_FILE argument after options\n"); + return EXIT_FAIL_OPTION; + } + snprintf(filename, sizeof(filename), "%s", argv[optind]); + + return test_open_file(filename, verbose); +} diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index f61480641b6e..2be87e9ee28d 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -14,6 +14,7 @@ #include <errno.h> #include <inttypes.h> #include <linux/bpf.h> +#include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -521,6 +522,218 @@ static void test_lpm_delete(void) close(map_fd); } +static void test_lpm_get_next_key(void) +{ + struct bpf_lpm_trie_key *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; + int map_fd; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + next_key_p = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && + errno == ENOENT); + + /* get and verify the first key, get the second one should fail. */ + key_p->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should get the first one in post order. */ + key_p->prefixlen = 8; + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + /* add one more element (total two) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total three) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total four) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should return the first one in post order */ + key_p->prefixlen = 22; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); + + close(map_fd); +} + +#define MAX_TEST_KEYS 4 +struct lpm_mt_test_info { + int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */ + int iter; + int map_fd; + struct { + __u32 prefixlen; + __u32 data; + } key[MAX_TEST_KEYS]; +}; + +static void *lpm_test_command(void *arg) +{ + int i, j, ret, iter, key_size; + struct lpm_mt_test_info *info = arg; + struct bpf_lpm_trie_key *key_p; + + key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32); + key_p = alloca(key_size); + for (iter = 0; iter < info->iter; iter++) + for (i = 0; i < MAX_TEST_KEYS; i++) { + /* first half of iterations in forward order, + * and second half in backward order. + */ + j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1; + key_p->prefixlen = info->key[j].prefixlen; + memcpy(key_p->data, &info->key[j].data, sizeof(__u32)); + if (info->cmd == 0) { + __u32 value = j; + /* update must succeed */ + assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0); + } else if (info->cmd == 1) { + ret = bpf_map_delete_elem(info->map_fd, key_p); + assert(ret == 0 || errno == ENOENT); + } else if (info->cmd == 2) { + __u32 value; + ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); + assert(ret == 0 || errno == ENOENT); + } else { + struct bpf_lpm_trie_key *next_key_p = alloca(key_size); + ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); + assert(ret == 0 || errno == ENOENT || errno == ENOMEM); + } + } + + // Pass successful exit info back to the main thread + pthread_exit((void *)info); +} + +static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) +{ + info->iter = 2000; + info->map_fd = map_fd; + info->key[0].prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", &info->key[0].data); + info->key[1].prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", &info->key[1].data); + info->key[2].prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", &info->key[2].data); + info->key[3].prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", &info->key[3].data); +} + +static void test_lpm_multi_thread(void) +{ + struct lpm_mt_test_info info[4]; + size_t key_size, value_size; + pthread_t thread_id[4]; + int i, map_fd; + void *ret; + + /* create a trie */ + value_size = sizeof(__u32); + key_size = sizeof(struct bpf_lpm_trie_key) + value_size; + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, + 100, BPF_F_NO_PREALLOC); + + /* create 4 threads to test update, delete, lookup and get_next_key */ + setup_lpm_mt_test_info(&info[0], map_fd); + for (i = 0; i < 4; i++) { + if (i != 0) + memcpy(&info[i], &info[0], sizeof(info[i])); + info[i].cmd = i; + assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0); + } + + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -545,6 +758,10 @@ int main(void) test_lpm_delete(); + test_lpm_get_next_key(); + + test_lpm_multi_thread(); + printf("test_lpm: OK\n"); return 0; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 040356ecc862..436c4c72414f 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -242,7 +242,7 @@ static void test_hashmap_percpu(int task, void *data) static void test_hashmap_walk(int task, void *data) { - int fd, i, max_entries = 100000; + int fd, i, max_entries = 1000; long long key, value, next_key; bool next_key_valid = true; @@ -463,7 +463,7 @@ static void test_devmap(int task, void *data) #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" static void test_sockmap(int tasks, void *data) { - int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; + int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; int err, i, fd, udp, sfd[6] = {0xdeadbeef}; @@ -868,9 +868,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } - /* Test map close sockets */ - for (i = 0; i < 6; i++) + /* Test map close sockets and empty maps */ + for (i = 0; i < 6; i++) { + bpf_map_delete_elem(map_fd_tx, &i); + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); close(map_fd_rx); bpf_object__close(obj); @@ -881,8 +884,13 @@ out: printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno)); exit(1); out_sockmap: - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { + if (map_fd_tx) + bpf_map_delete_elem(map_fd_tx, &i); + if (map_fd_rx) + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); exit(1); } @@ -931,8 +939,12 @@ static void test_map_large(void) close(fd); } -static void run_parallel(int tasks, void (*fn)(int task, void *data), - void *data) +#define run_parallel(N, FN, DATA) \ + printf("Fork %d tasks to '" #FN "'\n", N); \ + __run_parallel(N, FN, DATA) + +static void __run_parallel(int tasks, void (*fn)(int task, void *data), + void *data) { pid_t pid[tasks]; int i; @@ -972,7 +984,7 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -static void do_work(int fn, void *data) +static void test_update_delete(int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; @@ -1012,7 +1024,7 @@ static void test_map_parallel(void) */ data[0] = fd; data[1] = DO_UPDATE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Check that key=0 is already there. */ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && @@ -1035,7 +1047,7 @@ static void test_map_parallel(void) /* Now let's delete all elemenets in parallel. */ data[1] = DO_DELETE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Nothing should be left. */ key = -1; diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py new file mode 100755 index 000000000000..e78aad0a68bb --- /dev/null +++ b/tools/testing/selftests/bpf/test_offload.py @@ -0,0 +1,1085 @@ +#!/usr/bin/python3 + +# Copyright (C) 2017 Netronome Systems, Inc. +# +# This software is licensed under the GNU General License Version 2, +# June 1991 as shown in the file COPYING in the top-level directory of this +# source tree. +# +# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +from datetime import datetime +import argparse +import json +import os +import pprint +import random +import string +import struct +import subprocess +import time + +logfile = None +log_level = 1 +skip_extack = False +bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) +pp = pprint.PrettyPrinter() +devs = [] # devices we created for clean up +files = [] # files to be removed +netns = [] # net namespaces to be removed + +def log_get_sec(level=0): + return "*" * (log_level + level) + +def log_level_inc(add=1): + global log_level + log_level += add + +def log_level_dec(sub=1): + global log_level + log_level -= sub + +def log_level_set(level): + global log_level + log_level = level + +def log(header, data, level=None): + """ + Output to an optional log. + """ + if logfile is None: + return + if level is not None: + log_level_set(level) + + if not isinstance(data, str): + data = pp.pformat(data) + + if len(header): + logfile.write("\n" + log_get_sec() + " ") + logfile.write(header) + if len(header) and len(data.strip()): + logfile.write("\n") + logfile.write(data) + +def skip(cond, msg): + if not cond: + return + print("SKIP: " + msg) + log("SKIP: " + msg, "", level=1) + os.sys.exit(0) + +def fail(cond, msg): + if not cond: + return + print("FAIL: " + msg) + log("FAIL: " + msg, "", level=1) + os.sys.exit(1) + +def start_test(msg): + log(msg, "", level=1) + log_level_inc() + print(msg) + +def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True): + """ + Run a command in subprocess and return tuple of (retval, stdout); + optionally return stderr as well as third value. + """ + proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if background: + msg = "%s START: %s" % (log_get_sec(1), + datetime.now().strftime("%H:%M:%S.%f")) + log("BKG " + proc.args, msg) + return proc + + return cmd_result(proc, include_stderr=include_stderr, fail=fail) + +def cmd_result(proc, include_stderr=False, fail=False): + stdout, stderr = proc.communicate() + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + proc.stdout.close() + proc.stderr.close() + + stderr = "\n" + stderr + if stderr[-1] == "\n": + stderr = stderr[:-1] + + sec = log_get_sec(1) + log("CMD " + proc.args, + "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" % + (proc.returncode, sec, stdout, sec, stderr, + sec, datetime.now().strftime("%H:%M:%S.%f"))) + + if proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\n%s" % (proc.args, stderr)) + + if include_stderr: + return proc.returncode, stdout, stderr + else: + return proc.returncode, stdout + +def rm(f): + cmd("rm -f %s" % (f)) + if f in files: + files.remove(f) + +def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): + params = "" + if JSON: + params += "%s " % (flags["json"]) + + if ns != "": + ns = "ip netns exec %s " % (ns) + + if include_stderr: + ret, stdout, stderr = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=True) + else: + ret, stdout = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=False) + + if JSON and len(stdout.strip()) != 0: + out = json.loads(stdout) + else: + out = stdout + + if include_stderr: + return ret, out, stderr + else: + return ret, out + +def bpftool(args, JSON=True, ns="", fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) + +def bpftool_prog_list(expected=None, ns=""): + _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs loaded, expected %d" % + (len(progs), expected)) + return progs + +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + +def bpftool_prog_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nprogs = len(bpftool_prog_list()) + if nprogs == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) + +def bpftool_map_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nmaps = len(bpftool_map_list()) + if nmaps == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + +def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False): + if force: + args = "-force " + args + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def tc(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def ethtool(dev, opt, args, fail=True): + return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) + +def bpf_obj(name, sec=".text", path=bpf_test_dir,): + return "obj %s sec %s" % (os.path.join(path, name), sec) + +def bpf_pinned(name): + return "pinned %s" % (name) + +def bpf_bytecode(bytecode): + return "bytecode \"%s\"" % (bytecode) + +def mknetns(n_retry=10): + for i in range(n_retry): + name = ''.join([random.choice(string.ascii_letters) for i in range(8)]) + ret, _ = ip("netns add %s" % (name), fail=False) + if ret == 0: + netns.append(name) + return name + return None + +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = "Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + +class DebugfsDir: + """ + Class for accessing DebugFS directories as a dictionary. + """ + + def __init__(self, path): + self.path = path + self._dict = self._debugfs_dir_read(path) + + def __len__(self): + return len(self._dict.keys()) + + def __getitem__(self, key): + if type(key) is int: + key = list(self._dict.keys())[key] + return self._dict[key] + + def __setitem__(self, key, value): + log("DebugFS set %s = %s" % (key, value), "") + log_level_inc() + + cmd("echo '%s' > %s/%s" % (value, self.path, key)) + log_level_dec() + + _, out = cmd('cat %s/%s' % (self.path, key)) + self._dict[key] = out.strip() + + def _debugfs_dir_read(self, path): + dfs = {} + + log("DebugFS state for %s" % (path), "") + log_level_inc(add=2) + + _, out = cmd('ls ' + path) + for f in out.split(): + p = os.path.join(path, f) + if os.path.isfile(p): + _, out = cmd('cat %s/%s' % (path, f)) + dfs[f] = out.strip() + elif os.path.isdir(p): + dfs[f] = DebugfsDir(p) + else: + raise Exception("%s is neither file nor directory" % (p)) + + log_level_dec() + log("DebugFS state", dfs) + log_level_dec() + + return dfs + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self): + self.dev = self._netdevsim_create() + devs.append(self) + + self.ns = "" + + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) + self.dfs_refresh() + + def __getitem__(self, key): + return self.dev[key] + + def _netdevsim_create(self): + _, old = ip("link show") + ip("link add sim%d type netdevsim") + _, new = ip("link show") + + for dev in new: + f = filter(lambda x: x["ifname"] == dev["ifname"], old) + if len(list(f)) == 0: + return dev + + raise Exception("failed to create netdevsim device") + + def remove(self): + devs.remove(self) + ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns) + + def dfs_refresh(self): + self.dfs = DebugfsDir(self.dfs_dir) + return self.dfs + + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) + + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs + + def wait_for_flush(self, bound=0, total=0, n_retry=20): + for i in range(n_retry): + nbound = self.dfs_num_bound_progs() + nprogs = len(bpftool_prog_list()) + if nbound == bound and nprogs == total: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + + def set_ns(self, ns): + name = "1" if ns == "" else ns + ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) + self.ns = ns + + def set_mtu(self, mtu, fail=True): + return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), + fail=fail) + + def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False, + fail=True, include_stderr=False): + if verbose: + bpf += " verbose" + return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def unset_xdp(self, mode, force=False, JSON=True, + fail=True, include_stderr=False): + return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def ip_link_show(self, xdp): + _, link = ip("link show dev %s" % (self['ifname'])) + if len(link) > 1: + raise Exception("Multiple objects on ip link show") + if len(link) < 1: + return {} + fail(xdp != "xdp" in link, + "XDP program not reporting in iplink (reported %s, expected %s)" % + ("xdp" in link, xdp)) + return link[0] + + def tc_add_ingress(self): + tc("qdisc add dev %s ingress" % (self['ifname'])) + + def tc_del_ingress(self): + tc("qdisc del dev %s ingress" % (self['ifname'])) + + def tc_flush_filters(self, bound=0, total=0): + self.tc_del_ingress() + self.tc_add_ingress() + self.wait_for_flush(bound=bound, total=total) + + def tc_show_ingress(self, expected=None): + # No JSON support, oh well... + flags = ["skip_sw", "skip_hw", "in_hw"] + named = ["protocol", "pref", "chain", "handle", "id", "tag"] + + args = "-s filter show dev %s ingress" % (self['ifname']) + _, out = tc(args, JSON=False) + + filters = [] + lines = out.split('\n') + for line in lines: + words = line.split() + if "handle" not in words: + continue + fltr = {} + for flag in flags: + fltr[flag] = flag in words + for name in named: + try: + idx = words.index(name) + fltr[name] = words[idx + 1] + except ValueError: + pass + filters.append(fltr) + + if expected is not None: + fail(len(filters) != expected, + "%d ingress filters loaded, expected %d" % + (len(filters), expected)) + return filters + + def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, + chain=None, cls="", params="", + fail=True, include_stderr=False): + spec = "" + if prio is not None: + spec += " prio %d" % (prio) + if handle: + spec += " handle %s" % (handle) + if chain is not None: + spec += " chain %d" % (chain) + + return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ + .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, + cls=cls, params=params), + fail=fail, include_stderr=include_stderr) + + def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, + chain=None, da=False, verbose=False, + skip_sw=False, skip_hw=False, + fail=True, include_stderr=False): + cls = "bpf " + bpf + + params = "" + if da: + params += " da" + if verbose: + params += " verbose" + if skip_sw: + params += " skip_sw" + if skip_hw: + params += " skip_hw" + + return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, + chain=chain, params=params, + fail=fail, include_stderr=include_stderr) + + def set_ethtool_tc_offloads(self, enable, fail=True): + args = "hw-tc-offload %s" % ("on" if enable else "off") + return ethtool(self, "-K", args, fail=fail) + +################################################################################ +def clean_up(): + global files, netns, devs + + for dev in devs: + dev.remove() + for f in files: + cmd("rm -f %s" % (f)) + for ns in netns: + cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] + +def pin_prog(file_name, idx=0): + progs = bpftool_prog_list(expected=(idx + 1)) + prog = progs[idx] + bpftool("prog pin id %d %s" % (prog["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list(expected=expected) + m = maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + + bpftool_map_list(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) + prog = progs[0] + + fail("dev" not in prog.keys(), "Device parameters not reported") + dev = prog["dev"] + fail("ifindex" not in dev.keys(), "Device parameters not reported") + fail("ns_dev" not in dev.keys(), "Device parameters not reported") + fail("ns_inode" not in dev.keys(), "Device parameters not reported") + + if not other_ns: + fail("ifname" not in dev.keys(), "Ifname not reported") + fail(dev["ifname"] != sim["ifname"], + "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) + else: + fail("ifname" in dev.keys(), "Ifname is reported for other ns") + + maps = bpftool_map_list(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") + +def check_extack(output, reference, args): + if skip_extack: + return + lines = output.split("\n") + comp = len(lines) >= 2 and lines[1] == reference + fail(not comp, "Missing or incorrect netlink extack message") + +def check_extack_nsim(output, reference, args): + check_extack(output, "Error: netdevsim: " + reference, args) + +def check_no_extack(res, needle): + fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), + "Found '%s' in command output, leaky extack?" % (needle)) + +def check_verifier_log(output, reference): + lines = output.split("\n") + for l in reversed(lines): + if l == reference: + return + fail(True, "Missing or incorrect message from netdevsim in verifier log") + +def test_spurios_extack(sim, obj, skip_hw, needle): + res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, + include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_hw=skip_hw, include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf", + include_stderr=True) + check_no_extack(res, needle) + + +# Parse command line +parser = argparse.ArgumentParser() +parser.add_argument("--log", help="output verbose log to given file") +args = parser.parse_args() +if args.log: + logfile = open(args.log, 'w+') + logfile.write("# -*-Org-*-") + +log("Prepare...", "", level=1) +log_level_inc() + +# Check permissions +skip(os.getuid() != 0, "test must be run as root") + +# Check tools +ret, progs = bpftool("prog", fail=False) +skip(ret != 0, "bpftool not installed") +# Check no BPF programs are loaded +skip(len(progs) != 0, "BPF programs already loaded on the system") + +# Check netdevsim +ret, out = cmd("modprobe netdevsim", fail=False) +skip(ret != 0, "netdevsim module could not be loaded") + +# Check debugfs +_, out = cmd("mount") +if out.find("/sys/kernel/debug type debugfs") == -1: + cmd("mount -t debugfs none /sys/kernel/debug") + +# Check samples are compiled +samples = ["sample_ret0.o", "sample_map_ret0.o"] +for s in samples: + ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) + skip(ret != 0, "sample %s/%s not found, please compile it" % + (bpf_test_dir, s)) + +# Check if iproute2 is built with libmnl (needed by extack support) +_, _, err = cmd("tc qdisc delete dev lo handle 0", + fail=False, include_stderr=True) +if err.find("Error: Failed to find qdisc with specified handle.") == -1: + print("Warning: no extack message in iproute2 output, libmnl missing?") + log("Warning: no extack message in iproute2 output, libmnl missing?", "") + skip_extack = True + +# Check if net namespaces seem to work +ns = mknetns() +skip(ns is None, "Could not create a net namespace") +cmd("ip netns delete %s" % (ns)) +netns = [] + +try: + obj = bpf_obj("sample_ret0.o") + bytecode = bpf_bytecode("1,6 0 0 4294967295,") + + start_test("Test destruction of generic XDP...") + sim = NetdevSim() + sim.set_xdp(obj, "generic") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.tc_add_ingress() + + start_test("Test TC non-offloaded...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False) + fail(ret != 0, "Software TC filter did not load") + + start_test("Test TC non-offloaded isn't getting bound...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + + sim.tc_flush_filters() + + start_test("Test TC offloads are off by default...") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter loaded without enabling TC offloads") + check_extack(err, "Error: TC offload is disabled on net device.", args) + sim.wait_for_flush() + + sim.set_ethtool_tc_offloads(True) + sim.dfs["bpf_tc_non_bound_accept"] = "Y" + + start_test("Test TC offload by default...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Filter not offloaded by default") + + sim.tc_flush_filters() + + start_test("Test TC cBPF bytcode tries offload by default...") + ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Bytecode not offloaded by default") + + sim.tc_flush_filters() + sim.dfs["bpf_tc_non_bound_accept"] = "N" + + start_test("Test TC cBPF unbound bytecode doesn't offload...") + ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC bytecode loaded for offload") + check_extack_nsim(err, "netdevsim configured to reject unbound programs.", + args) + sim.wait_for_flush() + + start_test("Test non-0 chain offload...") + ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1, + skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Offloaded a filter to chain other than 0") + check_extack(err, "Error: Driver supports only offload of chain 0.", args) + sim.tc_flush_filters() + + start_test("Test TC replace...") + sim.cls_bpf_add_filter(obj, prio=1, handle=1) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test TC replace bad flags...") + for i in range(3): + for j in range(3): + ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_sw=(j == 1), skip_hw=(j == 2), + fail=False) + fail(bool(ret) != bool(j), + "Software TC incorrect load in replace test, iteration %d" % + (j)) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test spurious extack from the driver...") + test_spurios_extack(sim, obj, False, "netdevsim") + test_spurios_extack(sim, obj, True, "netdevsim") + + sim.set_ethtool_tc_offloads(False) + + test_spurios_extack(sim, obj, False, "TC offload is disabled") + test_spurios_extack(sim, obj, True, "TC offload is disabled") + + sim.set_ethtool_tc_offloads(True) + + sim.tc_flush_filters() + + start_test("Test TC offloads work...") + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret != 0, "TC filter did not load with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + + start_test("Test TC offload basics...") + dfs = sim.dfs_get_bound_progs(expected=1) + progs = bpftool_prog_list(expected=1) + ingress = sim.tc_show_ingress(expected=1) + + dprog = dfs[0] + prog = progs[0] + fltr = ingress[0] + fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter") + fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter") + fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back") + + start_test("Test TC offload is device-bound...") + fail(str(prog["id"]) != fltr["id"], "Program IDs don't match") + fail(prog["tag"] != fltr["tag"], "Program tags don't match") + fail(fltr["id"] != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test disabling TC offloads is rejected while filters installed...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + + start_test("Test qdisc removal frees things...") + sim.tc_flush_filters() + sim.tc_show_ingress(expected=0) + + start_test("Test disabling TC offloads is OK without filters...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret != 0, + "Driver refused to disable TC offloads without filters installed...") + + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of TC filters...") + sim.cls_bpf_add_filter(obj, skip_sw=True) + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of XDP...") + sim.set_xdp(obj, "offload") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test XDP prog reporting...") + sim.set_xdp(obj, "drv") + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace without force...") + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Replaced XDP program without -force") + sim.wait_for_flush(total=1) + + start_test("Test XDP prog replace with force...") + ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False) + fail(ret != 0, "Could not replace XDP program with -force") + bpftool_prog_list_wait(expected=1) + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + fail("dev" in progs[0].keys(), + "Device parameters reported for non-offloaded program") + + start_test("Test XDP prog replace with bad flags...") + ret, _, err = sim.set_xdp(obj, "offload", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.set_xdp(obj, "", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) + + start_test("Test XDP prog remove with bad flags...") + ret, _, err = sim.unset_xdp("offload", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) + + start_test("Test MTU restrictions...") + ret, _ = sim.set_mtu(9000, fail=False) + fail(ret == 0, + "Driver should refuse to increase MTU to 9000 with XDP loaded...") + sim.unset_xdp("drv") + bpftool_prog_list_wait(expected=0) + sim.set_mtu(9000) + ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True) + fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + check_extack_nsim(err, "MTU too large w/ XDP enabled.", args) + sim.set_mtu(1500) + + sim.wait_for_flush() + start_test("Test XDP offload...") + _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) + ipl = sim.ip_link_show(xdp=True) + link_xdp = ipl["xdp"]["prog"] + progs = bpftool_prog_list(expected=1) + prog = progs[0] + fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + + start_test("Test XDP offload is device bound...") + dfs = sim.dfs_get_bound_progs(expected=1) + dprog = dfs[0] + + fail(prog["id"] != link_xdp["id"], "Program IDs don't match") + fail(prog["tag"] != link_xdp["tag"], "Program tags don't match") + fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test removing XDP program many times...") + sim.unset_xdp("offload") + sim.unset_xdp("offload") + sim.unset_xdp("drv") + sim.unset_xdp("drv") + sim.unset_xdp("") + sim.unset_xdp("") + bpftool_prog_list_wait(expected=0) + + start_test("Test attempt to use a program for a wrong device...") + sim2 = NetdevSim() + sim2.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a different device accepted") + check_extack_nsim(err, "program bound to different dev.", args) + sim2.remove() + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a removed device accepted") + check_extack_nsim(err, "xdpoffload of non-bound program.", args) + rm(pin_file) + bpftool_prog_list_wait(expected=0) + + start_test("Test mixing of TC and XDP...") + sim.tc_add_ingress() + sim.set_xdp(obj, "offload") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Loading TC when XDP active should fail") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + sim.unset_xdp("offload") + sim.wait_for_flush() + + sim.cls_bpf_add_filter(obj, skip_sw=True) + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Loading XDP when TC active should fail") + check_extack_nsim(err, "TC program is already loaded.", args) + + start_test("Test binding TC from pinned...") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + sim.tc_flush_filters(bound=1, total=1) + sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True) + sim.tc_flush_filters(bound=1, total=1) + + start_test("Test binding XDP from pinned...") + sim.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1) + + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + + start_test("Test offload of wrong type fails...") + ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False) + fail(ret == 0, "Managed to attach XDP program to TC") + + start_test("Test asking for TC offload of two filters...") + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Managed to offload two TC filters at the same time") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + + sim.tc_flush_filters(bound=2, total=2) + + start_test("Test if netdev removal waits for translation...") + delay_msec = 500 + sim.dfs["bpf_bind_verifier_delay"] = delay_msec + start = time.time() + cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ + (sim['ifname'], obj) + tc_proc = cmd(cmd_line, background=True, fail=False) + # Wait for the verifier to start + while sim.dfs_num_bound_progs() <= 2: + pass + sim.remove() + end = time.time() + ret, _ = cmd_result(tc_proc, fail=False) + time_diff = end - start + log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff)) + + fail(ret == 0, "Managed to load TC filter on a unregistering device") + delay_sec = delay_msec * 0.001 + fail(time_diff < delay_sec, "Removal process took %s, expected %s" % + (time_diff, delay_sec)) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + map_obj = bpf_obj("sample_map_ret0.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + + start_test("Test bpftool bound info reporting (own ns)...") + check_dev_info(False, "") + + start_test("Test bpftool bound info reporting (other ns)...") + ns = mknetns() + sim.set_ns(ns) + check_dev_info(True, "") + + start_test("Test bpftool bound info reporting (remote ns)...") + check_dev_info(False, ns) + + start_test("Test bpftool bound info reporting (back to own ns)...") + sim.set_ns("") + check_dev_info(False, "") + + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) + sim.remove() + + start_test("Test bpftool bound info reporting (removed dev)...") + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete (array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (htab["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + sim.remove() + + sim = NetdevSim() + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + sim.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + sim = NetdevSim() + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") + + print("%s: OK" % (os.path.basename(__file__))) + +finally: + log("Clean up...", "", level=1) + log_level_inc() + clean_up() diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6761be18a91f..b549308abd19 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -21,8 +21,10 @@ typedef __u16 __sum16; #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/filter.h> +#include <linux/perf_event.h> #include <linux/unistd.h> +#include <sys/ioctl.h> #include <sys/wait.h> #include <sys/resource.h> #include <sys/types.h> @@ -167,10 +169,9 @@ out: #define NUM_ITER 100000 #define VIP_NUM 5 -static void test_l4lb(void) +static void test_l4lb(const char *file) { unsigned int nr_cpus = bpf_num_possible_cpus(); - const char *file = "./test_l4lb.o"; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -247,6 +248,95 @@ out: bpf_object__close(obj); } +static void test_l4lb_all(void) +{ + const char *file1 = "./test_l4lb.o"; + const char *file2 = "./test_l4lb_noinline.o"; + + test_l4lb(file1); + test_l4lb(file2); +} + +static void test_xdp_noinline(void) +{ + const char *file = "./test_xdp_noinline.o"; + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + static void test_tcp_estats(void) { const char *file = "./test_tcp_estats.o"; @@ -617,6 +707,262 @@ static void test_obj_name(void) } } +static void test_tp_attach_query(void) +{ + const int num_progs = 3; + int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; + __u32 duration = 0, info_len, saved_prog_ids[num_progs]; + const char *file = "./test_tracepoint.o"; + struct perf_event_query_bpf *query; + struct perf_event_attr attr = {}; + struct bpf_object *obj[num_progs]; + struct bpf_prog_info prog_info; + char buf[256]; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + return; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + return; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + + query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); + for (i = 0; i < num_progs; i++) { + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + &prog_fd[i]); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto cleanup1; + + bzero(&prog_info, sizeof(prog_info)); + prog_info.jited_prog_len = 0; + prog_info.xlated_prog_len = 0; + prog_info.nr_map_ids = 0; + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len); + if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n", + err, errno)) + goto cleanup1; + saved_prog_ids[i] = prog_info.id; + + pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd[i], errno)) + goto cleanup2; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 0) { + /* check NULL prog array query */ + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 0, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 1) { + /* try to get # of programs only */ + query->ids_len = 0; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + + /* try a few negative tests */ + /* invalid query pointer */ + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, + (struct perf_event_query_bpf *)0x1); + if (CHECK(!err || errno != EFAULT, + "perf_event_ioc_query_bpf", + "err %d errno %d\n", err, errno)) + goto cleanup3; + + /* no enough space */ + query->ids_len = 1; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != (i + 1), + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + for (j = 0; j < i + 1; j++) + if (CHECK(saved_prog_ids[j] != query->ids[j], + "perf_event_ioc_query_bpf", + "#%d saved_prog_id %x query prog_id %x\n", + j, saved_prog_ids[j], query->ids[j])) + goto cleanup3; + } + + i = num_progs - 1; + for (; i >= 0; i--) { + cleanup3: + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); + cleanup2: + close(pmu_fd[i]); + cleanup1: + bpf_object__close(obj[i]); + } + free(query); +} + +static int compare_map_keys(int map1_fd, int map2_fd) +{ + __u32 key, next_key; + char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)]; + int err; + + err = bpf_map_get_next_key(map1_fd, NULL, &key); + if (err) + return err; + err = bpf_map_lookup_elem(map2_fd, &key, val_buf); + if (err) + return err; + + while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) { + err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf); + if (err) + return err; + + key = next_key; + } + if (errno != ENOENT) + return -1; + + return 0; +} + +static void test_stacktrace_map() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int bytes, efd, err, pmu_fd, prog_fd; + struct perf_event_attr attr = {}; + __u32 key, val, duration = 0; + struct bpf_object *obj; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto out; + + /* Get the ID for the sched/sched_switch tracepoint */ + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + /* Open the perf event and attach bpf progrram */ + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (CHECK(control_map_fd < 0, "bpf_find_map control_map", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + ; /* fall through */ + +disable_pmu: + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + +close_pmu: + close(pmu_fd); + +close_prog: + bpf_object__close(obj); + +out: + return; +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -625,11 +971,14 @@ int main(void) test_pkt_access(); test_xdp(); - test_l4lb(); + test_l4lb_all(); + test_xdp_noinline(); test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); test_obj_name(); + test_tp_attach_query(); + test_stacktrace_map(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c new file mode 100644 index 000000000000..76d85c5d08bd --- /dev/null +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 10000, +}; + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + __u32 key = 0, val = 0, *value_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(ctx, &stackmap, 0); + if ((int)key >= 0) + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h new file mode 100644 index 000000000000..2fe43289943c --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpbpf_globals { + __u32 event_map; + __u32 total_retrans; + __u32 data_segs_in; + __u32 data_segs_out; + __u32 bad_cb_test_rv; + __u32 good_cb_test_rv; + __u64 bytes_received; + __u64 bytes_acked; +}; +#endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c new file mode 100644 index 000000000000..57119ad57a3f --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/in6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpbpf.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 2, +}; + +static inline void update_event_map(int event) +{ + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (gp == NULL) { + struct tcpbpf_globals g = {0}; + + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } else { + g = *gp; + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } +} + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int bad_call_rv = 0; + int good_call_rv = 0; + int op; + int v = 0; + + op = (int) skops->op; + + update_event_map(op); + + switch (op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Test failure to set largest cb flag (assumes not defined) */ + bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + /* Set callback */ + good_call_rv = bpf_sock_ops_cb_flags_set(skops, + BPF_SOCK_OPS_STATE_CB_FLAG); + /* Update results */ + { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.bad_cb_test_rv = bad_call_rv; + g.good_cb_test_rv = good_call_rv; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + skops->sk_txhash = 0x12345f; + v = 0xff; + rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, + sizeof(v)); + break; + case BPF_SOCK_OPS_RTO_CB: + break; + case BPF_SOCK_OPS_RETRANS_CB: + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] == BPF_TCP_CLOSE) { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c new file mode 100644 index 000000000000..95a370f3d378 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <assert.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include "bpf_util.h" +#include <linux/perf_event.h> +#include "test_tcpbpf.h" + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +#define SYSTEM(CMD) \ + do { \ + if (system(CMD)) { \ + printf("system(%s) FAILS!\n", CMD); \ + } \ + } while (0) + +int main(int argc, char **argv) +{ + const char *file = "test_tcpbpf_kern.o"; + struct tcpbpf_globals g = {0}; + int cg_fd, prog_fd, map_fd; + bool debug_flag = false; + int error = EXIT_FAILURE; + struct bpf_object *obj; + char cmd[100], *dir; + struct stat buffer; + __u32 key = 0; + int pid; + int rv; + + if (argc > 1 && strcmp(argv[1], "-d") == 0) + debug_flag = true; + + dir = "/tmp/cgroupv2/foo"; + + if (stat(dir, &buffer) != 0) { + SYSTEM("mkdir -p /tmp/cgroupv2"); + SYSTEM("mount -t cgroup2 none /tmp/cgroupv2"); + SYSTEM("mkdir -p /tmp/cgroupv2/foo"); + } + pid = (int) getpid(); + sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid); + SYSTEM(cmd); + + cg_fd = open(dir, O_DIRECTORY, O_RDONLY); + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + SYSTEM("./tcp_server.py"); + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + if (g.bytes_received != 501 || g.bytes_acked != 1002 || + g.data_segs_in != 1 || g.data_segs_out != 1 || + (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 || + g.good_cb_test_rv != 0) { + printf("FAILED: Wrong stats\n"); + if (debug_flag) { + printf("\n"); + printf("bytes_received: %d (expecting 501)\n", + (int)g.bytes_received); + printf("bytes_acked: %d (expecting 1002)\n", + (int)g.bytes_acked); + printf("data_segs_in: %d (expecting 1)\n", + g.data_segs_in); + printf("data_segs_out: %d (expecting 1)\n", + g.data_segs_out); + printf("event_map: 0x%x (at least 0x47e)\n", + g.event_map); + printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n", + g.bad_cb_test_rv); + printf("good_cb_test_rv:0x%x (expecting 0)\n", + g.good_cb_test_rv); + } + goto err; + } + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + return error; + +} diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c new file mode 100644 index 000000000000..04bf084517e0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tracepoint.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 5ed4175c4ff8..c0f16e93f9bd 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2,6 +2,7 @@ * Testsuite for eBPF verifier * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -20,6 +21,7 @@ #include <stddef.h> #include <stdbool.h> #include <sched.h> +#include <limits.h> #include <sys/capability.h> #include <sys/resource.h> @@ -28,6 +30,7 @@ #include <linux/filter.h> #include <linux/bpf_perf_event.h> #include <linux/bpf.h> +#include <linux/if_ether.h> #include <bpf/bpf.h> @@ -48,6 +51,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 4 +#define POINTER_VALUE 0xcafe4all +#define TEST_DATA_LEN 64 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -61,6 +66,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; + uint32_t retval; enum { UNDEF, ACCEPT, @@ -94,6 +100,326 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = -3, + }, + { + "DIV32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_2, 16), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 8, + }, + { + "DIV32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 3), + BPF_MOV32_IMM(BPF_REG_2, 5), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD64 by 0, zero check 2, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, -1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = -1, + }, + /* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero. + */ + { + "DIV32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "xor32 zero extend check", + .insns = { + BPF_MOV32_IMM(BPF_REG_2, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), + BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "empty prog", + .insns = { + }, + .errstr = "unknown opcode 00", + .result = REJECT, + }, + { + "only exit insn", + .insns = { + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, }, { "unreachable", @@ -209,6 +535,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "test8 ld_imm64", @@ -280,7 +607,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode c4", }, { "arsh32 on reg", @@ -291,7 +618,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode cc", }, { "arsh64 on imm", @@ -317,7 +644,7 @@ static struct bpf_test tests[] = { .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "jump out of range", + .errstr = "not an exit", .result = REJECT, }, { @@ -407,7 +734,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_CALL uses reserved", + .errstr = "unknown opcode 8d", .result = REJECT, }, { @@ -516,6 +843,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, + .retval = POINTER_VALUE, }, { "check valid spill/fill, skb mark", @@ -596,7 +924,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(0, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -614,7 +942,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -623,7 +951,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, -1, -1, -1, -1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -802,6 +1130,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, + .retval = -ENOENT, }, { "jump test 4", @@ -1822,6 +2151,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 0xfaceb00c, }, { "PTR_TO_STACK store/load - bad alignment on off", @@ -1880,6 +2210,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr", + .retval = POINTER_VALUE, }, { "unpriv: add const to pointer", @@ -2053,6 +2384,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -2840,6 +3172,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test12 (and, good access)", @@ -2864,6 +3197,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test13 (branches, good access)", @@ -2894,6 +3228,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", @@ -2917,6 +3252,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test15 (spill with xadd)", @@ -3203,6 +3539,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test28 (marking on <=, bad access)", @@ -5700,7 +6037,7 @@ static struct bpf_test tests[] = { "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -5822,6 +6159,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 0 /* csum_diff of 64-byte packet */, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -5935,7 +6273,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), @@ -6190,6 +6528,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 42 /* ultimate return value */, }, { "ld_ind: check calling conv, r1", @@ -6261,6 +6600,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check bpf_perf_event_data->sample_period byte load permitted", @@ -7248,6 +7588,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7268,6 +7609,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7434,10 +7776,24 @@ static struct bpf_test tests[] = { }, BPF_EXIT_INSN(), }, - .errstr = "BPF_END uses reserved fields", + .errstr = "unknown opcode d7", .result = REJECT, }, { + "XDP, using ifindex from netdev", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, ingress_ifindex)), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 1, + }, + { "meta access, test1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -7709,6 +8065,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = TEST_DATA_LEN, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -8656,6 +9013,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check deducing bounds from const, 3", @@ -8826,6 +9184,1959 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, + { + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: div by 0 in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: multiple ret types in subprog 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: multiple ret types in subprog 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, data)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 16 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, + { + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, + }, + { + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + .retval = TEST_DATA_LEN, + }, + { + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, + }, + { + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN, + }, + { + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42, + }, + { + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, + }, + { + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 43, + }, + { + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, + }, + { + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, + }, + { + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, + }, + { + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, + }, + { + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", + }, + { + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, + }, + { + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, + }, + { + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + .retval = 42, + }, + { + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, + }, + { + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + }, + { + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -8937,10 +11248,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); + char data_in[TEST_DATA_LEN] = {}; int prog_type = test->prog_type; int map_fds[MAX_NR_MAPS]; const char *expected_err; - int i; + uint32_t retval; + int i, err; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; @@ -8983,6 +11296,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } + if (fd_prog >= 0) { + err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), + NULL, NULL, &retval, NULL); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error\n"); + goto fail_log; + } + if (!err && retval != test->retval && + test->retval != POINTER_VALUE) { + printf("FAIL retval %d != %d\n", retval, test->retval); + goto fail_log; + } + } (*passes)++; printf("OK%s\n", reject_from_alignment ? " (NOTE: reject due to unknown alignment)" : ""); diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 307aa856cee3..637fcf4fe4e3 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -9,6 +9,7 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c new file mode 100644 index 000000000000..5e4aac74f9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_noinline.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __attribute__ ((noinline)) +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __attribute__ ((noinline)) +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __attribute__ ((noinline)) +u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; +}; + +struct packet_description { + struct flow_key flow; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u16 family; + __u8 proto; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct lb_stats { + __u64 v2; + __u64 v1; +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip_definition), + .value_size = sizeof(struct vip_meta), + .max_entries = 512, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(struct flow_key), + .value_size = sizeof(struct real_pos_lru), + .max_entries = 300, + .map_flags = 1U << 1, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 12 * 655, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = 40, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct lb_stats), + .max_entries = 515, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, +}; + +struct eth_hdr { + unsigned char eth_dest[6]; + unsigned char eth_source[6]; + unsigned short eth_proto; +}; + +static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +{ + __u64 off = sizeof(struct eth_hdr); + if (is_ipv6) { + off += sizeof(struct ipv6hdr); + if (is_icmp) + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); + } else { + off += sizeof(struct iphdr); + if (is_icmp) + off += sizeof(struct icmphdr) + sizeof(struct iphdr); + } + return off; +} + +static __attribute__ ((noinline)) +bool parse_udp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return 0; + if (!is_icmp) { + pckt->flow.port16[0] = udp->source; + pckt->flow.port16[1] = udp->dest; + } else { + pckt->flow.port16[0] = udp->dest; + pckt->flow.port16[1] = udp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool parse_tcp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return 0; + if (tcp->syn) + pckt->flags |= (1 << 1); + if (!is_icmp) { + pckt->flow.port16[0] = tcp->source; + pckt->flow.port16[1] = tcp->dest; + } else { + pckt->flow.port16[0] = tcp->dest; + pckt->flow.port16[1] = tcp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + struct ipv6hdr *ip6h; + __u32 ip_suffix; + void *data_end; + void *data; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + ip6h = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct ipv6hdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || ip6h + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 56710; + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + + ip6h->nexthdr = IPPROTO_IPV6; + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; + ip6h->payload_len = + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + ip6h->hop_limit = 4; + + ip6h->saddr.in6_u.u6_addr32[0] = 1; + ip6h->saddr.in6_u.u6_addr32[1] = 2; + ip6h->saddr.in6_u.u6_addr32[2] = 3; + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + __u16 *next_iph_u16; + struct iphdr *iph; + __u32 csum = 0; + void *data_end; + void *data; + + ip_suffix <<= 15; + ip_suffix ^= pckt->flow.src; + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + iph = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct iphdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || iph + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + iph->version = 4; + iph->ihl = 5; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 1; + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + /* don't update iph->daddr, since it will overwrite old eth_proto + * and multiple iterations of bpf_prog_run() will fail + */ + + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; + iph->ttl = 4; + + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct ipv6hdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + if (inner_v4) + new_eth->eth_proto = 8; + else + new_eth->eth_proto = 56710; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct iphdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +int swap_mac_and_send(void *data, void *data_end) +{ + unsigned char tmp_mac[6]; + struct eth_hdr *eth; + + eth = data; + memcpy(tmp_mac, eth->eth_source, 6); + memcpy(eth->eth_source, eth->eth_dest, 6); + memcpy(eth->eth_dest, tmp_mac, 6); + return XDP_TX; +} + +static __attribute__ ((noinline)) +int send_icmp_reply(void *data, void *data_end) +{ + struct icmphdr *icmp_hdr; + __u16 *next_iph_u16; + __u32 tmp_addr = 0; + struct iphdr *iph; + __u32 csum1 = 0; + __u32 csum = 0; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + icmp_hdr->type = 0; + icmp_hdr->checksum += 0x0007; + iph->ttl = 4; + tmp_addr = iph->daddr; + iph->daddr = iph->saddr; + iph->saddr = tmp_addr; + iph->check = 0; + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int send_icmp6_reply(void *data, void *data_end) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + __be32 tmp_addr[4]; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp_hdr = data + off; + icmp_hdr->icmp6_type = 129; + icmp_hdr->icmp6_cksum -= 0x0001; + ip6h->hop_limit = 4; + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->icmp6_type == 128) + return send_icmp6_reply(data, data_end); + if (icmp_hdr->icmp6_type != 3) + return XDP_PASS; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + pckt->flow.proto = ip6h->nexthdr; + pckt->flags |= (1 << 0); + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); + return -1; +} + +static __attribute__ ((noinline)) +int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->type == 8) + return send_icmp_reply(data, data_end); + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) + return XDP_PASS; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + pckt->flow.proto = iph->protocol; + pckt->flags |= (1 << 0); + pckt->flow.src = iph->daddr; + pckt->flow.dst = iph->saddr; + return -1; +} + +static __attribute__ ((noinline)) +__u32 get_packet_hash(struct packet_description *pckt, + bool hash_16bytes) +{ + if (hash_16bytes) + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), + pckt->flow.ports, 24); + else + return jhash_2words(pckt->flow.src, pckt->flow.ports, + 24); +} + +__attribute__ ((noinline)) +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6, void *lru_map) +{ + struct real_pos_lru new_dst_lru = { }; + bool hash_16bytes = is_ipv6; + __u32 *real_pos, hash, key; + __u64 cur_time; + + if (vip_info->flags & (1 << 2)) + hash_16bytes = 1; + if (vip_info->flags & (1 << 3)) { + pckt->flow.port16[0] = pckt->flow.port16[1]; + memset(pckt->flow.srcv6, 0, 16); + } + hash = get_packet_hash(pckt, hash_16bytes); + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + key = 2 * vip_info->vip_num + hash % 2; + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return 0; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return 0; + if (!(vip_info->flags & (1 << 1))) { + __u32 conn_rate_key = 512 + 2; + struct lb_stats *conn_rate_stats = + bpf_map_lookup_elem(&stats, &conn_rate_key); + + if (!conn_rate_stats) + return 1; + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { + conn_rate_stats->v1 = 1; + conn_rate_stats->v2 = cur_time; + } else { + conn_rate_stats->v1 += 1; + if (conn_rate_stats->v1 >= 1) + return 1; + } + if (pckt->flow.proto == IPPROTO_UDP) + new_dst_lru.atime = cur_time; + new_dst_lru.pos = key; + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); + } + return 1; +} + +__attribute__ ((noinline)) +static void connection_table_lookup(struct real_definition **real, + struct packet_description *pckt, + void *lru_map) +{ + + struct real_pos_lru *dst_lru; + __u64 cur_time; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); + if (!dst_lru) + return; + if (pckt->flow.proto == IPPROTO_UDP) { + cur_time = bpf_ktime_get_ns(); + if (cur_time - dst_lru->atime > 300000) + return; + dst_lru->atime = cur_time; + } + key = dst_lru->pos; + *real = bpf_map_lookup_elem(&reals, &key); +} + +/* don't believe your eyes! + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 + * but since it's _static_ llvm can optimize one argument away + */ +__attribute__ ((noinline)) +static int process_l3_headers_v6(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct ipv6hdr *ip6h; + __u64 iph_len; + int action; + + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + iph_len = sizeof(struct ipv6hdr); + *protocol = ip6h->nexthdr; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + off += iph_len; + if (*protocol == 45) { + return XDP_DROP; + } else if (*protocol == 59) { + action = parse_icmpv6(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); + } + return -1; +} + +__attribute__ ((noinline)) +static int process_l3_headers_v4(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct iphdr *iph; + __u64 iph_len; + int action; + + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + *protocol = iph->protocol; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(iph->tot_len); + off += 20; + if (iph->frag_off & 65343) + return XDP_DROP; + if (*protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + pckt->flow.src = iph->saddr; + pckt->flow.dst = iph->daddr; + } + return -1; +} + +__attribute__ ((noinline)) +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct xdp_md *xdp) +{ + + struct real_definition *dst = NULL; + struct packet_description pckt = { }; + struct vip_definition vip = { }; + struct lb_stats *data_stats; + struct eth_hdr *eth = data; + void *lru_map = &lru_cache; + struct vip_meta *vip_info; + __u32 lru_stats_key = 513; + __u32 mac_addr_pos = 0; + __u32 stats_key = 512; + struct ctl_value *cval; + __u16 pkt_bytes; + __u64 iph_len; + __u8 protocol; + __u32 vip_num; + int action; + + if (is_ipv6) + action = process_l3_headers_v6(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + else + action = process_l3_headers_v4(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + if (action >= 0) + return action; + protocol = pckt.flow.proto; + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else { + return XDP_TX; + } + + if (is_ipv6) + memcpy(vip.vipv6, pckt.flow.dstv6, 16); + else + vip.vip = pckt.flow.dst; + vip.port = pckt.flow.port16[1]; + vip.proto = pckt.flow.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.port = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return XDP_PASS; + if (!(vip_info->flags & (1 << 4))) + pckt.flow.port16[1] = 0; + } + if (data_end - data > 1400) + return XDP_DROP; + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + if (!dst) { + if (vip_info->flags & (1 << 0)) + pckt.flow.port16[0] = 0; + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) + connection_table_lookup(&dst, &pckt, lru_map); + if (dst) + goto out; + if (pckt.flow.proto == IPPROTO_TCP) { + struct lb_stats *lru_stats = + bpf_map_lookup_elem(&stats, &lru_stats_key); + + if (!lru_stats) + return XDP_DROP; + if (pckt.flags & (1 << 1)) + lru_stats->v1 += 1; + else + lru_stats->v2 += 1; + } + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) + return XDP_DROP; + data_stats->v2 += 1; + } +out: + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); + if (!cval) + return XDP_DROP; + if (dst->flags & (1 << 0)) { + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } else { + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + data_stats->v2 += pkt_bytes; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + if (data + 4 > data_end) + return XDP_DROP; + *(u32 *)data = dst->dst; + return XDP_DROP; +} + +__attribute__ ((section("xdp-test"), used)) +int balancer_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = eth->eth_proto; + if (eth_proto == 8) + return process_packet(data, nh_off, data_end, 0, ctx); + else if (eth_proto == 56710) + return process_packet(data, nh_off, data_end, 1, ctx); + else + return XDP_DROP; +} + +char _license[] __attribute__ ((section("license"), used)) = "GPL"; +int _version __attribute__ ((section("version"), used)) = 1; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 344a3656dea6..c4b17e08d431 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -19,6 +19,8 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null + ip link del veth2 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 34a42c68ebfb..722cad91df74 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -175,96 +175,118 @@ trap "test_finish" EXIT echo "ABCD0123" >"$FW" NAME=$(basename "$FW") -DEVPATH="$DIR"/"nope-$NAME"/loading - -# Test failure when doing nothing (timeout works). -echo -n 2 >/sys/class/firmware/timeout -echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null & - -# Give the kernel some time to load the loading file, must be less -# than the timeout above. -sleep 1 -if [ ! -f $DEVPATH ]; then - echo "$0: fallback mechanism immediately cancelled" - echo "" - echo "The file never appeared: $DEVPATH" - echo "" - echo "This might be a distribution udev rule setup by your distribution" - echo "to immediately cancel all fallback requests, this must be" - echo "removed before running these tests. To confirm look for" - echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" - echo "and see if you have something like this:" - echo "" - echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" - echo "" - echo "If you do remove this file or comment out this line before" - echo "proceeding with these tests." - exit 1 -fi - -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not expected to match" >&2 - exit 1 -else - echo "$0: timeout works" -fi - -# Put timeout high enough for us to do work but not so long that failures -# slow down this test too much. -echo 4 >/sys/class/firmware/timeout +test_syfs_timeout() +{ + DEVPATH="$DIR"/"nope-$NAME"/loading + + # Test failure when doing nothing (timeout works). + echo -n 2 >/sys/class/firmware/timeout + echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null & + + # Give the kernel some time to load the loading file, must be less + # than the timeout above. + sleep 1 + if [ ! -f $DEVPATH ]; then + echo "$0: fallback mechanism immediately cancelled" + echo "" + echo "The file never appeared: $DEVPATH" + echo "" + echo "This might be a distribution udev rule setup by your distribution" + echo "to immediately cancel all fallback requests, this must be" + echo "removed before running these tests. To confirm look for" + echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" + echo "and see if you have something like this:" + echo "" + echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" + echo "" + echo "If you do remove this file or comment out this line before" + echo "proceeding with these tests." + exit 1 + fi -# Load this script instead of the desired firmware. -load_fw "$NAME" "$0" -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not expected to match" >&2 - exit 1 -else - echo "$0: firmware comparison works" -fi + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not expected to match" >&2 + exit 1 + else + echo "$0: timeout works" + fi +} -# Do a proper load, which should work correctly. -load_fw "$NAME" "$FW" -if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not loaded" >&2 - exit 1 -else - echo "$0: fallback mechanism works" -fi +run_sysfs_main_tests() +{ + test_syfs_timeout + # Put timeout high enough for us to do work but not so long that failures + # slow down this test too much. + echo 4 >/sys/class/firmware/timeout -load_fw_cancel "nope-$NAME" "$FW" -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was expected to be cancelled" >&2 - exit 1 -else - echo "$0: cancelling fallback mechanism works" -fi + # Load this script instead of the desired firmware. + load_fw "$NAME" "$0" + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not expected to match" >&2 + exit 1 + else + echo "$0: firmware comparison works" + fi -if load_fw_custom "$NAME" "$FW" ; then + # Do a proper load, which should work correctly. + load_fw "$NAME" "$FW" if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was not loaded" >&2 exit 1 else - echo "$0: custom fallback loading mechanism works" + echo "$0: fallback mechanism works" fi -fi -if load_fw_custom_cancel "nope-$NAME" "$FW" ; then + load_fw_cancel "nope-$NAME" "$FW" if diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was expected to be cancelled" >&2 exit 1 else - echo "$0: cancelling custom fallback mechanism works" + echo "$0: cancelling fallback mechanism works" fi -fi -set +e -load_fw_fallback_with_child "nope-signal-$NAME" "$FW" -if [ "$?" -eq 0 ]; then - echo "$0: SIGCHLD on sync ignored as expected" >&2 -else - echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2 - exit 1 -fi -set -e + set +e + load_fw_fallback_with_child "nope-signal-$NAME" "$FW" + if [ "$?" -eq 0 ]; then + echo "$0: SIGCHLD on sync ignored as expected" >&2 + else + echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2 + exit 1 + fi + set -e +} + +run_sysfs_custom_load_tests() +{ + if load_fw_custom "$NAME" "$FW" ; then + if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: custom fallback loading mechanism works" + fi + fi + + if load_fw_custom "$NAME" "$FW" ; then + if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: custom fallback loading mechanism works" + fi + fi + + if load_fw_custom_cancel "nope-$NAME" "$FW" ; then + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was expected to be cancelled" >&2 + exit 1 + else + echo "$0: cancelling custom fallback mechanism works" + fi + fi +} + +run_sysfs_main_tests +run_sysfs_custom_load_tests exit 0 diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index b1f20fef36c7..f9508e1a4058 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -45,7 +45,10 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + OLD_FWPATH=" " + fi + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path rm -f "$FW" rmdir "$FWPATH" } diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 589d52b211b7..27a54a17da65 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -29,6 +29,12 @@ ftrace_filter_check '*schedule*' '^.*schedule.*$' # filter by *, end match ftrace_filter_check 'schedule*' '^schedule.*$' +# filter by *mid*end +ftrace_filter_check '*aw*lock' '.*aw.*lock$' + +# filter by start*mid* +ftrace_filter_check 'mutex*try*' '^mutex.*try.*' + # Advanced full-glob matching feature is recently supported. # Skip the tests if we are sure the kernel does not support it. if grep -q 'accepts: .* glob-matching-pattern' README ; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 0f3f92622e33..68e7a48f5828 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -128,6 +128,43 @@ if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then fail "Expected $FUNC1 and $FUNC2" fi +test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter + cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual + DIFF=`diff $TMPDIR/actual $TMPDIR/expected` + test -z "$DIFF" +} + +# Set traceoff trigger for all fuctions with "lock" in their name +cat available_filter_functions | cut -d' ' -f1 | grep 'lock' | sort -u > $TMPDIR/expected +echo '*lock*:traceoff' > set_ftrace_filter +test_actual + +# now remove all with 'try' in it, and end with lock +grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!*try*lock:traceoff' >> set_ftrace_filter +test_actual + +# remove all that start with "m" and end with "lock" +grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!m*lock:traceoff' >> set_ftrace_filter +test_actual + +# remove all that start with "c" and have "unlock" +grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!c*unlock*:traceoff' >> set_ftrace_filter +test_actual + +# clear all the rest +> $TMPDIR/expected +echo '!*:traceoff' >> set_ftrace_filter +test_actual + +rm $TMPDIR/expected +rm $TMPDIR/actual + do_reset exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index f2019b37370d..df3dd7fe5f9b 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -37,17 +37,21 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter if [ "$tr" = "" ]; then continue fi + if ! grep -q "$t" set_ftrace_filter; then + continue; + fi + name=`echo $t | cut -d: -f1 | cut -d' ' -f1` if [ $tr = "enable_event" -o $tr = "disable_event" ]; then - tr=`echo $t | cut -d: -f1-4` + tr=`echo $t | cut -d: -f2-4` limit=`echo $t | cut -d: -f5` else - tr=`echo $t | cut -d: -f1-2` + tr=`echo $t | cut -d: -f2` limit=`echo $t | cut -d: -f3` fi if [ "$limit" != "unlimited" ]; then tr="$tr:$limit" fi - echo "!$tr" > set_ftrace_filter + echo "!$name:$tr" > set_ftrace_filter done } diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index bb16cf91f1b5..ce361b9d62cf 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -12,9 +12,24 @@ case `uname -m` in *) OFFS=0;; esac -echo "Setup up to 256 kprobes" -grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ -head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: +if [ -d events/kprobes ]; then + echo 0 > events/kprobes/enable + echo > kprobe_events +fi + +N=0 +echo "Setup up kprobes on first available 256 text symbols" +grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ +while read i; do + echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||: + test $N -eq 256 && break +done + +L=`wc -l kprobe_events` +if [ $L -ne $N ]; then + echo "The number of kprobes events ($L) is not $N" + exit_fail +fi echo 1 > events/kprobes/enable echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh index 17d5bd0c0936..a27e2eec3586 100755 --- a/tools/testing/selftests/gen_kselftest_tar.sh +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -1,13 +1,11 @@ #!/bin/bash # +# SPDX-License-Identifier: GPL-2.0 # gen_kselftest_tar # Generate kselftest tarball # Author: Shuah Khan <shuahkh@osg.samsung.com> # Copyright (C) 2015 Samsung Electronics Co., Ltd. -# This software may be freely redistributed under the terms of the GNU -# General Public License (GPLv2). - # main main() { diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 1ae565ed9bf0..1a52b03962a3 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * kselftest.h: kselftest framework return codes to include from * selftests. @@ -5,7 +6,6 @@ * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2014 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ #ifndef __KSELFTEST_H #define __KSELFTEST_H diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh index 1555fbdb08da..ec304463883c 100755 --- a/tools/testing/selftests/kselftest_install.sh +++ b/tools/testing/selftests/kselftest_install.sh @@ -1,13 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Kselftest Install # Install kselftest tests # Author: Shuah Khan <shuahkh@osg.samsung.com> # Copyright (C) 2015 Samsung Electronics Co., Ltd. -# This software may be freely redistributed under the terms of the GNU -# General Public License (GPLv2). - install_loc=`pwd` main() diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 5bef05d6ba39..7de482a0519d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -77,7 +77,7 @@ endif define EMIT_TESTS @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ + echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ done; endef diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile index be5bd4ffb895..c82cec2497de 100644 --- a/tools/testing/selftests/media_tests/Makefile +++ b/tools/testing/selftests/media_tests/Makefile @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := media_device_test media_device_open video_device_test -all: $(TEST_PROGS) +TEST_GEN_PROGS := media_device_test media_device_open video_device_test +all: $(TEST_GEN_PROGS) include ../lib.mk - -clean: - rm -fr media_device_test media_device_open video_device_test diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c index 44343c091a20..a5ce5434bafd 100644 --- a/tools/testing/selftests/media_tests/media_device_open.c +++ b/tools/testing/selftests/media_tests/media_device_open.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * media_device_open.c - Media Controller Device Open Test * * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c index 5d49943e77d0..421a367e4bb3 100644 --- a/tools/testing/selftests/media_tests/media_device_test.c +++ b/tools/testing/selftests/media_tests/media_device_test.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * media_device_test.c - Media Controller Device ioctl loop Test * * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c index 66d419c28653..0f6aef2e2593 100644 --- a/tools/testing/selftests/media_tests/video_device_test.c +++ b/tools/testing/selftests/media_tests/video_device_test.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * video_device_test - Video Device Test * * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index 9e674d9514d1..22bffd55a523 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -16,49 +16,210 @@ static int sys_membarrier(int cmd, int flags) static int test_membarrier_cmd_fail(void) { int cmd = -1, flags = 0; + const char *test_name = "sys membarrier invalid command"; if (sys_membarrier(cmd, flags) != -1) { ksft_exit_fail_msg( - "sys membarrier invalid command test: command = %d, flags = %d. Should fail, but passed\n", - cmd, flags); + "%s test: command = %d, flags = %d. Should fail, but passed\n", + test_name, cmd, flags); + } + if (errno != EINVAL) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EINVAL, strerror(EINVAL), + errno, strerror(errno)); } ksft_test_result_pass( - "sys membarrier invalid command test: command = %d, flags = %d. Failed as expected\n", - cmd, flags); + "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n", + test_name, cmd, flags, errno); return 0; } static int test_membarrier_flags_fail(void) { int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags"; + + if (sys_membarrier(cmd, flags) != -1) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EINVAL) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EINVAL, strerror(EINVAL), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d. Failed as expected\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_global_success(void) +{ + int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_fail(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure"; + + if (sys_membarrier(cmd, flags) != -1) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EPERM) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EPERM, strerror(EPERM), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_register_private_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_sync_core_fail(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure"; if (sys_membarrier(cmd, flags) != -1) { ksft_exit_fail_msg( - "sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Should fail, but passed\n", - flags); + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EPERM) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EPERM, strerror(EPERM), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_register_private_expedited_sync_core_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_sync_core_success(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); } ksft_test_result_pass( - "sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Failed as expected\n", - flags); + "%s test: flags = %d\n", + test_name, flags); return 0; } -static int test_membarrier_success(void) +static int test_membarrier_register_global_expedited_success(void) { - int cmd = MEMBARRIER_CMD_SHARED, flags = 0; - const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED\n"; + int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED"; if (sys_membarrier(cmd, flags) != 0) { ksft_exit_fail_msg( - "sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n", - flags); + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); } ksft_test_result_pass( - "sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n", - flags); + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_global_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); return 0; } @@ -72,7 +233,45 @@ static int test_membarrier(void) status = test_membarrier_flags_fail(); if (status) return status; - status = test_membarrier_success(); + status = test_membarrier_global_success(); + if (status) + return status; + status = test_membarrier_private_expedited_fail(); + if (status) + return status; + status = test_membarrier_register_private_expedited_success(); + if (status) + return status; + status = test_membarrier_private_expedited_success(); + if (status) + return status; + status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0); + if (status < 0) { + ksft_test_result_fail("sys_membarrier() failed\n"); + return status; + } + if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { + status = test_membarrier_private_expedited_sync_core_fail(); + if (status) + return status; + status = test_membarrier_register_private_expedited_sync_core_success(); + if (status) + return status; + status = test_membarrier_private_expedited_sync_core_success(); + if (status) + return status; + } + /* + * It is valid to send a global membarrier from a non-registered + * process. + */ + status = test_membarrier_global_expedited_success(); + if (status) + return status; + status = test_membarrier_register_global_expedited_success(); + if (status) + return status; + status = test_membarrier_global_expedited_success(); if (status) return status; return 0; @@ -94,8 +293,10 @@ static int test_membarrier_query(void) } ksft_exit_fail_msg("sys_membarrier() failed\n"); } - if (!(ret & MEMBARRIER_CMD_SHARED)) + if (!(ret & MEMBARRIER_CMD_GLOBAL)) { + ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n"); ksft_exit_fail_msg("sys_membarrier is not supported.\n"); + } ksft_test_result_pass("sys_membarrier available\n"); return 0; @@ -108,5 +309,5 @@ int main(int argc, char **argv) test_membarrier_query(); test_membarrier(); - ksft_exit_pass(); + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 3926a0409dda..a5276a91dfbf 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -12,3 +12,8 @@ fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) include ../lib.mk $(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs) + +$(OUTPUT)/memfd_test: memfd_test.c common.o +$(OUTPUT)/fuse_test: fuse_test.c common.o + +EXTRA_CLEAN = common.o diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c new file mode 100644 index 000000000000..8eb3d75f6e60 --- /dev/null +++ b/tools/testing/selftests/memfd/common.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <stdio.h> +#include <stdlib.h> +#include <linux/fcntl.h> +#include <linux/memfd.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "common.h" + +int hugetlbfs_test = 0; + +/* + * Copied from mlock2-tests.c + */ +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +int sys_memfd_create(const char *name, unsigned int flags) +{ + if (hugetlbfs_test) + flags |= MFD_HUGETLB; + + return syscall(__NR_memfd_create, name, flags); +} diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h new file mode 100644 index 000000000000..522d2c630bd8 --- /dev/null +++ b/tools/testing/selftests/memfd/common.h @@ -0,0 +1,9 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +extern int hugetlbfs_test; + +unsigned long default_huge_page_size(void); +int sys_memfd_create(const char *name, unsigned int flags); + +#endif diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 1ccb7a3eb14b..b018e835737d 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -33,14 +33,12 @@ #include <sys/wait.h> #include <unistd.h> +#include "common.h" + #define MFD_DEF_SIZE 8192 #define STACK_SIZE 65536 -static int sys_memfd_create(const char *name, - unsigned int flags) -{ - return syscall(__NR_memfd_create, name, flags); -} +static size_t mfd_def_size = MFD_DEF_SIZE; static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { @@ -127,7 +125,7 @@ static void *mfd_assert_mmap_shared(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, @@ -145,7 +143,7 @@ static void *mfd_assert_mmap_private(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, @@ -178,7 +176,7 @@ static int sealing_thread_fn(void *arg) usleep(200000); /* unmount mapping before sealing to avoid i_mmap_writable failures */ - munmap(global_p, MFD_DEF_SIZE); + munmap(global_p, mfd_def_size); /* Try sealing the global file; expect EBUSY or success. Current * kernels will never succeed, but in the future, kernels might @@ -228,7 +226,7 @@ static void join_sealing_thread(pid_t pid) int main(int argc, char **argv) { - static const char zero[MFD_DEF_SIZE]; + char *zero; int fd, mfd, r; void *p; int was_sealed; @@ -239,6 +237,25 @@ int main(int argc, char **argv) abort(); } + if (argc >= 3) { + if (!strcmp(argv[2], "hugetlbfs")) { + unsigned long hpage_size = default_huge_page_size(); + + if (!hpage_size) { + printf("Unable to determine huge page size\n"); + abort(); + } + + hugetlbfs_test = 1; + mfd_def_size = hpage_size * 2; + } else { + printf("Unknown option: %s\n", argv[2]); + abort(); + } + } + + zero = calloc(sizeof(*zero), mfd_def_size); + /* open FUSE memfd file for GUP testing */ printf("opening: %s\n", argv[1]); fd = open(argv[1], O_RDONLY | O_CLOEXEC); @@ -249,7 +266,7 @@ int main(int argc, char **argv) /* create new memfd-object */ mfd = mfd_assert_new("kern_memfd_fuse", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); /* mmap memfd-object for writing */ @@ -268,7 +285,7 @@ int main(int argc, char **argv) * This guarantees that the receive-buffer is pinned for 1s until the * data is written into it. The racing ADD_SEALS should thus fail as * the pages are still pinned. */ - r = read(fd, p, MFD_DEF_SIZE); + r = read(fd, p, mfd_def_size); if (r < 0) { printf("read() failed: %m\n"); abort(); @@ -295,10 +312,10 @@ int main(int argc, char **argv) * enough to avoid any in-flight writes. */ p = mfd_assert_mmap_private(mfd); - if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) { + if (was_sealed && memcmp(p, zero, mfd_def_size)) { printf("memfd sealed during read() but data not discarded\n"); abort(); - } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) { + } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) { printf("memfd sealed after read() but data discarded\n"); abort(); } @@ -307,6 +324,7 @@ int main(int argc, char **argv) close(fd); printf("fuse: DONE\n"); + free(zero); return 0; } diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 132a54f74e88..10baa1652fc2 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -19,7 +19,10 @@ #include <sys/wait.h> #include <unistd.h> +#include "common.h" + #define MEMFD_STR "memfd:" +#define MEMFD_HUGE_STR "memfd-hugetlb:" #define SHARED_FT_STR "(shared file-table)" #define MFD_DEF_SIZE 8192 @@ -28,41 +31,8 @@ /* * Default is not to test hugetlbfs */ -static int hugetlbfs_test; static size_t mfd_def_size = MFD_DEF_SIZE; - -/* - * Copied from mlock2-tests.c - */ -static unsigned long default_huge_page_size(void) -{ - unsigned long hps = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - - if (!f) - return 0; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { - hps <<= 10; - break; - } - } - - free(line); - fclose(f); - return hps; -} - -static int sys_memfd_create(const char *name, - unsigned int flags) -{ - if (hugetlbfs_test) - flags |= MFD_HUGETLB; - - return syscall(__NR_memfd_create, name, flags); -} +static const char *memfd_str = MEMFD_STR; static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { @@ -513,6 +483,10 @@ static void mfd_assert_grow_write(int fd) static char *buf; ssize_t l; + /* hugetlbfs does not support write */ + if (hugetlbfs_test) + return; + buf = malloc(mfd_def_size * 8); if (!buf) { printf("malloc(%zu) failed: %m\n", mfd_def_size * 8); @@ -533,6 +507,10 @@ static void mfd_fail_grow_write(int fd) static char *buf; ssize_t l; + /* hugetlbfs does not support write */ + if (hugetlbfs_test) + return; + buf = malloc(mfd_def_size * 8); if (!buf) { printf("malloc(%zu) failed: %m\n", mfd_def_size * 8); @@ -598,7 +576,7 @@ static void test_create(void) char buf[2048]; int fd; - printf("%s CREATE\n", MEMFD_STR); + printf("%s CREATE\n", memfd_str); /* test NULL name */ mfd_fail_new(NULL, 0); @@ -627,18 +605,13 @@ static void test_create(void) fd = mfd_assert_new("", 0, MFD_CLOEXEC); close(fd); - if (!hugetlbfs_test) { - /* verify MFD_ALLOW_SEALING is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); - close(fd); - - /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); - close(fd); - } else { - /* sealing is not supported on hugetlbfs */ - mfd_fail_new("", MFD_ALLOW_SEALING); - } + /* verify MFD_ALLOW_SEALING is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); + close(fd); + + /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); + close(fd); } /* @@ -649,11 +622,7 @@ static void test_basic(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s BASIC\n", MEMFD_STR); + printf("%s BASIC\n", memfd_str); fd = mfd_assert_new("kern_memfd_basic", mfd_def_size, @@ -698,28 +667,6 @@ static void test_basic(void) } /* - * hugetlbfs doesn't support seals or write, so just verify grow and shrink - * on a hugetlbfs file created via memfd_create. - */ -static void test_hugetlbfs_grow_shrink(void) -{ - int fd; - - printf("%s HUGETLBFS-GROW-SHRINK\n", MEMFD_STR); - - fd = mfd_assert_new("kern_memfd_seal_write", - mfd_def_size, - MFD_CLOEXEC); - - mfd_assert_read(fd); - mfd_assert_write(fd); - mfd_assert_shrink(fd); - mfd_assert_grow(fd); - - close(fd); -} - -/* * Test SEAL_WRITE * Test whether SEAL_WRITE actually prevents modifications. */ @@ -727,14 +674,7 @@ static void test_seal_write(void) { int fd; - /* - * hugetlbfs does not contain sealing or write support. Just test - * basic grow and shrink via test_hugetlbfs_grow_shrink. - */ - if (hugetlbfs_test) - return test_hugetlbfs_grow_shrink(); - - printf("%s SEAL-WRITE\n", MEMFD_STR); + printf("%s SEAL-WRITE\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_write", mfd_def_size, @@ -760,11 +700,7 @@ static void test_seal_shrink(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s SEAL-SHRINK\n", MEMFD_STR); + printf("%s SEAL-SHRINK\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_shrink", mfd_def_size, @@ -790,11 +726,7 @@ static void test_seal_grow(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s SEAL-GROW\n", MEMFD_STR); + printf("%s SEAL-GROW\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_grow", mfd_def_size, @@ -820,11 +752,7 @@ static void test_seal_resize(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s SEAL-RESIZE\n", MEMFD_STR); + printf("%s SEAL-RESIZE\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_resize", mfd_def_size, @@ -843,32 +771,6 @@ static void test_seal_resize(void) } /* - * hugetlbfs does not support seals. Basic test to dup the memfd created - * fd and perform some basic operations on it. - */ -static void hugetlbfs_dup(char *b_suffix) -{ - int fd, fd2; - - printf("%s HUGETLBFS-DUP %s\n", MEMFD_STR, b_suffix); - - fd = mfd_assert_new("kern_memfd_share_dup", - mfd_def_size, - MFD_CLOEXEC); - - fd2 = mfd_assert_dup(fd); - - mfd_assert_read(fd); - mfd_assert_write(fd); - - mfd_assert_shrink(fd2); - mfd_assert_grow(fd2); - - close(fd2); - close(fd); -} - -/* * Test sharing via dup() * Test that seals are shared between dupped FDs and they're all equal. */ @@ -876,16 +778,7 @@ static void test_share_dup(char *banner, char *b_suffix) { int fd, fd2; - /* - * hugetlbfs does not contain sealing support. Perform some - * basic testing on dup'ed fd instead via hugetlbfs_dup. - */ - if (hugetlbfs_test) { - hugetlbfs_dup(b_suffix); - return; - } - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_dup", mfd_def_size, @@ -927,11 +820,7 @@ static void test_share_mmap(char *banner, char *b_suffix) int fd; void *p; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_mmap", mfd_def_size, @@ -956,32 +845,6 @@ static void test_share_mmap(char *banner, char *b_suffix) } /* - * Basic test to make sure we can open the hugetlbfs fd via /proc and - * perform some simple operations on it. - */ -static void hugetlbfs_proc_open(char *b_suffix) -{ - int fd, fd2; - - printf("%s HUGETLBFS-PROC-OPEN %s\n", MEMFD_STR, b_suffix); - - fd = mfd_assert_new("kern_memfd_share_open", - mfd_def_size, - MFD_CLOEXEC); - - fd2 = mfd_assert_open(fd, O_RDWR, 0); - - mfd_assert_read(fd); - mfd_assert_write(fd); - - mfd_assert_shrink(fd2); - mfd_assert_grow(fd2); - - close(fd2); - close(fd); -} - -/* * Test sealing with open(/proc/self/fd/%d) * Via /proc we can get access to a separate file-context for the same memfd. * This is *not* like dup(), but like a real separate open(). Make sure the @@ -991,16 +854,7 @@ static void test_share_open(char *banner, char *b_suffix) { int fd, fd2; - /* - * hugetlbfs does not contain sealing support. So test basic - * functionality of using /proc fd via hugetlbfs_proc_open - */ - if (hugetlbfs_test) { - hugetlbfs_proc_open(b_suffix); - return; - } - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_open", mfd_def_size, @@ -1043,11 +897,7 @@ static void test_share_fork(char *banner, char *b_suffix) int fd; pid_t pid; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_fork", mfd_def_size, @@ -1083,7 +933,11 @@ int main(int argc, char **argv) } hugetlbfs_test = 1; + memfd_str = MEMFD_HUGE_STR; mfd_def_size = hpage_size * 2; + } else { + printf("Unknown option: %s\n", argv[1]); + abort(); } } diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh index 407df68dfe27..22e572e2d66a 100755 --- a/tools/testing/selftests/memfd/run_fuse_test.sh +++ b/tools/testing/selftests/memfd/run_fuse_test.sh @@ -10,6 +10,6 @@ set -e mkdir mnt ./fuse_mnt ./mnt -./fuse_test ./mnt/memfd +./fuse_test ./mnt/memfd $@ fusermount -u ./mnt rmdir ./mnt diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh index daabb350697c..c2d41ed81b24 100755 --- a/tools/testing/selftests/memfd/run_tests.sh +++ b/tools/testing/selftests/memfd/run_tests.sh @@ -60,6 +60,7 @@ fi # Run the hugetlbfs test # ./memfd_test hugetlbfs +./run_fuse_test.sh hugetlbfs # # Give back any huge pages allocated for the test diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 500c74db746c..d7c30d366935 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,6 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh +TEST_PROGS += fib_tests.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh new file mode 100755 index 000000000000..a9154eefb2e2 --- /dev/null +++ b/tools/testing/selftests/net/fib_tests.sh @@ -0,0 +1,429 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB behavior in response to +# different events. + +ret=0 + +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + +check_fail() +{ + if [ $1 -eq 0 ]; then + ret=1 + fi +} + +netns_create() +{ + local testns=$1 + + ip netns add $testns + ip netns exec $testns ip link set dev lo up +} + +fib_unreg_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip link del dev dummy0 + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_fail $? + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route test" + return 1 + fi + echo "PASS: unicast route test" +} + +fib_unreg_multipath_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link add dummy1 type dummy + ip netns exec testns ip link set dev dummy1 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 + + ip netns exec testns ip route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_err $? + + ip netns exec testns ip link del dev dummy0 + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + # In IPv6 we do not flush the entire multipath route. + check_err $? + + ip netns exec testns ip link del dev dummy1 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: multipath route test" + return 1 + fi + echo "PASS: multipath route test" +} + +fib_unreg_test() +{ + echo "Running netdev unregister tests" + + fib_unreg_unicast_test + fib_unreg_multipath_test +} + +fib_down_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_fail $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route test" + return 1 + fi + echo "PASS: unicast route test" +} + +fib_down_multipath_test_do() +{ + local down_dev=$1 + local up_dev=$2 + + ip netns exec testns ip route get fibmatch 203.0.113.1 \ + oif $down_dev &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + oif $down_dev &> /dev/null + check_fail $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 \ + oif $up_dev &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \ + oif $up_dev &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + grep $down_dev | grep -q "dead linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + grep $down_dev | grep -q "dead linkdown" + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 | \ + grep $up_dev | grep -q "dead linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \ + grep $up_dev | grep -q "dead linkdown" + check_fail $? +} + +fib_down_multipath_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link add dummy1 type dummy + ip netns exec testns ip link set dev dummy1 up + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy1 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1 + + ip netns exec testns ip route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + ip netns exec testns ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_err $? + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + fib_down_multipath_test_do "dummy0" "dummy1" + + ip netns exec testns ip link set dev dummy0 up + check_err $? + ip netns exec testns ip link set dev dummy1 down + check_err $? + + fib_down_multipath_test_do "dummy1" "dummy0" + + ip netns exec testns ip link set dev dummy0 down + check_err $? + + ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null + check_fail $? + + ip netns exec testns ip link del dev dummy1 + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: multipath route test" + return 1 + fi + echo "PASS: multipath route test" +} + +fib_down_test() +{ + echo "Running netdev down tests" + + fib_down_unicast_test + fib_down_multipath_test +} + +fib_carrier_local_test() +{ + ret=0 + + # Local routes should not be affected when carrier changes. + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link set dev dummy0 carrier on + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link set dev dummy0 carrier off + + ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 192.0.2.1 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: local route carrier test" + return 1 + fi + echo "PASS: local route carrier test" +} + +fib_carrier_unicast_test() +{ + ret=0 + + netns_create "testns" + + ip netns exec testns ip link add dummy0 type dummy + ip netns exec testns ip link set dev dummy0 up + + ip netns exec testns ip link set dev dummy0 carrier on + + ip netns exec testns ip address add 198.51.100.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + check_fail $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + check_fail $? + + ip netns exec testns ip link set dev dummy0 carrier off + + ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + check_err $? + + ip netns exec testns ip address add 192.0.2.1/24 dev dummy0 + ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0 + + ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null + check_err $? + + ip netns exec testns ip route get fibmatch 192.0.2.2 | \ + grep -q "linkdown" + check_err $? + ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \ + grep -q "linkdown" + check_err $? + + ip netns exec testns ip link del dev dummy0 + + ip netns del testns + + if [ $ret -ne 0 ]; then + echo "FAIL: unicast route carrier test" + return 1 + fi + echo "PASS: unicast route carrier test" +} + +fib_carrier_test() +{ + echo "Running netdev carrier change tests" + + fib_carrier_local_test + fib_carrier_unicast_test +} + +fib_test() +{ + fib_unreg_test + fib_down_test + fib_carrier_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +ip route help 2>&1 | grep -q fibmatch +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing fibmatch" + exit 0 +fi + +fib_test + +exit $ret diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 3ab6ec403905..e11fe84de0fd 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -259,22 +259,28 @@ static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len) return sizeof(*ip6h); } -static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; switch (domain) { case PF_INET: + memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = AF_INET; addr4->sin_port = htons(cfg_port); - if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(cfg_port); - if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) error(1, 0, "ipv6 parse error: %s", str_addr); break; default: @@ -603,6 +609,7 @@ static void parse_opts(int argc, char **argv) sizeof(struct tcphdr) - 40 /* max tcp options */; int c; + char *daddr = NULL, *saddr = NULL; cfg_payload_len = max_payload_len; @@ -627,7 +634,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + daddr = optarg; break; case 'i': cfg_ifindex = if_nametoindex(optarg); @@ -638,7 +645,7 @@ static void parse_opts(int argc, char **argv) cfg_cork_mixed = true; break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); break; case 'r': cfg_rx = true; @@ -647,7 +654,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': - setup_sockaddr(cfg_family, optarg, &cfg_src_addr); + saddr = optarg; break; case 't': cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; @@ -660,6 +667,8 @@ static void parse_opts(int argc, char **argv) break; } } + setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); + setup_sockaddr(cfg_family, saddr, &cfg_src_addr); if (cfg_payload_len > max_payload_len) error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 4a8217448f20..cad14cd0ea92 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -21,6 +21,7 @@ #include <sys/epoll.h> #include <sys/types.h> #include <sys/socket.h> +#include <sys/resource.h> #include <unistd.h> #ifndef ARRAY_SIZE @@ -190,11 +191,14 @@ static void send_from(struct test_params p, uint16_t sport, char *buf, struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport); struct sockaddr * const daddr = new_loopback_sockaddr(p.send_family, p.recv_port); - const int fd = socket(p.send_family, p.protocol, 0); + const int fd = socket(p.send_family, p.protocol, 0), one = 1; if (fd < 0) error(1, errno, "failed to create send socket"); + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) + error(1, errno, "failed to set reuseaddr"); + if (bind(fd, saddr, sockaddr_size())) error(1, errno, "failed to bind send socket"); @@ -433,6 +437,21 @@ void enable_fastopen(void) } } +static struct rlimit rlim_old, rlim_new; + +static __attribute__((constructor)) void main_ctor(void) +{ + getrlimit(RLIMIT_MEMLOCK, &rlim_old); + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +static __attribute__((destructor)) void main_dtor(void) +{ + setrlimit(RLIMIT_MEMLOCK, &rlim_old); +} + int main(void) { fprintf(stderr, "---- IPv4 UDP ----\n"); diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5215493166c9..a622eeecc3a6 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -502,6 +502,231 @@ kci_test_macsec() echo "PASS: macsec" } +kci_test_gretap() +{ + testns="testns" + DEV_NS=gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: gretap: iproute2 too old" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: gretap" + return 1 + fi + echo "PASS: gretap" + + ip netns del "$testns" +} + +kci_test_ip6gretap() +{ + testns="testns" + DEV_NS=ip6gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help ip6gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6gretap: iproute2 too old" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \ + key 102 local fc00:100::1 remote fc00:100::2 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6gretap" + return 1 + fi + echo "PASS: ip6gretap" + + ip netns del "$testns" +} + +kci_test_erspan() +{ + testns="testns" + DEV_NS=erspan00 + ret=0 + + ip link help erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: erspan" + return 1 + fi + echo "PASS: erspan" + + ip netns del "$testns" +} + +kci_test_ip6erspan() +{ + testns="testns" + DEV_NS=ip6erspan00 + ret=0 + + ip link help ip6erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel ip6erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel ip6erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" \ + type ip6erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6erspan" + return 1 + fi + echo "PASS: ip6erspan" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -514,6 +739,10 @@ kci_test_rtnl() kci_test_route_get kci_test_tc kci_test_gre + kci_test_gretap + kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan kci_test_bridge kci_test_addrlabel kci_test_ifalias diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c index 1182d4e437a2..e0d86e1668c0 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/nsfs/pidns.c @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) return pr_err("NS_GET_PARENT returned a wrong namespace"); if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM) - return pr_err("Don't get EPERM");; + return pr_err("Don't get EPERM"); } kill(pid, SIGKILL); diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 5fc7ad359e21..08cbfbbc7029 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -18,7 +18,6 @@ LIST_DEVS=FALSE DEBUGFS=${DEBUGFS-/sys/kernel/debug} -DB_BITMASK=0x7FFF PERF_RUN_ORDER=32 MAX_MW_SIZE=0 RUN_DMA_TESTS= @@ -39,15 +38,17 @@ function show_help() echo "be highly recommended." echo echo "Options:" - echo " -b BITMASK doorbell clear bitmask for ntb_tool" echo " -C don't cleanup ntb modules on exit" - echo " -d run dma tests" echo " -h show this help message" echo " -l list available local and remote PCI ids" echo " -r REMOTE_HOST specify the remote's hostname to connect" - echo " to for the test (using ssh)" - echo " -p NUM ntb_perf run order (default: $PERF_RUN_ORDER)" - echo " -w max_mw_size maxmium memory window size" + echo " to for the test (using ssh)" + echo " -m MW_SIZE memory window size for ntb_tool" + echo " (default: $MW_SIZE)" + echo " -d run dma tests for ntb_perf" + echo " -p ORDER total data order for ntb_perf" + echo " (default: $PERF_RUN_ORDER)" + echo " -w MAX_MW_SIZE maxmium memory window size for ntb_perf" echo } @@ -56,7 +57,6 @@ function parse_args() OPTIND=0 while getopts "b:Cdhlm:r:p:w:" opt; do case "$opt" in - b) DB_BITMASK=${OPTARG} ;; C) DONT_CLEANUP=1 ;; d) RUN_DMA_TESTS=1 ;; h) show_help; exit 0 ;; @@ -87,7 +87,7 @@ set -e function _modprobe() { - modprobe "$@" + modprobe "$@" if [[ "$REMOTE_HOST" != "" ]]; then ssh "$REMOTE_HOST" modprobe "$@" @@ -127,15 +127,70 @@ function write_file() fi } +function check_file() +{ + split_remote $1 + + if [[ "$REMOTE" != "" ]]; then + ssh "$REMOTE" "[[ -e ${VPATH} ]]" + else + [[ -e ${VPATH} ]] + fi +} + +function subdirname() +{ + echo $(basename $(dirname $1)) 2> /dev/null +} + +function find_pidx() +{ + PORT=$1 + PPATH=$2 + + for ((i = 0; i < 64; i++)); do + PEER_DIR="$PPATH/peer$i" + + check_file ${PEER_DIR} || break + + PEER_PORT=$(read_file "${PEER_DIR}/port") + if [[ ${PORT} -eq $PEER_PORT ]]; then + echo $i + return 0 + fi + done + + return 1 +} + +function port_test() +{ + LOC=$1 + REM=$2 + + echo "Running port tests on: $(basename $LOC) / $(basename $REM)" + + LOCAL_PORT=$(read_file "$LOC/port") + REMOTE_PORT=$(read_file "$REM/port") + + LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC") + REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM") + + echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host" + echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host" + + echo " Passed" +} + function link_test() { LOC=$1 REM=$2 EXP=0 - echo "Running link tests on: $(basename $LOC) / $(basename $REM)" + echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)" - if ! write_file "N" "$LOC/link" 2> /dev/null; then + if ! write_file "N" "$LOC/../link" 2> /dev/null; then echo " Unsupported" return fi @@ -143,12 +198,11 @@ function link_test() write_file "N" "$LOC/link_event" if [[ $(read_file "$REM/link") != "N" ]]; then - echo "Expected remote link to be down in $REM/link" >&2 + echo "Expected link to be down in $REM/link" >&2 exit -1 fi - write_file "Y" "$LOC/link" - write_file "Y" "$LOC/link_event" + write_file "Y" "$LOC/../link" echo " Passed" } @@ -161,58 +215,136 @@ function doorbell_test() echo "Running db tests on: $(basename $LOC) / $(basename $REM)" - write_file "c $DB_BITMASK" "$REM/db" + DB_VALID_MASK=$(read_file "$LOC/db_valid_mask") + + write_file "c $DB_VALID_MASK" "$REM/db" - for ((i=1; i <= 8; i++)); do - let DB=$(read_file "$REM/db") || true - if [[ "$DB" != "$EXP" ]]; then + for ((i = 0; i < 64; i++)); do + DB=$(read_file "$REM/db") + if [[ "$DB" -ne "$EXP" ]]; then echo "Doorbell doesn't match expected value $EXP " \ "in $REM/db" >&2 exit -1 fi - let "MASK=1 << ($i-1)" || true - let "EXP=$EXP | $MASK" || true + let "MASK = (1 << $i) & $DB_VALID_MASK" || true + let "EXP = $EXP | $MASK" || true + write_file "s $MASK" "$LOC/peer_db" done + write_file "c $DB_VALID_MASK" "$REM/db_mask" + write_file $DB_VALID_MASK "$REM/db_event" + write_file "s $DB_VALID_MASK" "$REM/db_mask" + + write_file "c $DB_VALID_MASK" "$REM/db" + echo " Passed" } -function read_spad() +function get_files_count() { - VPATH=$1 - IDX=$2 + NAME=$1 + LOC=$2 + + split_remote $LOC - ROW=($(read_file "$VPATH" | grep -e "^$IDX")) - let VAL=${ROW[1]} || true - echo $VAL + if [[ "$REMOTE" == "" ]]; then + echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l) + else + echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \ + wc -l" 2> /dev/null) + fi } function scratchpad_test() { LOC=$1 REM=$2 - CNT=$(read_file "$LOC/spad" | wc -l) - echo "Running spad tests on: $(basename $LOC) / $(basename $REM)" + echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(get_files_count "spad" "$LOC") + + if [[ $CNT -eq 0 ]]; then + echo " Unsupported" + return + fi for ((i = 0; i < $CNT; i++)); do VAL=$RANDOM - write_file "$i $VAL" "$LOC/peer_spad" - RVAL=$(read_spad "$REM/spad" $i) + write_file "$VAL" "$LOC/spad$i" + RVAL=$(read_file "$REM/../spad$i") - if [[ "$VAL" != "$RVAL" ]]; then - echo "Scratchpad doesn't match expected value $VAL " \ - "in $REM/spad, got $RVAL" >&2 + if [[ "$VAL" -ne "$RVAL" ]]; then + echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2 exit -1 fi + done + + echo " Passed" +} + +function message_test() +{ + LOC=$1 + REM=$2 + + echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(get_files_count "msg" "$LOC") + if [[ $CNT -eq 0 ]]; then + echo " Unsupported" + return + fi + + MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits") + MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits") + + write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts" + write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts" + + for ((i = 0; i < $CNT; i++)); do + VAL=$RANDOM + write_file "$VAL" "$LOC/msg$i" + RVAL=$(read_file "$REM/../msg$i") + + if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then + echo "Message $i value $RVAL doesn't match $VAL" >&2 + exit -1 + fi done echo " Passed" } +function get_number() +{ + KEY=$1 + + sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p" +} + +function mw_alloc() +{ + IDX=$1 + LOC=$2 + REM=$3 + + write_file $MW_SIZE "$LOC/mw_trans$IDX" + + INB_MW=$(read_file "$LOC/mw_trans$IDX") + MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size") + MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address") + + write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX" + + if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then + echo "MW $IDX size aligned to $MW_ALIGNED_SIZE" + fi +} + function write_mw() { split_remote $2 @@ -225,17 +357,15 @@ function write_mw() fi } -function mw_test() +function mw_check() { IDX=$1 LOC=$2 REM=$3 - echo "Running $IDX tests on: $(basename $LOC) / $(basename $REM)" + write_mw "$LOC/mw$IDX" - write_mw "$LOC/$IDX" - - split_remote "$LOC/$IDX" + split_remote "$LOC/mw$IDX" if [[ "$REMOTE" == "" ]]; then A=$VPATH else @@ -243,7 +373,7 @@ function mw_test() ssh "$REMOTE" cat "$VPATH" > "$A" fi - split_remote "$REM/peer_$IDX" + split_remote "$REM/peer_mw$IDX" if [[ "$REMOTE" == "" ]]; then B=$VPATH else @@ -251,7 +381,7 @@ function mw_test() ssh "$REMOTE" cat "$VPATH" > "$B" fi - cmp -n $MW_SIZE "$A" "$B" + cmp -n $MW_ALIGNED_SIZE "$A" "$B" if [[ $? != 0 ]]; then echo "Memory window $MW did not match!" >&2 fi @@ -263,8 +393,39 @@ function mw_test() if [[ "$B" == "/tmp/*" ]]; then rm "$B" fi +} + +function mw_free() +{ + IDX=$1 + LOC=$2 + REM=$3 + + write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX" + + write_file 0 "$LOC/mw_trans$IDX" +} + +function mw_test() +{ + LOC=$1 + REM=$2 + + CNT=$(get_files_count "mw_trans" "$LOC") + + for ((i = 0; i < $CNT; i++)); do + echo "Running mw$i tests on: $(subdirname $LOC) / " \ + "$(subdirname $REM)" + + mw_alloc $i $LOC $REM + + mw_check $i $LOC $REM + + mw_free $i $LOC $REM + + echo " Passed" + done - echo " Passed" } function pingpong_test() @@ -274,13 +435,13 @@ function pingpong_test() echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)" - LOC_START=$(read_file $LOC/count) - REM_START=$(read_file $REM/count) + LOC_START=$(read_file "$LOC/count") + REM_START=$(read_file "$REM/count") sleep 7 - LOC_END=$(read_file $LOC/count) - REM_END=$(read_file $REM/count) + LOC_END=$(read_file "$LOC/count") + REM_END=$(read_file "$REM/count") if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then echo "Ping pong counter not incrementing!" >&2 @@ -300,19 +461,19 @@ function perf_test() WITH="without" fi - _modprobe ntb_perf run_order=$PERF_RUN_ORDER \ + _modprobe ntb_perf total_order=$PERF_RUN_ORDER \ max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA echo "Running local perf test $WITH DMA" - write_file "" $LOCAL_PERF/run + write_file "$LOCAL_PIDX" "$LOCAL_PERF/run" echo -n " " - read_file $LOCAL_PERF/run + read_file "$LOCAL_PERF/run" echo " Passed" echo "Running remote perf test $WITH DMA" - write_file "" $REMOTE_PERF/run + write_file "$REMOTE_PIDX" "$REMOTE_PERF/run" echo -n " " - read_file $REMOTE_PERF/run + read_file "$REMOTE_PERF/run" echo " Passed" _modprobe -r ntb_perf @@ -320,48 +481,44 @@ function perf_test() function ntb_tool_tests() { - LOCAL_TOOL=$DEBUGFS/ntb_tool/$LOCAL_DEV - REMOTE_TOOL=$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV + LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV" + REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV" echo "Starting ntb_tool tests..." _modprobe ntb_tool - write_file Y $LOCAL_TOOL/link_event - write_file Y $REMOTE_TOOL/link_event + port_test "$LOCAL_TOOL" "$REMOTE_TOOL" - link_test $LOCAL_TOOL $REMOTE_TOOL - link_test $REMOTE_TOOL $LOCAL_TOOL + LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX" + REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX" + + link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" #Ensure the link is up on both sides before continuing - write_file Y $LOCAL_TOOL/link_event - write_file Y $REMOTE_TOOL/link_event + write_file "Y" "$LOCAL_PEER_TOOL/link_event" + write_file "Y" "$REMOTE_PEER_TOOL/link_event" - for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do - PT=$(basename $PEER_TRANS) - write_file $MW_SIZE $LOCAL_TOOL/$PT - write_file $MW_SIZE $REMOTE_TOOL/$PT - done + doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL" + doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL" - doorbell_test $LOCAL_TOOL $REMOTE_TOOL - doorbell_test $REMOTE_TOOL $LOCAL_TOOL - scratchpad_test $LOCAL_TOOL $REMOTE_TOOL - scratchpad_test $REMOTE_TOOL $LOCAL_TOOL + scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" - for MW in $(ls $LOCAL_TOOL/mw*); do - MW=$(basename $MW) + message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" - mw_test $MW $LOCAL_TOOL $REMOTE_TOOL - mw_test $MW $REMOTE_TOOL $LOCAL_TOOL - done + mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" _modprobe -r ntb_tool } function ntb_pingpong_tests() { - LOCAL_PP=$DEBUGFS/ntb_pingpong/$LOCAL_DEV - REMOTE_PP=$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV + LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV" + REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV" echo "Starting ntb_pingpong tests..." @@ -374,8 +531,8 @@ function ntb_pingpong_tests() function ntb_perf_tests() { - LOCAL_PERF=$DEBUGFS/ntb_perf/$LOCAL_DEV - REMOTE_PERF=$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV + LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV" + REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV" echo "Starting ntb_perf tests..." diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 16b22004e75f..083a48a008b4 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,4 +1,5 @@ -TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned +TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \ + paste_last_unaligned alignment_handler include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c new file mode 100644 index 000000000000..0f2698f9fd6d --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -0,0 +1,491 @@ +/* + * Test the powerpc alignment handler on POWER8/POWER9 + * + * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This selftest exercises the powerpc alignment fault handler. + * + * We create two sets of source and destination buffers, one in regular memory, + * the other cache-inhibited (we use /dev/fb0 for this). + * + * We initialise the source buffers, then use whichever set of load/store + * instructions is under test to copy bytes from the source buffers to the + * destination buffers. For the regular buffers, these instructions will + * execute normally. For the cache-inhibited buffers, these instructions + * will trap and cause an alignment fault, and the alignment fault handler + * will emulate the particular instruction under test. We then compare the + * destination buffers to ensure that the native and emulated cases give the + * same result. + * + * TODO: + * - Any FIXMEs below + * - Test VSX regs < 32 and > 32 + * - Test all loads and stores + * - Check update forms do update register + * - Test alignment faults over page boundary + * + * Some old binutils may not support all the instructions. + */ + + +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <getopt.h> +#include <setjmp.h> +#include <signal.h> + +#include "utils.h" + +int bufsize; +int debug; +int testing; +volatile int gotsig; + +void sighandler(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *ucp = ctx; + + if (!testing) { + signal(sig, SIG_DFL); + kill(0, sig); + } + gotsig = sig; +#ifdef __powerpc64__ + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; +#else + ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; +#endif +} + +#define XFORM(reg, n) " " #reg " ,%"#n",%2 ;" +#define DFORM(reg, n) " " #reg " ,0(%"#n") ;" + +#define TEST(name, ld_op, st_op, form, ld_reg, st_reg) \ + void test_##name(char *s, char *d) \ + { \ + asm volatile( \ + #ld_op form(ld_reg, 0) \ + #st_op form(st_reg, 1) \ + :: "r"(s), "r"(d), "r"(0) \ + : "memory", "vs0", "vs32", "r31"); \ + } \ + rc |= do_test(#name, test_##name) + +#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32) +#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32) +#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32) +#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32) +#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32) +#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0) +#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32) +#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0) + +#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31) +#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31) +#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31) +#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31) + +#define LOAD_FLOAT_DFORM_TEST(op) TEST(op, op, stfd, DFORM, 0, 0) +#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0) +#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0) +#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0) + + +/* FIXME: Unimplemented tests: */ +// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */ +// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */ + +// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */ +// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ + + +/* preload byte by byte */ +void preload_data(void *dst, int offset, int width) +{ + char *c = dst; + int i; + + c += offset; + + for (i = 0 ; i < width ; i++) + c[i] = i; +} + +int test_memcpy(void *dst, void *src, int size, int offset, + void (*test_func)(char *, char *)) +{ + char *s, *d; + + s = src; + s += offset; + d = dst; + d += offset; + + assert(size == 16); + gotsig = 0; + testing = 1; + + test_func(s, d); /* run the actual test */ + + testing = 0; + if (gotsig) { + if (debug) + printf(" Got signal %i\n", gotsig); + return 1; + } + return 0; +} + +void dumpdata(char *s1, char *s2, int n, char *test_name) +{ + int i; + + printf(" %s: unexpected result:\n", test_name); + printf(" mem:"); + for (i = 0; i < n; i++) + printf(" %02x", s1[i]); + printf("\n"); + printf(" ci: "); + for (i = 0; i < n; i++) + printf(" %02x", s2[i]); + printf("\n"); +} + +int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name) +{ + char *s1c, *s2c; + + s1c = s1; + s1c += offset; + s2c = s2; + s2c += offset; + + if (memcmp(s1c, s2c, n)) { + if (debug) { + printf("\n Compare failed. Offset:%i length:%i\n", + offset, n); + dumpdata(s1c, s2c, n, test_name); + } + return 1; + } + return 0; +} + +/* + * Do two memcpy tests using the same instructions. One cachable + * memory and the other doesn't. + */ +int do_test(char *test_name, void (*test_func)(char *, char *)) +{ + int offset, width, fd, rc = 0, r; + void *mem0, *mem1, *ci0, *ci1; + + printf("\tDoing %s:\t", test_name); + + fd = open("/dev/fb0", O_RDWR); + if (fd < 0) { + printf("\n"); + perror("Can't open /dev/fb0"); + SKIP_IF(1); + } + + ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, + fd, 0x0); + ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, + fd, bufsize); + if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) { + printf("\n"); + perror("mmap failed"); + SKIP_IF(1); + } + + rc = posix_memalign(&mem0, bufsize, bufsize); + if (rc) { + printf("\n"); + return rc; + } + + rc = posix_memalign(&mem1, bufsize, bufsize); + if (rc) { + printf("\n"); + free(mem0); + return rc; + } + + /* offset = 0 no alignment fault, so skip */ + for (offset = 1; offset < 16; offset++) { + width = 16; /* vsx == 16 bytes */ + r = 0; + + /* load pattern into memory byte by byte */ + preload_data(ci0, offset, width); + preload_data(mem0, offset, width); // FIXME: remove?? + memcpy(ci0, mem0, bufsize); + memcpy(ci1, mem1, bufsize); /* initialise output to the same */ + + /* sanity check */ + test_memcmp(mem0, ci0, width, offset, test_name); + + r |= test_memcpy(ci1, ci0, width, offset, test_func); + r |= test_memcpy(mem1, mem0, width, offset, test_func); + if (r && !debug) { + printf("FAILED: Got signal"); + break; + } + + r |= test_memcmp(mem1, ci1, width, offset, test_name); + rc |= r; + if (r && !debug) { + printf("FAILED: Wrong Data"); + break; + } + } + if (!r) + printf("PASSED"); + printf("\n"); + + munmap(ci0, bufsize); + munmap(ci1, bufsize); + free(mem0); + free(mem1); + + return rc; +} + +int test_alignment_handler_vsx_206(void) +{ + int rc = 0; + + printf("VSX: 2.06B\n"); + LOAD_VSX_XFORM_TEST(lxvd2x); + LOAD_VSX_XFORM_TEST(lxvw4x); + LOAD_VSX_XFORM_TEST(lxsdx); + LOAD_VSX_XFORM_TEST(lxvdsx); + STORE_VSX_XFORM_TEST(stxvd2x); + STORE_VSX_XFORM_TEST(stxvw4x); + STORE_VSX_XFORM_TEST(stxsdx); + return rc; +} + +int test_alignment_handler_vsx_207(void) +{ + int rc = 0; + + printf("VSX: 2.07B\n"); + LOAD_VSX_XFORM_TEST(lxsspx); + LOAD_VSX_XFORM_TEST(lxsiwax); + LOAD_VSX_XFORM_TEST(lxsiwzx); + STORE_VSX_XFORM_TEST(stxsspx); + STORE_VSX_XFORM_TEST(stxsiwx); + return rc; +} + +int test_alignment_handler_vsx_300(void) +{ + int rc = 0; + + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + printf("VSX: 3.00B\n"); + LOAD_VMX_DFORM_TEST(lxsd); + LOAD_VSX_XFORM_TEST(lxsibzx); + LOAD_VSX_XFORM_TEST(lxsihzx); + LOAD_VMX_DFORM_TEST(lxssp); + LOAD_VSX_DFORM_TEST(lxv); + LOAD_VSX_XFORM_TEST(lxvb16x); + LOAD_VSX_XFORM_TEST(lxvh8x); + LOAD_VSX_XFORM_TEST(lxvx); + LOAD_VSX_XFORM_TEST(lxvwsx); + LOAD_VSX_XFORM_TEST(lxvl); + LOAD_VSX_XFORM_TEST(lxvll); + STORE_VMX_DFORM_TEST(stxsd); + STORE_VSX_XFORM_TEST(stxsibx); + STORE_VSX_XFORM_TEST(stxsihx); + STORE_VMX_DFORM_TEST(stxssp); + STORE_VSX_DFORM_TEST(stxv); + STORE_VSX_XFORM_TEST(stxvb16x); + STORE_VSX_XFORM_TEST(stxvh8x); + STORE_VSX_XFORM_TEST(stxvx); + STORE_VSX_XFORM_TEST(stxvl); + STORE_VSX_XFORM_TEST(stxvll); + return rc; +} + +int test_alignment_handler_integer(void) +{ + int rc = 0; + + printf("Integer\n"); + LOAD_DFORM_TEST(lbz); + LOAD_DFORM_TEST(lbzu); + LOAD_XFORM_TEST(lbzx); + LOAD_XFORM_TEST(lbzux); + LOAD_DFORM_TEST(lhz); + LOAD_DFORM_TEST(lhzu); + LOAD_XFORM_TEST(lhzx); + LOAD_XFORM_TEST(lhzux); + LOAD_DFORM_TEST(lha); + LOAD_DFORM_TEST(lhau); + LOAD_XFORM_TEST(lhax); + LOAD_XFORM_TEST(lhaux); + LOAD_XFORM_TEST(lhbrx); + LOAD_DFORM_TEST(lwz); + LOAD_DFORM_TEST(lwzu); + LOAD_XFORM_TEST(lwzx); + LOAD_XFORM_TEST(lwzux); + LOAD_DFORM_TEST(lwa); + LOAD_XFORM_TEST(lwax); + LOAD_XFORM_TEST(lwaux); + LOAD_XFORM_TEST(lwbrx); + LOAD_DFORM_TEST(ld); + LOAD_DFORM_TEST(ldu); + LOAD_XFORM_TEST(ldx); + LOAD_XFORM_TEST(ldux); + LOAD_XFORM_TEST(ldbrx); + LOAD_DFORM_TEST(lmw); + STORE_DFORM_TEST(stb); + STORE_XFORM_TEST(stbx); + STORE_DFORM_TEST(stbu); + STORE_XFORM_TEST(stbux); + STORE_DFORM_TEST(sth); + STORE_XFORM_TEST(sthx); + STORE_DFORM_TEST(sthu); + STORE_XFORM_TEST(sthux); + STORE_XFORM_TEST(sthbrx); + STORE_DFORM_TEST(stw); + STORE_XFORM_TEST(stwx); + STORE_DFORM_TEST(stwu); + STORE_XFORM_TEST(stwux); + STORE_XFORM_TEST(stwbrx); + STORE_DFORM_TEST(std); + STORE_XFORM_TEST(stdx); + STORE_DFORM_TEST(stdu); + STORE_XFORM_TEST(stdux); + STORE_XFORM_TEST(stdbrx); + STORE_DFORM_TEST(stmw); + return rc; +} + +int test_alignment_handler_vmx(void) +{ + int rc = 0; + + printf("VMX\n"); + LOAD_VMX_XFORM_TEST(lvx); + + /* + * FIXME: These loads only load part of the register, so our + * testing method doesn't work. Also they don't take alignment + * faults, so it's kinda pointless anyway + * + LOAD_VMX_XFORM_TEST(lvebx) + LOAD_VMX_XFORM_TEST(lvehx) + LOAD_VMX_XFORM_TEST(lvewx) + LOAD_VMX_XFORM_TEST(lvxl) + */ + STORE_VMX_XFORM_TEST(stvx); + STORE_VMX_XFORM_TEST(stvebx); + STORE_VMX_XFORM_TEST(stvehx); + STORE_VMX_XFORM_TEST(stvewx); + STORE_VMX_XFORM_TEST(stvxl); + return rc; +} + +int test_alignment_handler_fp(void) +{ + int rc = 0; + + printf("Floating point\n"); + LOAD_FLOAT_DFORM_TEST(lfd); + LOAD_FLOAT_XFORM_TEST(lfdx); + LOAD_FLOAT_DFORM_TEST(lfdp); + LOAD_FLOAT_XFORM_TEST(lfdpx); + LOAD_FLOAT_DFORM_TEST(lfdu); + LOAD_FLOAT_XFORM_TEST(lfdux); + LOAD_FLOAT_DFORM_TEST(lfs); + LOAD_FLOAT_XFORM_TEST(lfsx); + LOAD_FLOAT_DFORM_TEST(lfsu); + LOAD_FLOAT_XFORM_TEST(lfsux); + LOAD_FLOAT_XFORM_TEST(lfiwzx); + LOAD_FLOAT_XFORM_TEST(lfiwax); + STORE_FLOAT_DFORM_TEST(stfd); + STORE_FLOAT_XFORM_TEST(stfdx); + STORE_FLOAT_DFORM_TEST(stfdp); + STORE_FLOAT_XFORM_TEST(stfdpx); + STORE_FLOAT_DFORM_TEST(stfdu); + STORE_FLOAT_XFORM_TEST(stfdux); + STORE_FLOAT_DFORM_TEST(stfs); + STORE_FLOAT_XFORM_TEST(stfsx); + STORE_FLOAT_DFORM_TEST(stfsu); + STORE_FLOAT_XFORM_TEST(stfsux); + STORE_FLOAT_XFORM_TEST(stfiwx); + + return rc; +} + +void usage(char *prog) +{ + printf("Usage: %s [options]\n", prog); + printf(" -d Enable debug error output\n"); + printf("\n"); + printf("This test requires a POWER8 or POWER9 CPU and a usable "); + printf("framebuffer at /dev/fb0.\n"); +} + +int main(int argc, char *argv[]) +{ + + struct sigaction sa; + int rc = 0; + int option = 0; + + while ((option = getopt(argc, argv, "d")) != -1) { + switch (option) { + case 'd': + debug++; + break; + default: + usage(argv[0]); + exit(1); + } + } + + bufsize = getpagesize(); + + sa.sa_sigaction = sighandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL) == -1 + || sigaction(SIGBUS, &sa, NULL) == -1 + || sigaction(SIGILL, &sa, NULL) == -1) { + perror("sigaction"); + exit(1); + } + + rc |= test_harness(test_alignment_handler_vsx_206, + "test_alignment_handler_vsx_206"); + rc |= test_harness(test_alignment_handler_vsx_207, + "test_alignment_handler_vsx_207"); + rc |= test_harness(test_alignment_handler_vsx_300, + "test_alignment_handler_vsx_300"); + rc |= test_harness(test_alignment_handler_integer, + "test_alignment_handler_integer"); + rc |= test_harness(test_alignment_handler_vmx, + "test_alignment_handler_vmx"); + rc |= test_harness(test_alignment_handler_fp, + "test_alignment_handler_fp"); + return rc; +} diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c index 8d084a2d6e74..7a0a462a2272 100644 --- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c +++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c @@ -7,17 +7,34 @@ #include <stdlib.h> #include <sys/mman.h> #include <time.h> +#include <getopt.h> #include "utils.h" #define ITERATIONS 5000000 -#define MEMSIZE (128 * 1024 * 1024) +#define MEMSIZE (1UL << 27) +#define PAGE_SIZE (1UL << 16) +#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE) + +static int pg_fault; +static int iterations = ITERATIONS; + +static struct option options[] = { + { "pgfault", no_argument, &pg_fault, 1 }, + { "iterations", required_argument, 0, 'i' }, + { 0, }, +}; + +static void usage(void) +{ + printf("mmap_bench <--pgfault> <--iterations count>\n"); +} int test_mmap(void) { struct timespec ts_start, ts_end; - unsigned long i = ITERATIONS; + unsigned long i = iterations; clock_gettime(CLOCK_MONOTONIC, &ts_start); @@ -25,6 +42,11 @@ int test_mmap(void) char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); FAIL_IF(c == MAP_FAILED); + if (pg_fault) { + int count; + for (count = 0; count < CHUNK_COUNT; count++) + c[count << 16] = 'c'; + } munmap(c, MEMSIZE); } @@ -35,7 +57,32 @@ int test_mmap(void) return 0; } -int main(void) +int main(int argc, char *argv[]) { + signed char c; + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "", options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 0: + if (options[option_index].flag != 0) + break; + + usage(); + exit(1); + break; + case 'i': + iterations = atoi(optarg); + break; + default: + usage(); + exit(1); + } + } return test_harness(test_mmap, "mmap_bench"); } diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index e715a3f2fbf4..7d7c42ed6de9 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,4 +1,5 @@ hugetlb_vs_thp_test subpage_prot tempfile -prot_sao
\ No newline at end of file +prot_sao +segv_errors
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index bf315bcbe663..8ebbe96d80a8 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors TEST_GEN_FILES := tempfile include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c new file mode 100644 index 000000000000..06ae76ee3ea1 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/segv_errors.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2017 John Sperbeck + * + * Test that an access to a mapped but inaccessible area causes a SEGV and + * reports si_code == SEGV_ACCERR. + */ + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <sys/mman.h> +#include <assert.h> +#include <ucontext.h> + +#include "utils.h" + +static bool faulted; +static int si_code; + +static void segv_handler(int n, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + faulted = true; + si_code = info->si_code; + regs->nip += 4; +} + +int test_segv_errors(void) +{ + struct sigaction act = { + .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO, + }; + char c, *p = NULL; + + p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + FAIL_IF(p == MAP_FAILED); + + FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0); + + faulted = false; + si_code = 0; + + /* + * We just need a compiler barrier, but mb() works and has the nice + * property of being easy to spot in the disassembly. + */ + mb(); + c = *p; + mb(); + + FAIL_IF(!faulted); + FAIL_IF(si_code != SEGV_ACCERR); + + faulted = false; + si_code = 0; + + mb(); + *p = c; + mb(); + + FAIL_IF(!faulted); + FAIL_IF(si_code != SEGV_ACCERR); + + return 0; +} + +int main(void) +{ + return test_harness(test_segv_errors, "segv_errors"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 0df3c23b7888..277dade1b382 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -79,8 +79,8 @@ trans: : [res] "=r" (result), [texasr] "=r" (texasr) : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r2", "r3", "r4", - "r8", "r9", "r10", "r11" + : "memory", "r0", "r1", "r3", "r4", + "r7", "r8", "r9", "r10", "r11" ); if (result) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 94e57cb89769..51427a2465f6 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -76,8 +76,7 @@ trans: : [tfhar] "=r" (tfhar), [res] "=r" (result), [texasr] "=r" (texasr), [cptr1] "=r" (cptr1) : [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r2", "r3", "r4", - "r8", "r9", "r10", "r11", "r31" + : "memory", "r0", "r8", "r31" ); /* There are 2 32bit instructions before tbegin. */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index b4081e2b22d5..17c23cabac3e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -67,7 +67,7 @@ trans: : [res] "=r" (result), [texasr] "=r" (texasr) : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "r" (&cptr[1]) - : "memory", "r0", "r1", "r2", "r3", "r4", + : "memory", "r0", "r1", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 241a4a4ee0e4..bb90d4b79524 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -13,3 +13,4 @@ tm-signal-context-chk-vmx tm-signal-context-chk-vsx tm-vmx-unavail tm-unavailable +tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 8ed6f8c57230..a23453943ad2 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -3,7 +3,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu tm-signal-context-chk-vmx tm-signal-context-chk-vsx TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \ + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) include ../../lib.mk @@ -18,6 +18,7 @@ $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 $(OUTPUT)/tm-resched-dscr: ../pmu/lib.o $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx +$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c new file mode 100644 index 000000000000..5d92c23ee6cb --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -0,0 +1,329 @@ +/* + * Copyright 2017, Gustavo Romero, IBM Corp. + * Licensed under GPLv2. + * + * Check if thread endianness is flipped inadvertently to BE on trap + * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after + * load_fp and load_vec overflowed). + * + * The issue can be checked on LE machines simply by zeroing load_fp + * and load_vec and then causing a trap in TM. Since the endianness + * changes to BE on return from the signal handler, 'nop' is + * thread as an illegal instruction in following sequence: + * tbegin. + * beq 1f + * trap + * tend. + * 1: nop + * + * However, although the issue is also present on BE machines, it's a + * bit trickier to check it on BE machines because MSR.LE bit is set + * to zero which determines a BE endianness that is the native + * endianness on BE machines, so nothing notably critical happens, + * i.e. no illegal instruction is observed immediately after returning + * from the signal handler (as it happens on LE machines). Thus to test + * it on BE machines LE endianness is forced after a first trap and then + * the endianness is verified on subsequent traps to determine if the + * endianness "flipped back" to the native endianness (BE). + */ + +#define _GNU_SOURCE +#include <error.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <htmintrin.h> +#include <inttypes.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> + +#include "tm.h" +#include "utils.h" + +#define pr_error(error_code, format, ...) \ + error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__) + +#define MSR_LE 1UL +#define LE 1UL + +pthread_t t0_ping; +pthread_t t1_pong; + +int exit_from_pong; + +int trap_event; +int le; + +bool success; + +void trap_signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + uint64_t thread_endianness; + + /* Get thread endianness: extract bit LE from MSR */ + thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; + + /*** + * Little-Endian Machine + */ + + if (le) { + /* First trap event */ + if (trap_event == 0) { + /* Do nothing. Since it is returning from this trap + * event that endianness is flipped by the bug, so just + * let the process return from the signal handler and + * check on the second trap event if endianness is + * flipped or not. + */ + } + /* Second trap event */ + else if (trap_event == 1) { + /* + * Since trap was caught in TM on first trap event, if + * endianness was still LE (not flipped inadvertently) + * after returning from the signal handler instruction + * (1) is executed (basically a 'nop'), as it's located + * at address of tbegin. +4 (rollback addr). As (1) on + * LE endianness does in effect nothing, instruction (2) + * is then executed again as 'trap', generating a second + * trap event (note that in that case 'trap' is caught + * not in transacional mode). On te other hand, if after + * the return from the signal handler the endianness in- + * advertently flipped, instruction (1) is tread as a + * branch instruction, i.e. b .+8, hence instruction (3) + * and (4) are executed (tbegin.; trap;) and we get sim- + * ilaly on the trap signal handler, but now in TM mode. + * Either way, it's now possible to check the MSR LE bit + * once in the trap handler to verify if endianness was + * flipped or not after the return from the second trap + * event. If endianness is flipped, the bug is present. + * Finally, getting a trap in TM mode or not is just + * worth noting because it affects the math to determine + * the offset added to the NIP on return: the NIP for a + * trap caught in TM is the rollback address, i.e. the + * next instruction after 'tbegin.', whilst the NIP for + * a trap caught in non-transactional mode is the very + * same address of the 'trap' instruction that generated + * the trap event. + */ + + if (thread_endianness == LE) { + /* Go to 'success', i.e. instruction (6) */ + ucp->uc_mcontext.gp_regs[PT_NIP] += 16; + } else { + /* + * Thread endianness is BE, so it flipped + * inadvertently. Thus we flip back to LE and + * set NIP to go to 'failure', instruction (5). + */ + ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + } + } + } + + /*** + * Big-Endian Machine + */ + + else { + /* First trap event */ + if (trap_event == 0) { + /* + * Force thread endianness to be LE. Instructions (1), + * (3), and (4) will be executed, generating a second + * trap in TM mode. + */ + ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; + } + /* Second trap event */ + else if (trap_event == 1) { + /* + * Do nothing. If bug is present on return from this + * second trap event endianness will flip back "automat- + * ically" to BE, otherwise thread endianness will + * continue to be LE, just as it was set above. + */ + } + /* A third trap event */ + else { + /* + * Once here it means that after returning from the sec- + * ond trap event instruction (4) (trap) was executed + * as LE, generating a third trap event. In that case + * endianness is still LE as set on return from the + * first trap event, hence no bug. Otherwise, bug + * flipped back to BE on return from the second trap + * event and instruction (4) was executed as 'tdi' (so + * basically a 'nop') and branch to 'failure' in + * instruction (5) was taken to indicate failure and we + * never get here. + */ + + /* + * Flip back to BE and go to instruction (6), i.e. go to + * 'success'. + */ + ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL; + ucp->uc_mcontext.gp_regs[PT_NIP] += 8; + } + } + + trap_event++; +} + +void usr1_signal_handler(int signo, siginfo_t *si, void *not_used) +{ + /* Got a USR1 signal from ping(), so just tell pong() to exit */ + exit_from_pong = 1; +} + +void *ping(void *not_used) +{ + uint64_t i; + + trap_event = 0; + + /* + * Wait an amount of context switches so load_fp and load_vec overflows + * and MSR_[FP|VEC|V] is 0. + */ + for (i = 0; i < 1024*1024*512; i++) + ; + + asm goto( + /* + * [NA] means "Native Endianness", i.e. it tells how a + * instruction is executed on machine's native endianness (in + * other words, native endianness matches kernel endianness). + * [OP] means "Opposite Endianness", i.e. on a BE machine, it + * tells how a instruction is executed as a LE instruction; con- + * versely, on a LE machine, it tells how a instruction is + * executed as a BE instruction. When [NA] is omitted, it means + * that the native interpretation of a given instruction is not + * relevant for the test. Likewise when [OP] is omitted. + */ + + " tbegin. ;" /* (0) tbegin. [NA] */ + " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */ + " trap ;" /* (2) trap [NA] */ + ".long 0x1D05007C;" /* (3) tbegin. [OP] */ + ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */ + " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */ + " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/ + + : : : : failure, success); + +failure: + success = false; + goto exit_from_ping; + +success: + success = true; + +exit_from_ping: + /* Tell pong() to exit before leaving */ + pthread_kill(t1_pong, SIGUSR1); + return NULL; +} + +void *pong(void *not_used) +{ + while (!exit_from_pong) + /* + * Induce context switches on ping() thread + * until ping() finishes its job and signs + * to exit from this loop. + */ + sched_yield(); + + return NULL; +} + +int tm_trap_test(void) +{ + uint16_t k = 1; + + int rc; + + pthread_attr_t attr; + cpu_set_t cpuset; + + struct sigaction trap_sa; + + trap_sa.sa_flags = SA_SIGINFO; + trap_sa.sa_sigaction = trap_signal_handler; + sigaction(SIGTRAP, &trap_sa, NULL); + + struct sigaction usr1_sa; + + usr1_sa.sa_flags = SA_SIGINFO; + usr1_sa.sa_sigaction = usr1_signal_handler; + sigaction(SIGUSR1, &usr1_sa, NULL); + + /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + /* Init pthread attribute */ + rc = pthread_attr_init(&attr); + if (rc) + pr_error(rc, "pthread_attr_init()"); + + /* + * Bind thread ping() and pong() both to CPU 0 so they ping-pong and + * speed up context switches on ping() thread, speeding up the load_fp + * and load_vec overflow. + */ + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) + pr_error(rc, "pthread_attr_setaffinity()"); + + /* Figure out the machine endianness */ + le = (int) *(uint8_t *)&k; + + printf("%s machine detected. Checking if endianness flips %s", + le ? "Little-Endian" : "Big-Endian", + "inadvertently on trap in TM... "); + + rc = fflush(0); + if (rc) + pr_error(rc, "fflush()"); + + /* Launch ping() */ + rc = pthread_create(&t0_ping, &attr, ping, NULL); + if (rc) + pr_error(rc, "pthread_create()"); + + exit_from_pong = 0; + + /* Launch pong() */ + rc = pthread_create(&t1_pong, &attr, pong, NULL); + if (rc) + pr_error(rc, "pthread_create()"); + + rc = pthread_join(t0_ping, NULL); + if (rc) + pr_error(rc, "pthread_join()"); + + rc = pthread_join(t1_pong, NULL); + if (rc) + pr_error(rc, "pthread_join()"); + + if (success) { + printf("no.\n"); /* no, endianness did not flip inadvertently */ + return EXIT_SUCCESS; + } + + printf("yes!\n"); /* yes, endianness did flip inadvertently */ + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + return test_harness(tm_trap_test, "tm_trap_test"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 96c37f84ce54..e6a0fad2bfd0 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -15,6 +15,7 @@ */ #define _GNU_SOURCE +#include <error.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -33,6 +34,11 @@ #define VSX_UNA_EXCEPTION 2 #define NUM_EXCEPTIONS 3 +#define err_at_line(status, errnum, format, ...) \ + error_at_line(status, errnum, __FILE__, __LINE__, format ##__VA_ARGS__) + +#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__) +#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__) struct Flags { int touch_fp; @@ -303,10 +309,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) * checking if the failure cause is the one we expect. */ do { + int rc; + /* Bind 'ping' to CPU 0, as specified in 'attr'. */ - pthread_create(&t0, attr, ping, (void *) &flags); - pthread_setname_np(t0, "ping"); - pthread_join(t0, &ret_value); + rc = pthread_create(&t0, attr, ping, (void *) &flags); + if (rc) + pr_err(rc, "pthread_create()"); + rc = pthread_setname_np(t0, "ping"); + if (rc) + pr_warn(rc, "pthread_setname_np"); + rc = pthread_join(t0, &ret_value); + if (rc) + pr_err(rc, "pthread_join"); + retries--; } while (ret_value != NULL && retries); @@ -320,7 +335,7 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) int main(int argc, char **argv) { - int exception; /* FP = 0, VEC = 1, VSX = 2 */ + int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; @@ -330,13 +345,23 @@ int main(int argc, char **argv) CPU_SET(0, &cpuset); /* Init pthread attribute. */ - pthread_attr_init(&attr); + rc = pthread_attr_init(&attr); + if (rc) + pr_err(rc, "pthread_attr_init()"); /* Set CPU 0 mask into the pthread attribute. */ - pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); - - pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); - pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */ + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) + pr_err(rc, "pthread_attr_setaffinity_np()"); + + rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); + if (rc) + pr_err(rc, "pthread_create()"); + + /* Name it for systemtap convenience */ + rc = pthread_setname_np(t1, "pong"); + if (rc) + pr_warn(rc, "pthread_create()"); flags.result = 0; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 24dbf634e2dd..0b457e8e0f0c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1717,7 +1717,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); - if (nr == __NR_open) + if (nr == __NR_openat) change_syscall(_metadata, tracee, -1); } @@ -1792,7 +1792,7 @@ TEST_F(TRACE_syscall, ptrace_syscall_dropped) true); /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_allowed) diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 1ca2ee4d15b9..63c94d776e89 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -10,3 +10,4 @@ userfaultfd mlock-intersect-test mlock-random-test virtual_address_range +gup_benchmark diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 7f45806bd863..fdefa2295ddc 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -8,17 +8,18 @@ endif CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test +TEST_GEN_FILES += gup_benchmark TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range -TEST_GEN_FILES += gup_benchmark TEST_PROGS := run_vmtests diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index a65b016d4c13..1097f04e4d80 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -137,6 +137,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + lseek(fd, 0, SEEK_SET); + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index cc826326de87..d2561895a021 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -177,4 +177,15 @@ else echo "[PASS]" fi +echo "-----------------------------" +echo "running virtual address 128TB switch test" +echo "-----------------------------" +./va_128TBswitch +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c new file mode 100644 index 000000000000..e7fe734c374f --- /dev/null +++ b/tools/testing/selftests/vm/va_128TBswitch.c @@ -0,0 +1,297 @@ +/* + * + * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> + * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#ifdef __powerpc64__ +#define PAGE_SIZE (64 << 10) +/* + * This will work with 16M and 2M hugepage size + */ +#define HUGETLB_SIZE (16 << 20) +#else +#define PAGE_SIZE (4 << 10) +#define HUGETLB_SIZE (2 << 20) +#endif + +/* + * >= 128TB is the hint addr value we used to select + * large address space. + */ +#define ADDR_SWITCH_HINT (1UL << 47) +#define LOW_ADDR ((void *) (1UL << 30)) +#define HIGH_ADDR ((void *) (1UL << 48)) + +struct testcase { + void *addr; + unsigned long size; + unsigned long flags; + const char *msg; + unsigned int low_addr_required:1; + unsigned int keep_mapped:1; +}; + +static struct testcase testcases[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + /* + * We should never allocate at the requested address or above it + * The len cross the 128TB boundary. Without MAP_FIXED + * we will always search in the lower address space. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at 128TB, the area is free we should get that + * even without MAP_FIXED. + */ + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, +}; + +static struct testcase hugetlb_testcases[] = { + { + .addr = NULL, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", + }, +}; + +static int run_test(struct testcase *test, int count) +{ + void *p; + int i, ret = 0; + + for (i = 0; i < count; i++) { + struct testcase *t = test + i; + + p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0); + + printf("%s: %p - ", t->msg, p); + + if (p == MAP_FAILED) { + printf("FAILED\n"); + ret = 1; + continue; + } + + if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + printf("FAILED\n"); + ret = 1; + } else { + /* + * Do a dereference of the address returned so that we catch + * bugs in page fault handling + */ + memset(p, 0, t->size); + printf("OK\n"); + } + if (!t->keep_mapped) + munmap(p, t->size); + } + + return ret; +} + +static int supported_arch(void) +{ +#if defined(__powerpc64__) + return 1; +#elif defined(__x86_64__) + return 1; +#else + return 0; +#endif +} + +int main(int argc, char **argv) +{ + int ret; + + if (!supported_arch()) + return 0; + + ret = run_test(testcases, ARRAY_SIZE(testcases)); + if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) + ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + return ret; +} diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c deleted file mode 100644 index 2eafdcd4c2b3..000000000000 --- a/tools/testing/selftests/x86/5lvl.c +++ /dev/null @@ -1,177 +0,0 @@ -#include <stdio.h> -#include <sys/mman.h> - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -#define PAGE_SIZE 4096 -#define LOW_ADDR ((void *) (1UL << 30)) -#define HIGH_ADDR ((void *) (1UL << 50)) - -struct testcase { - void *addr; - unsigned long size; - unsigned long flags; - const char *msg; - unsigned int low_addr_required:1; - unsigned int keep_mapped:1; -}; - -static struct testcase testcases[] = { - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void*) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void*) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void*) -1, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void*) -1, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 4UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - (2UL << 20)), - .size = 4UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)", - }, -}; - -int main(int argc, char **argv) -{ - int i; - void *p; - - for (i = 0; i < ARRAY_SIZE(testcases); i++) { - struct testcase *t = testcases + i; - - p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0); - - printf("%s: %p - ", t->msg, p); - - if (p == MAP_FAILED) { - printf("FAILED\n"); - continue; - } - - if (t->low_addr_required && p >= (void *)(1UL << 47)) - printf("FAILED\n"); - else - printf("OK\n"); - if (!t->keep_mapped) - munmap(p, t->size); - } - return 0; -} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5d4f10ac2af2..d744991c0f4f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,16 +5,26 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ + check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ protection_keys test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip +# Some selftests require 32bit support enabled also on 64bit systems +TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall -TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11) +TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED) +endif + BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) @@ -23,18 +33,28 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie -UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) +define gen-target-rule-32 +$(1) $(1)_32: $(OUTPUT)/$(1)_32 +.PHONY: $(1) $(1)_32 +endef + +define gen-target-rule-64 +$(1) $(1)_64: $(OUTPUT)/$(1)_64 +.PHONY: $(1) $(1)_64 +endef ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) +EXTRA_CFLAGS += -DCAN_BUILD_32 +$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t)))) endif ifeq ($(CAN_BUILD_X86_64),1) all: all_64 TEST_PROGS += $(BINARIES_64) +EXTRA_CFLAGS += -DCAN_BUILD_64 +$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t)))) endif all_32: $(BINARIES_32) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index ec0f6b45ce8b..9c0325e1ea68 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si) return si->si_upper; } #else + +/* + * This deals with old version of _sigfault in some distros: + * + +old _sigfault: + struct { + void *si_addr; + } _sigfault; + +new _sigfault: + struct { + void __user *_addr; + int _trapno; + short _addr_lsb; + union { + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + __u32 _pkey; + }; + } _sigfault; + * + */ + static inline void **__si_bounds_hack(siginfo_t *si) { void *sigfault = &si->_sifields._sigfault; void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); - void **__si_lower = end_sigfault; + int *trapno = (int*)end_sigfault; + /* skip _trapno and _addr_lsb */ + void **__si_lower = (void**)(trapno + 2); return __si_lower; } @@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si) static inline void *__si_bounds_upper(siginfo_t *si) { - return (*__si_bounds_hack(si)) + sizeof(void *); + return *(__si_bounds_hack(si) + 1); } #endif diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index bc1b0735bb50..f15aa5a76fe3 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -393,34 +393,6 @@ pid_t fork_lazy_child(void) return forkret; } -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index a48da95c18fd..ddfdd635de16 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -119,7 +119,9 @@ static void check_result(void) int main() { +#ifdef CAN_BUILD_32 int tmp; +#endif sethandler(SIGTRAP, sigtrap, 0); @@ -139,12 +141,13 @@ int main() : : "c" (post_nop) : "r11"); check_result(); #endif - +#ifdef CAN_BUILD_32 printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) : INT80_CLOBBERS); check_result(); +#endif /* * This test is particularly interesting if fast syscalls use diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index bf0d687c7db7..64f11c8d9b76 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp) vdso_size += PAGE_SIZE; } +#ifdef __i386__ /* Glibc is likely to explode now - exit with raw syscall */ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); +#else /* __x86_64__ */ + syscall(SYS_exit, ret); +#endif } else { int status; diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 29973cde06d3..235259011704 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -26,20 +26,59 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + int nerrs = 0; +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); + +getcpu_t vgetcpu; +getcpu_t vdso_getcpu; + +static void *vsyscall_getcpu(void) +{ #ifdef __x86_64__ -# define VSYS(x) (x) + FILE *maps; + char line[MAPS_LINE_LEN]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ + return NULL; + + while (fgets(line, MAPS_LINE_LEN, maps)) { + char r, x; + void *start, *end; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + /* assume entries are OK, as we test vDSO here not vsyscall */ + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("Warning: failed to find vsyscall getcpu\n"); + return NULL; + } + return (void *) (0xffffffffff600800); #else -# define VSYS(x) 0 + return NULL; #endif +} -typedef long (*getcpu_t)(unsigned *, unsigned *, void *); - -const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); -getcpu_t vdso_getcpu; -void fill_function_pointers() +static void fill_function_pointers() { void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); @@ -54,6 +93,8 @@ void fill_function_pointers() vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); if (!vdso_getcpu) printf("Warning: failed to find getcpu in vDSO\n"); + + vgetcpu = (getcpu_t) vsyscall_getcpu(); } static long sys_getcpu(unsigned * cpu, unsigned * node, diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 7a744fa7b786..be81621446f0 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -33,6 +33,9 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -98,7 +101,7 @@ static int init_vsys(void) #ifdef __x86_64__ int nerrs = 0; FILE *maps; - char line[128]; + char line[MAPS_LINE_LEN]; bool found = false; maps = fopen("/proc/self/maps", "r"); @@ -108,10 +111,12 @@ static int init_vsys(void) return 0; } - while (fgets(line, sizeof(line), maps)) { + while (fgets(line, MAPS_LINE_LEN, maps)) { char r, x; void *start, *end; - char name[128]; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", &start, &end, &r, &x, name) != 5) continue; diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c index e059b7d1ec5b..ec3a0b794f15 100644 --- a/tools/usb/usbip/libsrc/usbip_device_driver.c +++ b/tools/usb/usbip/libsrc/usbip_device_driver.c @@ -77,7 +77,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) const char *path, *name; char filepath[SYSFS_PATH_MAX]; struct usb_device_descriptor descr; - unsigned i; + unsigned int i; FILE *fd = NULL; struct udev_device *plat; const char *speed; @@ -92,7 +92,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) return -1; ret = fread((char *) &descr, sizeof(descr), 1, fd); if (ret < 0) - return -1; + goto err; fclose(fd); copy_descr_attr(dev, &descr, bDeviceClass); @@ -124,6 +124,9 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) name = udev_device_get_sysname(plat); strncpy(dev->busid, name, SYSFS_BUS_ID_SIZE); return 0; +err: + fclose(fd); + return -1; } static int is_my_device(struct udev_device *dev) diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c index fa46141ae68b..e121cfb1746a 100644 --- a/tools/usb/usbip/src/usbip_bind.c +++ b/tools/usb/usbip/src/usbip_bind.c @@ -144,6 +144,7 @@ static int bind_device(char *busid) int rc; struct udev *udev; struct udev_device *dev; + const char *devpath; /* Check whether the device with this bus ID exists. */ udev = udev_new(); @@ -152,8 +153,16 @@ static int bind_device(char *busid) err("device with the specified bus ID does not exist"); return -1; } + devpath = udev_device_get_devpath(dev); udev_unref(udev); + /* If the device is already attached to vhci_hcd - bail out */ + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + err("bind loop detected: device: %s is attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + return -1; + } + rc = unbind_other(busid); if (rc == UNBIND_ST_FAILED) { err("could not unbind driver from device on busid %s", busid); diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index f1b38e866dd7..d65a9f444174 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -187,6 +187,7 @@ static int list_devices(bool parsable) const char *busid; char product_name[128]; int ret = -1; + const char *devpath; /* Create libudev context. */ udev = udev_new(); @@ -209,6 +210,14 @@ static int list_devices(bool parsable) path = udev_list_entry_get_name(dev_list_entry); dev = udev_device_new_from_syspath(udev, path); + /* Ignore devices attached to vhci_hcd */ + devpath = udev_device_get_devpath(dev); + if (strstr(devpath, USBIP_VHCI_DRV_NAME)) { + dbg("Skip the device %s already attached to %s\n", + devpath, USBIP_VHCI_DRV_NAME); + continue; + } + /* Get device information. */ idVendor = udev_device_get_sysattr_value(dev, "idVendor"); idProduct = udev_device_get_sysattr_value(dev, "idProduct"); diff --git a/tools/usb/usbip/src/usbipd.c b/tools/usb/usbip/src/usbipd.c index 009afb4a3aae..c6dad2a13c80 100644 --- a/tools/usb/usbip/src/usbipd.c +++ b/tools/usb/usbip/src/usbipd.c @@ -456,7 +456,7 @@ static void set_signal(void) sigaction(SIGTERM, &act, NULL); sigaction(SIGINT, &act, NULL); act.sa_handler = SIG_IGN; - sigaction(SIGCLD, &act, NULL); + sigaction(SIGCHLD, &act, NULL); } static const char *pid_file; diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 395521a7a8d8..fca8381bbe04 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) +#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 000000000000..e0f610d08006 --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 5706e075adf2..301d59bfcd0a 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -111,7 +111,7 @@ static inline void busy_wait(void) } #if defined(__x86_64__) || defined(__i386__) -#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized @@ -134,4 +134,61 @@ static inline void busy_wait(void) barrier(); \ } while (0) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#else +#define smp_wmb() smp_release() +#endif + +#ifdef __alpha__ +#define smp_read_barrier_depends() smp_acquire() +#else +#define smp_read_barrier_depends() do {} while(0) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { \ + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \ + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \ + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \ + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + #endif diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index e6e81305ef46..477899c12c51 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -187,7 +187,7 @@ bool enable_kick() bool avail_empty() { - return !__ptr_ring_peek(&array); + return __ptr_ring_empty(&array); } bool use_buf(unsigned *lenp, void **bufp) diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c index 747c5dd47be8..5a41404aaef5 100644 --- a/tools/virtio/ringtest/ring.c +++ b/tools/virtio/ringtest/ring.c @@ -84,12 +84,11 @@ void alloc_ring(void) perror("Unable to allocate ring buffer.\n"); exit(3); } - event = malloc(sizeof *event); + event = calloc(1, sizeof(*event)); if (!event) { perror("Unable to allocate event buffer.\n"); exit(3); } - memset(event, 0, sizeof *event); guest.avail_idx = 0; guest.kicked_avail_idx = -1; guest.last_used_idx = 0; @@ -102,12 +101,11 @@ void alloc_ring(void) ring[i] = desc; } guest.num_free = ring_size; - data = malloc(ring_size * sizeof *data); + data = calloc(ring_size, sizeof(*data)); if (!data) { perror("Unable to allocate data buffer.\n"); exit(3); } - memset(data, 0, ring_size * sizeof *data); } /* guest side */ @@ -188,16 +186,18 @@ bool enable_call() void kick_available(void) { + bool need; + /* Flush in previous flags write */ /* Barrier C (for pairing) */ smp_mb(); - if (!need_event(event->kick_index, - guest.avail_idx, - guest.kicked_avail_idx)) - return; + need = need_event(event->kick_index, + guest.avail_idx, + guest.kicked_avail_idx); guest.kicked_avail_idx = guest.avail_idx; - kick(); + if (need) + kick(); } /* host side */ @@ -253,14 +253,18 @@ bool use_buf(unsigned *lenp, void **bufp) void call_used(void) { + bool need; + /* Flush in previous flags write */ /* Barrier D (for pairing) */ smp_mb(); - if (!need_event(event->call_index, + + need = need_event(event->call_index, host.used_idx, - host.called_used_idx)) - return; + host.called_used_idx); host.called_used_idx = host.used_idx; - call(); + + if (need) + call(); } diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c index bbc3043b2fb1..5fd3fbcb9e57 100644 --- a/tools/virtio/ringtest/virtio_ring_0_9.c +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -225,16 +225,18 @@ bool enable_call() void kick_available(void) { + bool need; + /* Flush in previous flags write */ /* Barrier C (for pairing) */ smp_mb(); - if (!vring_need_event(vring_avail_event(&ring), - guest.avail_idx, - guest.kicked_avail_idx)) - return; + need = vring_need_event(vring_avail_event(&ring), + guest.avail_idx, + guest.kicked_avail_idx); guest.kicked_avail_idx = guest.avail_idx; - kick(); + if (need) + kick(); } /* host side */ @@ -316,14 +318,16 @@ bool use_buf(unsigned *lenp, void **bufp) void call_used(void) { + bool need; + /* Flush in previous flags write */ /* Barrier D (for pairing) */ smp_mb(); - if (!vring_need_event(vring_used_event(&ring), - host.used_idx, - host.called_used_idx)) - return; + need = vring_need_event(vring_used_event(&ring), + host.used_idx, + host.called_used_idx); host.called_used_idx = host.used_idx; - call(); + if (need) + call(); } diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index e92903fc7113..a8783f48f77f 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -169,9 +169,10 @@ static int opt_raw; /* for kernel developers */ static int opt_list; /* list pages (in ranges) */ static int opt_no_summary; /* don't show summary */ static pid_t opt_pid; /* process to walk */ -const char * opt_file; /* file or directory path */ +const char *opt_file; /* file or directory path */ static uint64_t opt_cgroup; /* cgroup inode */ static int opt_list_cgroup;/* list page cgroup */ +static const char *opt_kpageflags;/* kpageflags file to parse */ #define MAX_ADDR_RANGES 1024 static int nr_addr_ranges; @@ -258,7 +259,7 @@ static int checked_open(const char *pathname, int flags) * pagemap/kpageflags routines */ -static unsigned long do_u64_read(int fd, char *name, +static unsigned long do_u64_read(int fd, const char *name, uint64_t *buf, unsigned long index, unsigned long count) @@ -283,7 +284,7 @@ static unsigned long kpageflags_read(uint64_t *buf, unsigned long index, unsigned long pages) { - return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); + return do_u64_read(kpageflags_fd, opt_kpageflags, buf, index, pages); } static unsigned long kpagecgroup_read(uint64_t *buf, @@ -293,7 +294,7 @@ static unsigned long kpagecgroup_read(uint64_t *buf, if (kpagecgroup_fd < 0) return pages; - return do_u64_read(kpagecgroup_fd, PROC_KPAGEFLAGS, buf, index, pages); + return do_u64_read(kpagecgroup_fd, opt_kpageflags, buf, index, pages); } static unsigned long pagemap_read(uint64_t *buf, @@ -743,7 +744,7 @@ static void walk_addr_ranges(void) { int i; - kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); + kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); if (!nr_addr_ranges) add_addr_range(0, ULONG_MAX); @@ -790,6 +791,7 @@ static void usage(void) " -N|--no-summary Don't show summary info\n" " -X|--hwpoison hwpoison pages\n" " -x|--unpoison unpoison pages\n" +" -F|--kpageflags filename kpageflags file to parse\n" " -h|--help Show this usage message\n" "flags:\n" " 0x10 bitfield format, e.g.\n" @@ -1013,7 +1015,7 @@ static void walk_page_cache(void) { struct stat st; - kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); + kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY); sigaction(SIGBUS, &sigbus_action, NULL); @@ -1164,6 +1166,11 @@ static void parse_bits_mask(const char *optarg) add_bits_filter(mask, bits); } +static void parse_kpageflags(const char *name) +{ + opt_kpageflags = name; +} + static void describe_flags(const char *optarg) { uint64_t flags = parse_flag_names(optarg, 0); @@ -1188,6 +1195,7 @@ static const struct option opts[] = { { "no-summary", 0, NULL, 'N' }, { "hwpoison" , 0, NULL, 'X' }, { "unpoison" , 0, NULL, 'x' }, + { "kpageflags", 0, NULL, 'F' }, { "help" , 0, NULL, 'h' }, { NULL , 0, NULL, 0 } }; @@ -1199,7 +1207,7 @@ int main(int argc, char *argv[]) page_size = getpagesize(); while ((c = getopt_long(argc, argv, - "rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) { + "rp:f:a:b:d:c:ClLNXxF:h", opts, NULL)) != -1) { switch (c) { case 'r': opt_raw = 1; @@ -1242,6 +1250,9 @@ int main(int argc, char *argv[]) opt_unpoison = 1; prepare_hwpoison_fd(); break; + case 'F': + parse_kpageflags(optarg); + break; case 'h': usage(); exit(0); @@ -1251,6 +1262,9 @@ int main(int argc, char *argv[]) } } + if (!opt_kpageflags) + opt_kpageflags = PROC_KPAGEFLAGS; + if (opt_cgroup || opt_list_cgroup) kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY); |