diff options
Diffstat (limited to 'include')
57 files changed, 1889 insertions, 304 deletions
diff --git a/include/linux/atm_suni.h b/include/linux/atm_suni.h deleted file mode 100644 index 84f3aab54468..000000000000 --- a/include/linux/atm_suni.h +++ /dev/null @@ -1,12 +0,0 @@ -/* atm_suni.h - Driver-specific declarations of the SUNI driver (for use by - driver-specific utilities) */ - -/* Written 1998,2000 by Werner Almesberger, EPFL ICA */ - - -#ifndef LINUX_ATM_SUNI_H -#define LINUX_ATM_SUNI_H - -/* everything obsoleted */ - -#endif diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 40bad71865ea..47482049f640 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,6 +136,9 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, + /* opcode 34 - 46 are reserved */ + VIRTCHNL_OP_ADD_FDIR_FILTER = 47, + VIRTCHNL_OP_DEL_FDIR_FILTER = 48, }; /* These macros are used to generate compilation errors if a structure/union @@ -247,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 +#define VIRTCHNL_VF_OFFLOAD_FDIR_PF 0X10000000 /* Define below the capability flags that are not offloads */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 @@ -559,6 +563,11 @@ enum virtchnl_action { /* action types */ VIRTCHNL_ACTION_DROP = 0, VIRTCHNL_ACTION_TC_REDIRECT, + VIRTCHNL_ACTION_PASSTHRU, + VIRTCHNL_ACTION_QUEUE, + VIRTCHNL_ACTION_Q_REGION, + VIRTCHNL_ACTION_MARK, + VIRTCHNL_ACTION_COUNT, }; enum virtchnl_flow_type { @@ -668,6 +677,269 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_VFACTIVE, }; +#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 +#define PROTO_HDR_SHIFT 5 +#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) +#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) + +/* VF use these macros to configure each protocol header. + * Specify which protocol headers and protocol header fields base on + * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field. + * @param hdr: a struct of virtchnl_proto_hdr + * @param hdr_type: ETH/IPV4/TCP, etc + * @param field: SRC/DST/TEID/SPI, etc + */ +#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \ + ((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \ + ((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK)) +#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr) ((hdr)->field_selector) + +#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) +#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \ + (VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \ + VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field)) + +#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \ + ((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type) +#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \ + (((hdr)->type) >> PROTO_HDR_SHIFT) +#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \ + ((hdr)->type == ((val) >> PROTO_HDR_SHIFT)) +#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \ + (VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \ + VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val))) + +/* Protocol header type within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization. + */ +enum virtchnl_proto_hdr_type { + VIRTCHNL_PROTO_HDR_NONE, + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_S_VLAN, + VIRTCHNL_PROTO_HDR_C_VLAN, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_TCP, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_SCTP, + VIRTCHNL_PROTO_HDR_GTPU_IP, + VIRTCHNL_PROTO_HDR_GTPU_EH, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + VIRTCHNL_PROTO_HDR_PPPOE, + VIRTCHNL_PROTO_HDR_L2TPV3, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_AH, + VIRTCHNL_PROTO_HDR_PFCP, +}; + +/* Protocol header field within a protocol header. */ +enum virtchnl_proto_hdr_field { + /* ETHER */ + VIRTCHNL_PROTO_HDR_ETH_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH), + VIRTCHNL_PROTO_HDR_ETH_DST, + VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, + /* S-VLAN */ + VIRTCHNL_PROTO_HDR_S_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN), + /* C-VLAN */ + VIRTCHNL_PROTO_HDR_C_VLAN_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN), + /* IPV4 */ + VIRTCHNL_PROTO_HDR_IPV4_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4), + VIRTCHNL_PROTO_HDR_IPV4_DST, + VIRTCHNL_PROTO_HDR_IPV4_DSCP, + VIRTCHNL_PROTO_HDR_IPV4_TTL, + VIRTCHNL_PROTO_HDR_IPV4_PROT, + /* IPV6 */ + VIRTCHNL_PROTO_HDR_IPV6_SRC = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6), + VIRTCHNL_PROTO_HDR_IPV6_DST, + VIRTCHNL_PROTO_HDR_IPV6_TC, + VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, + VIRTCHNL_PROTO_HDR_IPV6_PROT, + /* TCP */ + VIRTCHNL_PROTO_HDR_TCP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP), + VIRTCHNL_PROTO_HDR_TCP_DST_PORT, + /* UDP */ + VIRTCHNL_PROTO_HDR_UDP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP), + VIRTCHNL_PROTO_HDR_UDP_DST_PORT, + /* SCTP */ + VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP), + VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, + /* GTPU_IP */ + VIRTCHNL_PROTO_HDR_GTPU_IP_TEID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP), + /* GTPU_EH */ + VIRTCHNL_PROTO_HDR_GTPU_EH_PDU = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH), + VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, + /* PPPOE */ + VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE), + /* L2TPV3 */ + VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3), + /* ESP */ + VIRTCHNL_PROTO_HDR_ESP_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP), + /* AH */ + VIRTCHNL_PROTO_HDR_AH_SPI = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH), + /* PFCP */ + VIRTCHNL_PROTO_HDR_PFCP_S_FIELD = + PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP), + VIRTCHNL_PROTO_HDR_PFCP_SEID, +}; + +struct virtchnl_proto_hdr { + enum virtchnl_proto_hdr_type type; + u32 field_selector; /* a bit mask to select field for header type */ + u8 buffer[64]; + /** + * binary buffer in network order for specific header type. + * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4 + * header is expected to be copied into the buffer. + */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr); + +struct virtchnl_proto_hdrs { + u8 tunnel_level; + /** + * specify where protocol header start from. + * 0 - from the outer layer + * 1 - from the first inner layer + * 2 - from the second inner layer + * .... + **/ + int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); + +/* action configuration for FDIR */ +struct virtchnl_filter_action { + enum virtchnl_action type; + union { + /* used for queue and qgroup action */ + struct { + u16 index; + u8 region; + } queue; + /* used for count action */ + struct { + /* share counter ID with other flow rules */ + u8 shared; + u32 id; /* counter ID */ + } count; + /* used for mark action */ + u32 mark_id; + u8 reserve[32]; + } act_conf; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); + +#define VIRTCHNL_MAX_NUM_ACTIONS 8 + +struct virtchnl_filter_action_set { + /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ + int count; + struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set); + +/* pattern and action for FDIR rule */ +struct virtchnl_fdir_rule { + struct virtchnl_proto_hdrs proto_hdrs; + struct virtchnl_filter_action_set action_set; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule); + +/* Status returned to VF after VF requests FDIR commands + * VIRTCHNL_FDIR_SUCCESS + * VF FDIR related request is successfully done by PF + * The request can be OP_ADD/DEL. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE + * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource. + * + * VIRTCHNL_FDIR_FAILURE_RULE_EXIST + * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed. + * + * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT + * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule. + * + * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST + * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist. + * + * VIRTCHNL_FDIR_FAILURE_RULE_INVALID + * OP_ADD_FDIR_FILTER request is failed due to parameters validation + * or HW doesn't support. + * + * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT + * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out + * for programming. + */ +enum virtchnl_fdir_prgm_status { + VIRTCHNL_FDIR_SUCCESS = 0, + VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE, + VIRTCHNL_FDIR_FAILURE_RULE_EXIST, + VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT, + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST, + VIRTCHNL_FDIR_FAILURE_RULE_INVALID, + VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT, +}; + +/* VIRTCHNL_OP_ADD_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id, + * validate_only and rule_cfg. PF will return flow_id + * if the request is successfully done and return add_status to VF. + */ +struct virtchnl_fdir_add { + u16 vsi_id; /* INPUT */ + /* + * 1 for validating a fdir rule, 0 for creating a fdir rule. + * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER. + */ + u16 validate_only; /* INPUT */ + u32 flow_id; /* OUTPUT */ + struct virtchnl_fdir_rule rule_cfg; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add); + +/* VIRTCHNL_OP_DEL_FDIR_FILTER + * VF sends this request to PF by filling out vsi_id + * and flow_id. PF will return del_status to VF. + */ +struct virtchnl_fdir_del { + u16 vsi_id; /* INPUT */ + u16 pad; + u32 flow_id; /* INPUT */ + enum virtchnl_fdir_prgm_status status; /* OUTPUT */ +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); + /** * virtchnl_vc_validate_vf_msg * @ver: Virtchnl version info @@ -828,6 +1100,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_add); + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: + valid_len = sizeof(struct virtchnl_fdir_del); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3625f019767d..39dce9d3c3a5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; struct mem_cgroup; +struct bpf_func_state; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -118,6 +119,9 @@ struct bpf_map_ops { void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); + /* Misc helpers.*/ + int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. @@ -130,6 +134,13 @@ struct bpf_map_ops { bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1); + + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@ -296,6 +307,8 @@ enum bpf_arg_type { ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, }; @@ -412,6 +425,8 @@ enum bpf_reg_type { PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ }; /* The information passed from prog-specific *_is_valid_access @@ -507,6 +522,11 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_ARGS 12 +/* The maximum number of arguments passed through registers + * a single function may have. + */ +#define MAX_BPF_FUNC_REG_ARGS 5 + struct btf_func_model { u8 ret_size; u8 nr_args; @@ -1397,6 +1417,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux, int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@ -1446,9 +1470,9 @@ struct btf *bpf_get_btf_vmlinux(void); /* Map specifics */ struct xdp_buff; struct sk_buff; +struct bpf_dtab_netdev; +struct bpf_cpu_map_entry; -struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_flush(void); int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -1458,7 +1482,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); bool dev_map_can_have_prog(struct bpf_map *map); -struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -1487,6 +1510,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1516,6 +1542,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1585,17 +1612,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} - -static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} static inline bool dev_map_can_have_prog(struct bpf_map *map) { return false; @@ -1607,6 +1623,7 @@ static inline void __dev_flush(void) struct xdp_buff; struct bpf_dtab_netdev; +struct bpf_cpu_map_entry; static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, @@ -1631,12 +1648,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, return 0; } -static inline -struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) -{ - return NULL; -} - static inline void __cpu_map_flush(void) { } @@ -1687,6 +1698,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline void bpf_map_put(struct bpf_map *map) { } @@ -1701,6 +1719,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } + +static inline void bpf_task_storage_free(struct task_struct *task) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -1785,22 +1807,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ -#if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); + +void bpf_sk_reuseport_detach(struct sock *sk); +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); #else -static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, - struct bpf_prog *old, u32 which) +static inline void bpf_sk_reuseport_detach(struct sock *sk) { - return -EOPNOTSUPP; } +#ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { @@ -1818,20 +1842,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void { return -EOPNOTSUPP; } -#endif /* CONFIG_BPF_STREAM_PARSER */ -#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) -void bpf_sk_reuseport_detach(struct sock *sk); -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); -#else -static inline void bpf_sk_reuseport_detach(struct sock *sk) -{ -} - -#ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@ -1903,6 +1914,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; +extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_proto; +extern const struct bpf_func_proto bpf_for_each_map_elem_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index b2c9463f36a1..b902c580c48d 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -void bpf_local_storage_map_free(struct bpf_local_storage_map *smap); +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, + int __percpu *busy_counter); int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 0d1c33ace398..479c101546ad 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode( return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; } -static inline struct bpf_storage_blob *bpf_task( - const struct task_struct *task) -{ - if (unlikely(!task->security)) - return NULL; - - return task->security + bpf_lsm_blob_sizes.lbs_task; -} - extern const struct bpf_func_proto bpf_inode_storage_get_proto; extern const struct bpf_func_proto bpf_inode_storage_delete_proto; -extern const struct bpf_func_proto bpf_task_storage_get_proto; -extern const struct bpf_func_proto bpf_task_storage_delete_proto; void bpf_inode_storage_free(struct inode *inode); -void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_LSM */ @@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode( return NULL; } -static inline struct bpf_storage_blob *bpf_task( - const struct task_struct *task) -{ - return NULL; -} - static inline void bpf_inode_storage_free(struct inode *inode) { } -static inline void bpf_task_storage_free(struct task_struct *task) -{ -} - #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 99f7fd657d87..f883f01a5061 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -103,19 +103,17 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) -#if defined(CONFIG_BPF_STREAM_PARSER) -BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) -#endif #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) #endif #ifdef CONFIG_INET +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 971b33aca13d..51c2ffa3d901 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -68,6 +68,8 @@ struct bpf_reg_state { unsigned long raw1; unsigned long raw2; } raw; + + u32 subprogno; /* for PTR_TO_FUNC */ }; /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. @@ -204,6 +206,7 @@ struct bpf_func_state { int acquired_refs; struct bpf_reference_state *refs; int allocated_stack; + bool in_callback_fn; struct bpf_stack_state *stack; }; diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 4265f328681a..c6bc45ae5e03 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -160,11 +160,6 @@ static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port) packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0); } -static inline void ocelot_xfh_get_cpuq(void *extraction, u64 *cpuq) -{ - packing(extraction, cpuq, 28, 20, OCELOT_TAG_LEN, UNPACK, 0); -} - static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class) { packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0); diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2e5debc0373c..330345b1be54 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -11,7 +11,7 @@ * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * - * Relocated to include/linux where it belongs by Alan Cox + * Relocated to include/linux where it belongs by Alan Cox * <gw4pts@gw4pts.ampr.org> */ #ifndef _LINUX_ETHERDEVICE_H @@ -29,7 +29,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); -u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); +u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ec4cd3921c67..3583f7fc075c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -571,4 +571,13 @@ struct ethtool_phy_ops { */ void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops); +/** + * ethtool_sprintf - Write formatted string to ethtool string data + * @data: Pointer to start of string to update + * @fmt: Format of string to write + * + * Write formatted string to data. Update data to point at start of + * next string. + */ +extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 3b00fc906ccd..b2b85b2cad8e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -646,7 +646,8 @@ struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; - struct bpf_map *map; + u32 map_id; + enum bpf_map_type map_type; u32 kern_flags; struct bpf_nh_params nh; }; @@ -1472,4 +1473,32 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags, + void *lookup_elem(struct bpf_map *map, u32 key)) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + /* Lower bits of the flags are used as return code on lookup failure */ + if (unlikely(flags > XDP_TX)) + return XDP_ABORTED; + + ri->tgt_value = lookup_elem(map, ifindex); + if (unlikely(!ri->tgt_value)) { + /* If the lookup fails we want to clear out the state in the + * redirect_info struct completely, so that if an eBPF program + * performs multiple lookups, the last one always takes + * precedence. + */ + ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; + return flags; + } + + ri->tgt_index = ifindex; + ri->map_id = map->id; + ri->map_type = map->map_type; + + return XDP_REDIRECT; +} + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index b979005ea39c..2cc35038a8ca 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); bool br_multicast_enabled(const struct net_device *dev); bool br_multicast_router(const struct net_device *dev); +int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, + struct notifier_block *nb, struct netlink_ext_ack *extack); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct net_device *dev) { return false; } +static inline int br_mdb_replay(struct net_device *br_dev, + struct net_device *dev, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) @@ -102,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid); int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto); int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); +int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, + struct notifier_block *nb, struct netlink_ext_ack *extack); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -128,6 +139,14 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid, { return -EINVAL; } + +static inline int br_vlan_replay(struct net_device *br_dev, + struct net_device *dev, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) @@ -136,6 +155,10 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev, __u16 vid); void br_fdb_clear_offload(const struct net_device *dev, u16 vid); bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag); +u8 br_port_get_stp_state(const struct net_device *dev); +clock_t br_get_ageing_time(struct net_device *br_dev); +int br_fdb_replay(struct net_device *br_dev, struct net_device *dev, + struct notifier_block *nb); #else static inline struct net_device * br_fdb_find_port(const struct net_device *br_dev, @@ -154,6 +177,23 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag) { return false; } + +static inline u8 br_port_get_stp_state(const struct net_device *dev) +{ + return BR_STATE_DISABLED; +} + +static inline clock_t br_get_ageing_time(struct net_device *br_dev) +{ + return 0; +} + +static inline int br_fdb_replay(struct net_device *br_dev, + struct net_device *dev, + struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 9661416a9bb4..4efb537f57f3 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -6,50 +6,43 @@ #define _LINUX_IF_RMNET_H_ struct rmnet_map_header { -#if defined(__LITTLE_ENDIAN_BITFIELD) - u8 pad_len:6; - u8 reserved_bit:1; - u8 cd_bit:1; -#elif defined (__BIG_ENDIAN_BITFIELD) - u8 cd_bit:1; - u8 reserved_bit:1; - u8 pad_len:6; -#else -#error "Please fix <asm/byteorder.h>" -#endif - u8 mux_id; - __be16 pkt_len; + u8 flags; /* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */ + u8 mux_id; + __be16 pkt_len; /* Length of packet, including pad */ } __aligned(1); +/* rmnet_map_header flags field: + * PAD_LEN: number of pad bytes following packet data + * CMD: 1 = packet contains a MAP command; 0 = packet contains data + */ +#define MAP_PAD_LEN_MASK GENMASK(5, 0) +#define MAP_CMD_FLAG BIT(7) + struct rmnet_map_dl_csum_trailer { - u8 reserved1; -#if defined(__LITTLE_ENDIAN_BITFIELD) - u8 valid:1; - u8 reserved2:7; -#elif defined (__BIG_ENDIAN_BITFIELD) - u8 reserved2:7; - u8 valid:1; -#else -#error "Please fix <asm/byteorder.h>" -#endif - u16 csum_start_offset; - u16 csum_length; + u8 reserved1; + u8 flags; /* MAP_CSUM_DL_VALID_FLAG */ + __be16 csum_start_offset; + __be16 csum_length; __be16 csum_value; } __aligned(1); +/* rmnet_map_dl_csum_trailer flags field: + * VALID: 1 = checksum and length valid; 0 = ignore them + */ +#define MAP_CSUM_DL_VALID_FLAG BIT(0) + struct rmnet_map_ul_csum_header { __be16 csum_start_offset; -#if defined(__LITTLE_ENDIAN_BITFIELD) - u16 csum_insert_offset:14; - u16 udp_ind:1; - u16 csum_enabled:1; -#elif defined (__BIG_ENDIAN_BITFIELD) - u16 csum_enabled:1; - u16 udp_ind:1; - u16 csum_insert_offset:14; -#else -#error "Please fix <asm/byteorder.h>" -#endif + __be16 csum_info; /* MAP_CSUM_UL_* */ } __aligned(1); +/* csum_info field: + * OFFSET: where (offset in bytes) to insert computed checksum + * UDP: 1 = UDP checksum (zero checkum means no checksum) + * ENABLED: 1 = checksum computation requested + */ +#define MAP_CSUM_UL_OFFSET_MASK GENMASK(13, 0) +#define MAP_CSUM_UL_UDP_FLAG BIT(14) +#define MAP_CSUM_UL_ENABLED_FLAG BIT(15) + #endif /* !(_LINUX_IF_RMNET_H_) */ diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 52b1610eae68..274abd5fbac3 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -24,6 +24,7 @@ #define MARVELL_PHY_ID_88E3016 0x01410e60 #define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 +#define MARVELL_PHY_ID_88X2222 0x01410f10 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dc3d2508f5c6..92a029a800a0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1142,6 +1142,8 @@ enum mlx5_flex_parser_protos { MLX5_FLEX_PROTO_GENEVE = 1 << 3, MLX5_FLEX_PROTO_CW_MPLS_GRE = 1 << 4, MLX5_FLEX_PROTO_CW_MPLS_UDP = 1 << 5, + MLX5_FLEX_PROTO_ICMP = 1 << 8, + MLX5_FLEX_PROTO_ICMPV6 = 1 << 9, }; /* MLX5 DEV CAPs */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 53b89631a1d9..23bb01d7c9b9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -644,10 +644,14 @@ struct mlx5_td { }; struct mlx5e_resources { - u32 pdn; - struct mlx5_td td; - struct mlx5_core_mkey mkey; - struct mlx5_sq_bfreg bfreg; + struct mlx5e_hw_objs { + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; + } hw_objs; + struct devlink_port dl_port; + struct net_device *uplink_netdev; }; enum mlx5_sw_icm_type { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 87a5d186faff..f57b70fc251f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -756,6 +756,13 @@ struct rx_queue_attribute { const char *buf, size_t len); }; +/* XPS map type and offset of the xps map within net_device->xps_maps[]. */ +enum xps_map_type { + XPS_CPUS = 0, + XPS_RXQS, + XPS_MAPS_MAX, +}; + #ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The @@ -773,9 +780,19 @@ struct xps_map { /* * This structure holds all XPS maps for device. Maps are indexed by CPU. + * + * We keep track of the number of cpus/rxqs used when the struct is allocated, + * in nr_ids. This will help not accessing out-of-bound memory. + * + * We keep track of the number of traffic classes used when the struct is + * allocated, in num_tc. This will be used to navigate the maps, to ensure we're + * not crossing its upper bound, as the original dev->num_tc can be updated in + * the meantime. */ struct xps_dev_maps { struct rcu_head rcu; + unsigned int nr_ids; + s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; @@ -833,6 +850,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); +enum net_device_path_type { + DEV_PATH_ETHERNET = 0, + DEV_PATH_VLAN, + DEV_PATH_BRIDGE, + DEV_PATH_PPPOE, + DEV_PATH_DSA, +}; + +struct net_device_path { + enum net_device_path_type type; + const struct net_device *dev; + union { + struct { + u16 id; + __be16 proto; + u8 h_dest[ETH_ALEN]; + } encap; + struct { + enum { + DEV_PATH_BR_VLAN_KEEP, + DEV_PATH_BR_VLAN_TAG, + DEV_PATH_BR_VLAN_UNTAG, + DEV_PATH_BR_VLAN_UNTAG_HW, + } vlan_mode; + u16 vlan_id; + __be16 vlan_proto; + } bridge; + struct { + int port; + u16 proto; + } dsa; + }; +}; + +#define NET_DEVICE_PATH_STACK_MAX 5 +#define NET_DEVICE_PATH_VLAN_MAX 2 + +struct net_device_path_stack { + int num_paths; + struct net_device_path path[NET_DEVICE_PATH_STACK_MAX]; +}; + +struct net_device_path_ctx { + const struct net_device *dev; + const u8 *daddr; + + int num_vlans; + struct { + u16 id; + __be16 proto; + } vlan[NET_DEVICE_PATH_VLAN_MAX]; +}; + enum tc_setup_type { TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, @@ -1267,6 +1337,8 @@ struct netdev_net_notifier { * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); * If a device is paired with a peer device, return the peer instance. * The caller must be under RCU read context. + * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); + * Get the forwarding path to reach the real device from the HW destination address */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1473,6 +1545,8 @@ struct net_device_ops { int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); + int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, + struct net_device_path *path); }; /** @@ -1520,6 +1594,8 @@ struct net_device_ops { * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running + * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with + * skb_headlen(skb) == 0 (data starts from frag0) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1553,6 +1629,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, + IFF_TX_SKB_NO_LINEAR = 1<<31, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1579,12 +1656,14 @@ enum netdev_priv_flags { #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED +#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM #define IFF_MACSEC IFF_MACSEC #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK +#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { @@ -1760,8 +1839,7 @@ enum netdev_ml_priv_type { * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue - * @xps_cpus_map: all CPUs map for XPS device - * @xps_rxqs_map: all RXQs map for XPS device + * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for @@ -1773,6 +1851,7 @@ enum netdev_ml_priv_type { * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device + * @dev_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * @@ -2057,8 +2136,7 @@ struct net_device { struct xdp_dev_bulk_queue __percpu *xdp_bulkq; #ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_cpus_map; - struct xps_dev_maps __rcu *xps_rxqs_map; + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; @@ -2074,7 +2152,12 @@ struct net_device { u32 proto_down_reason; struct list_head todo_list; + +#ifdef CONFIG_PCPU_DEV_REFCNT int __percpu *pcpu_refcnt; +#else + refcount_t dev_refcnt; +#endif struct list_head link_watch_list; @@ -2846,6 +2929,8 @@ void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, + struct net_device_path_stack *stack); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); @@ -3424,6 +3509,24 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) } /** + * netdev_queue_set_dql_min_limit - set dql minimum limit + * @dev_queue: pointer to transmit queue + * @min_limit: dql minimum limit + * + * Forces xmit_more() to return true until the minimum threshold + * defined by @min_limit is reached (or until the tx queue is + * empty). Warning: to be use with care, misuse will impact the + * latency. + */ +static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue, + unsigned int min_limit) +{ +#ifdef CONFIG_BQL + dev_queue->dql.min_limit = min_limit; +#endif +} + +/** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue * @@ -3688,7 +3791,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map); + u16 index, enum xps_map_type type); /** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask @@ -3783,7 +3886,7 @@ static inline int netif_set_xps_queue(struct net_device *dev, static inline int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { return 0; } @@ -4026,7 +4129,11 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { +#ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_dec(*dev->pcpu_refcnt); +#else + refcount_dec(&dev->dev_refcnt); +#endif } /** @@ -4037,7 +4144,11 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { +#ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_inc(*dev->pcpu_refcnt); +#else + refcount_inc(&dev->dev_refcnt); +#endif } /* Carrier loss detection, dial on demand. The functions netif_carrier_on @@ -4172,7 +4283,7 @@ static inline bool netif_oper_up(const struct net_device *dev) * * Check if device has not been removed from system. */ -static inline bool netif_device_present(struct net_device *dev) +static inline bool netif_device_present(const struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } @@ -4611,6 +4722,7 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; +extern int netdev_unregister_timeout_secs; extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; @@ -5287,6 +5399,9 @@ do { \ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) +extern struct list_head ptype_all __read_mostly; +extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + extern struct net_device *blackhole_netdev; #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 351c1c9aedc5..2cb5188a7ef1 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -10,10 +10,15 @@ #include <linux/phy.h> #include <linux/phylink.h> +/* AN mode */ +#define DW_AN_C73 1 +#define DW_AN_C37_SGMII 2 + struct mdio_xpcs_args { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); struct mii_bus *bus; int addr; + int an_mode; }; struct mdio_xpcs_ops { diff --git a/include/linux/phy.h b/include/linux/phy.h index 1a12e4436b5b..8e2cf84b2318 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1532,6 +1532,7 @@ int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); +int genphy_c45_loopback(struct phy_device *phydev, bool enable); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d81a714cfbbd..fd2acfd9b597 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -64,6 +64,7 @@ enum phylink_op_type { * @pcs_poll: MAC PCS cannot provide link change interrupt * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. + * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. */ @@ -72,6 +73,7 @@ struct phylink_config { enum phylink_op_type type; bool pcs_poll; bool poll_fixed_state; + bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); }; diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h index 388846766bb2..6a000df5541f 100644 --- a/include/linux/platform_data/hirschmann-hellcreek.h +++ b/include/linux/platform_data/hirschmann-hellcreek.h @@ -12,6 +12,7 @@ #include <linux/types.h> struct hellcreek_platform_data { + const char *name; /* Switch name */ int num_ports; /* Amount of switch ports */ int is_100_mbits; /* Is it configured to 100 or 1000 mbit/s */ int qbv_support; /* Qbv support on front TSN ports */ diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 98966064ee68..91f9a928344e 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -28,6 +28,9 @@ struct ppp_channel_ops { int (*start_xmit)(struct ppp_channel *, struct sk_buff *); /* Handle an ioctl call that has come in via /dev/ppp. */ int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long); + int (*fill_forward_path)(struct net_device_path_ctx *, + struct net_device_path *, + const struct ppp_channel *); }; struct ppp_channel { diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h new file mode 100644 index 000000000000..51818198c292 --- /dev/null +++ b/include/linux/ptp_pch.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PTP PCH + * + * Copyright 2019 Linaro Ltd. + * + * Author Lee Jones <lee.jones@linaro.org> + */ + +#ifndef _PTP_PCH_H_ +#define _PTP_PCH_H_ + +void pch_ch_control_write(struct pci_dev *pdev, u32 val); +u32 pch_ch_event_read(struct pci_dev *pdev); +void pch_ch_event_write(struct pci_dev *pdev, u32 val); +u32 pch_src_uuid_lo_read(struct pci_dev *pdev); +u32 pch_src_uuid_hi_read(struct pci_dev *pdev); +u64 pch_rx_snap_read(struct pci_dev *pdev); +u64 pch_tx_snap_read(struct pci_dev *pdev); +int pch_set_station_address(u8 *addr, struct pci_dev *pdev); + +#endif /* _PTP_PCH_H_ */ diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index e339b48de32d..f34dbd0db795 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -19,7 +19,7 @@ enum qed_chain_mode { /* Each Page contains a next pointer at its end */ QED_CHAIN_MODE_NEXT_PTR, - /* Chain is a single page (next ptr) is unrequired */ + /* Chain is a single page (next ptr) is not required */ QED_CHAIN_MODE_SINGLE, /* Page pointers are located in a side list */ @@ -56,13 +56,13 @@ struct qed_chain_pbl_u32 { }; struct qed_chain_u16 { - /* Cyclic index of next element to produce/consme */ + /* Cyclic index of next element to produce/consume */ u16 prod_idx; u16 cons_idx; }; struct qed_chain_u32 { - /* Cyclic index of next element to produce/consme */ + /* Cyclic index of next element to produce/consume */ u32 prod_idx; u32 cons_idx; }; @@ -270,7 +270,7 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain) /** * @brief qed_chain_advance_page - * - * Advance the next element accros pages for a linked chain + * Advance the next element across pages for a linked chain * * @param p_chain * @param p_next_elem diff --git a/include/linux/sched.h b/include/linux/sched.h index ef00bb22164c..e5b7d9054473 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -42,6 +42,7 @@ struct audit_context; struct backing_dev_info; struct bio_list; struct blk_plug; +struct bpf_local_storage; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1351,6 +1352,10 @@ struct task_struct { /* Used by LSM modules for access restriction: */ void *security; #endif +#ifdef CONFIG_BPF_SYSCALL + /* Used by BPF task local storage */ + struct bpf_local_storage __rcu *bpf_storage; +#endif #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f2c9ee71cb2c..c8def85fcc22 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -657,6 +657,7 @@ typedef unsigned char *sk_buff_data_t; * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) + * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on @@ -756,6 +757,9 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor; +#ifdef CONFIG_NET_SOCK_MSG + unsigned long _sk_redir; +#endif }; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -1137,7 +1141,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && - fclones->skb2.sk == sk; + READ_ONCE(fclones->skb2.sk) == sk; } /** @@ -1289,10 +1293,10 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) void __skb_get_hash(struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, +u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); + const void *data, int hlen_proto); static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) @@ -1311,9 +1315,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen, - unsigned int flags); + void *target_container, const void *data, + __be16 proto, int nhoff, int hlen, unsigned int flags); static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -1335,9 +1338,9 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, - struct flow_keys_basic *flow, void *data, - __be16 proto, int nhoff, int hlen, - unsigned int flags) + struct flow_keys_basic *flow, + const void *data, __be16 proto, + int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, @@ -3675,14 +3678,13 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); static inline void * __must_check -__skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) +__skb_header_pointer(const struct sk_buff *skb, int offset, int len, + const void *data, int hlen, void *buffer) { - if (hlen - offset >= len) - return data + offset; + if (likely(hlen - offset >= len)) + return (void *)data + offset; - if (!skb || - skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8edbbf5f2f93..6c09d94be2e9 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -56,8 +56,8 @@ struct sk_msg { struct sk_psock_progs { struct bpf_prog *msg_parser; - struct bpf_prog *skb_parser; - struct bpf_prog *skb_verdict; + struct bpf_prog *stream_parser; + struct bpf_prog *stream_verdict; }; enum sk_psock_state_bits { @@ -70,12 +70,6 @@ struct sk_psock_link { void *link_raw; }; -struct sk_psock_parser { - struct strparser strp; - bool enabled; - void (*saved_data_ready)(struct sock *sk); -}; - struct sk_psock_work_state { struct sk_buff *skb; u32 len; @@ -90,7 +84,9 @@ struct sk_psock { u32 eval; struct sk_msg *cork; struct sk_psock_progs progs; - struct sk_psock_parser parser; +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + struct strparser strp; +#endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; unsigned long state; @@ -100,6 +96,7 @@ struct sk_psock { void (*saved_unhash)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); + void (*saved_data_ready)(struct sock *sk); struct proto *sk_proto; struct sk_psock_work_state work_state; struct work_struct work; @@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) struct sk_psock *sk_psock_init(struct sock *sk, int node); +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); +#else +static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) +{ + return -EOPNOTSUPP; +} + +static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) +{ +} + +static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) +{ +} +#endif + void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); @@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link) struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); -void __sk_psock_purge_ingress_msg(struct sk_psock *psock); - static inline void sk_psock_cork_free(struct sk_psock *psock) { if (psock->cork) { @@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk) return psock; } -void sk_psock_stop(struct sock *sk, struct sk_psock *psock); void sk_psock_drop(struct sock *sk, struct sk_psock *psock); static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) @@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { - if (psock->parser.enabled) - psock->parser.saved_data_ready(sk); + if (psock->saved_data_ready) + psock->saved_data_ready(sk); else sk->sk_data_ready(sk); } @@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog, static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); - psock_set_prog(&progs->skb_parser, NULL); - psock_set_prog(&progs->skb_verdict, NULL); + psock_set_prog(&progs->stream_parser, NULL); + psock_set_prog(&progs->stream_verdict, NULL); } int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); @@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) { if (!psock) return false; - return psock->parser.enabled; + return !!psock->saved_data_ready; +} + +#if IS_ENABLED(CONFIG_NET_SOCK_MSG) + +/* We only have one bit so far. */ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) + +static inline bool skb_bpf_ingress(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_INGRESS; +} + +static inline void skb_bpf_set_ingress(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_INGRESS; +} + +static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir, + bool ingress) +{ + skb->_sk_redir = (unsigned long)sk_redir; + if (ingress) + skb->_sk_redir |= BPF_F_INGRESS; +} + +static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return (struct sock *)(sk_redir & BPF_F_PTR_MASK); +} + +static inline void skb_bpf_redirect_clear(struct sk_buff *skb) +{ + skb->_sk_redir = 0; } +#endif /* CONFIG_NET_SOCK_MSG */ #endif /* _LINUX_SKMSG_H */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a302982de2d7..febdb43d27e5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -81,6 +81,7 @@ struct stmmac_mdio_bus_data { unsigned int phy_mask; unsigned int has_xpcs; + unsigned int xpcs_an_inband; int *irqs; int probed_phy_irq; bool needs_reset; @@ -143,6 +144,32 @@ struct stmmac_txq_cfg { int tbs_en; }; +/* FPE link state */ +enum stmmac_fpe_state { + FPE_STATE_OFF = 0, + FPE_STATE_CAPABLE = 1, + FPE_STATE_ENTERING_ON = 2, + FPE_STATE_ON = 3, +}; + +/* FPE link-partner hand-shaking mPacket type */ +enum stmmac_mpacket_type { + MPACKET_VERIFY = 0, + MPACKET_RESPONSE = 1, +}; + +enum stmmac_fpe_task_state_t { + __FPE_REMOVING, + __FPE_TASK_SCHED, +}; + +struct stmmac_fpe_cfg { + bool enable; /* FPE enable */ + bool hs_enable; /* FPE handshake enable */ + enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ + enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ +}; + struct plat_stmmacenet_data { int bus_id; int phy_addr; @@ -154,6 +181,7 @@ struct plat_stmmacenet_data { struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; struct stmmac_est *est; + struct stmmac_fpe_cfg *fpe_cfg; int clk_csr; int has_gmac; int enh_desc; @@ -180,9 +208,13 @@ struct plat_stmmacenet_data { void (*fix_mac_speed)(void *priv, unsigned int speed); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); + void (*ptp_clk_freq_config)(void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); struct mac_device_info *(*setup)(void *priv); + int (*clks_config)(void *priv, bool enabled); + int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, + void *ctx); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; @@ -203,5 +235,7 @@ struct plat_stmmacenet_data { u8 vlan_fail_q; unsigned int eee_usecs_rate; struct pci_dev *pdev; + bool has_crossts; + int int_snapshot_num; }; #endif diff --git a/include/net/dsa.h b/include/net/dsa.h index 83a933e563fe..57b2c49f72f4 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -49,10 +49,12 @@ struct phylink_link_state; #define DSA_TAG_PROTO_XRS700X_VALUE 19 #define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20 #define DSA_TAG_PROTO_SEVILLE_VALUE 21 +#define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE, + DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE, DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE, DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE, DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE, @@ -491,6 +493,20 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) return dp->vlan_filtering; } +static inline +struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) +{ + if (!dp->bridge_dev) + return NULL; + + if (dp->lag_dev) + return dp->lag_dev; + else if (dp->hsr_dev) + return dp->hsr_dev; + + return dp->slave; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index cc10b10dc3a1..ffd386ea0dbb 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -350,7 +350,7 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) u32 flow_hash_from_keys(struct flow_keys *keys); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, - void *data, int thoff, int hlen); + const void *data, int thoff, int hlen); static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector, enum flow_dissector_key_id key_id) @@ -368,8 +368,8 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec struct bpf_flow_dissector { struct bpf_flow_keys *flow_keys; const struct sk_buff *skb; - void *data; - void *data_end; + const void *data; + const void *data_end; }; static inline void diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index e6bd8ebf9ac3..dc5c1e69cd9f 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -147,6 +147,7 @@ enum flow_action_id { FLOW_ACTION_MPLS_POP, FLOW_ACTION_MPLS_MANGLE, FLOW_ACTION_GATE, + FLOW_ACTION_PPPOE_PUSH, NUM_FLOW_ACTIONS, }; @@ -234,6 +235,8 @@ struct flow_action_entry { u32 index; u32 burst; u64 rate_bytes_ps; + u64 burst_pkt; + u64 rate_pkt_ps; u32 mtu; } police; struct { /* FLOW_ACTION_CT */ @@ -272,6 +275,9 @@ struct flow_action_entry { u32 num_entries; struct action_gate_entry *entries; } gate; + struct { /* FLOW_ACTION_PPPOE_PUSH */ + u16 sid; + } pppoe; }; struct flow_action_cookie *cookie; /* user defined action cookie */ }; diff --git a/include/net/gro.h b/include/net/gro.h index 8a6eb5303cc4..01edaf3fdda0 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -3,10 +3,23 @@ #ifndef _NET_IPV6_GRO_H #define _NET_IPV6_GRO_H +#include <linux/indirect_call_wrapper.h> + +struct list_head; +struct sk_buff; + INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); + +#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ +({ \ + unlikely(gro_recursion_inc_test(skb)) ? \ + NAPI_GRO_CB(skb)->flush |= 1, NULL : \ + INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ +}) + #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd1f396cc9c7..448bf2b34759 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -30,6 +30,7 @@ */ #define NEXTHDR_HOP 0 /* Hop-by-hop option header. */ +#define NEXTHDR_IPV4 4 /* IPv4 in IPv6 */ #define NEXTHDR_TCP 6 /* TCP segment. */ #define NEXTHDR_UDP 17 /* UDP message. */ #define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */ diff --git a/include/net/lapb.h b/include/net/lapb.h index eee73442a1ba..124ee122f2c8 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -92,7 +92,7 @@ struct lapb_cb { unsigned short n2, n2count; unsigned short t1, t2; struct timer_list t1timer, t2timer; - bool t1timer_stop, t2timer_stop; + bool t1timer_running, t2timer_running; /* Internal control information */ struct sk_buff_head write_queue; diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 5694370be3d4..cea69c801595 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -34,6 +34,13 @@ struct mptcp_ext { /* one byte hole */ }; +#define MPTCP_RM_IDS_MAX 8 + +struct mptcp_rm_list { + u8 ids[MPTCP_RM_IDS_MAX]; + u8 nr; +}; + struct mptcp_out_options { #if IS_ENABLED(CONFIG_MPTCP) u16 suboptions; @@ -48,7 +55,7 @@ struct mptcp_out_options { u8 addr_id; u16 port; u64 ahmac; - u8 rm_id; + struct mptcp_rm_list rm_list; u8 join_id; u8 backup; u32 nonce; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 7b3c873f8839..e95483192d1b 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -4,7 +4,4 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; -#include <linux/sysctl.h> -extern struct ctl_table nf_ct_ipv6_sysctl_table[]; - #endif /* _NF_CONNTRACK_IPV6_H*/ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 54c4d5c908a5..4d991c1e93ef 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -86,8 +86,16 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable) enum flow_offload_tuple_dir { FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, - FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX }; +#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX + +enum flow_offload_xmit_type { + FLOW_OFFLOAD_XMIT_NEIGH = 0, + FLOW_OFFLOAD_XMIT_XFRM, + FLOW_OFFLOAD_XMIT_DIRECT, +}; + +#define NF_FLOW_TABLE_ENCAP_MAX 2 struct flow_offload_tuple { union { @@ -107,15 +115,28 @@ struct flow_offload_tuple { u8 l3proto; u8 l4proto; + struct { + u16 id; + __be16 proto; + } encap[NF_FLOW_TABLE_ENCAP_MAX]; /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; - u8 dir; - + u8 dir:2, + xmit_type:2, + encap_num:2, + in_vlan_ingress:2; u16 mtu; - - struct dst_entry *dst_cache; + union { + struct dst_entry *dst_cache; + struct { + u32 ifidx; + u32 hw_ifidx; + u8 h_source[ETH_ALEN]; + u8 h_dest[ETH_ALEN]; + } out; + }; }; struct flow_offload_tuple_rhash { @@ -158,7 +179,23 @@ static inline __s32 nf_flow_timeout_delta(unsigned int timeout) struct nf_flow_route { struct { - struct dst_entry *dst; + struct dst_entry *dst; + struct { + u32 ifindex; + struct { + u16 id; + __be16 proto; + } encap[NF_FLOW_TABLE_ENCAP_MAX]; + u8 num_encaps:2, + ingress_vlans:2; + } in; + struct { + u32 ifindex; + u32 hw_ifindex; + u8 h_source[ETH_ALEN]; + u8 h_dest[ETH_ALEN]; + } out; + enum flow_offload_xmit_type xmit_type; } tuple[FLOW_OFFLOAD_DIR_MAX]; }; @@ -229,12 +266,12 @@ void nf_flow_table_free(struct nf_flowtable *flow_table); void flow_offload_teardown(struct flow_offload *flow); -int nf_flow_snat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir); -int nf_flow_dnat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir); +void nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); +void nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); struct flow_ports { __be16 source, dest; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5aaced6bf13e..0cef5ad9768a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1498,13 +1498,16 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - bool enable; + u8 state; + u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_enable(trans) \ - (((struct nft_trans_table *)trans->data)->enable) +#define nft_trans_table_state(trans) \ + (((struct nft_trans_table *)trans->data)->state) +#define nft_trans_table_flags(trans) \ + (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 70a2a085dd1a..9e3cb2722b80 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -11,7 +11,6 @@ #include <linux/rcupdate.h> #include <linux/siphash.h> -struct tcpm_hash_bucket; struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index a10a319d7eb2..28145f714801 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -40,6 +40,12 @@ struct nh_config { struct nlattr *nh_grp; u16 nh_grp_type; + u16 nh_grp_res_num_buckets; + unsigned long nh_grp_res_idle_timer; + unsigned long nh_grp_res_unbalanced_timer; + bool nh_grp_res_has_num_buckets; + bool nh_grp_res_has_idle_timer; + bool nh_grp_res_has_unbalanced_timer; struct nlattr *nh_encap; u16 nh_encap_type; @@ -63,6 +69,32 @@ struct nh_info { }; }; +struct nh_res_bucket { + struct nh_grp_entry __rcu *nh_entry; + atomic_long_t used_time; + unsigned long migrated_time; + bool occupied; + u8 nh_flags; +}; + +struct nh_res_table { + struct net *net; + u32 nhg_id; + struct delayed_work upkeep_dw; + + /* List of NHGEs that have too few buckets ("uw" for underweight). + * Reclaimed buckets will be given to entries in this list. + */ + struct list_head uw_nh_entries; + unsigned long unbalanced_since; + + u32 idle_timer; + u32 unbalanced_timer; + + u16 num_nh_buckets; + struct nh_res_bucket nh_buckets[]; +}; + struct nh_grp_entry { struct nexthop *nh; u8 weight; @@ -71,6 +103,13 @@ struct nh_grp_entry { struct { atomic_t upper_bound; } mpath; + struct { + /* Member on uw_nh_entries. */ + struct list_head uw_nh_entry; + + u16 count_buckets; + u16 wants_buckets; + } res; }; struct list_head nh_list; @@ -80,9 +119,13 @@ struct nh_grp_entry { struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; + bool is_multipath; bool mpath; + bool resilient; bool fdb_nh; bool has_v4; + + struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[]; }; @@ -112,11 +155,15 @@ struct nexthop { enum nexthop_event_type { NEXTHOP_EVENT_DEL, NEXTHOP_EVENT_REPLACE, + NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, + NEXTHOP_EVENT_BUCKET_REPLACE, }; enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_SINGLE, NH_NOTIFIER_INFO_TYPE_GRP, + NH_NOTIFIER_INFO_TYPE_RES_TABLE, + NH_NOTIFIER_INFO_TYPE_RES_BUCKET, }; struct nh_notifier_single_info { @@ -143,6 +190,19 @@ struct nh_notifier_grp_info { struct nh_notifier_grp_entry_info nh_entries[]; }; +struct nh_notifier_res_bucket_info { + u16 bucket_index; + unsigned int idle_timer_ms; + bool force; + struct nh_notifier_single_info old_nh; + struct nh_notifier_single_info new_nh; +}; + +struct nh_notifier_res_table_info { + u16 num_nh_buckets; + struct nh_notifier_single_info nhs[]; +}; + struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; @@ -151,6 +211,8 @@ struct nh_notifier_info { union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; + struct nh_notifier_res_table_info *nh_res_table; + struct nh_notifier_res_bucket_info *nh_res_bucket; }; }; @@ -158,6 +220,10 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); +void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, + bool offload, bool trap); +void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, + unsigned long *activity); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); @@ -212,7 +278,7 @@ static inline bool nexthop_is_multipath(const struct nexthop *nh) struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); - return nh_grp->mpath; + return nh_grp->is_multipath; } return false; } @@ -227,7 +293,7 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh) struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); - if (nh_grp->mpath) + if (nh_grp->is_multipath) rc = nh_grp->num_nh; } @@ -308,7 +374,7 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) struct nh_group *nh_grp; nh_grp = rcu_dereference_rtnl(nh->nh_grp); - if (nh_grp->mpath) { + if (nh_grp->is_multipath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 15b1b30f454e..f5c1bee0cd6a 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -188,4 +188,13 @@ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload *offload); void taprio_offload_free(struct tc_taprio_qopt_offload *offload); +/* Ensure skb_mstamp_ns, which might have been populated with the txtime, is + * not mistaken for a software timestamp, because this will otherwise prevent + * the dispatch of hardware timestamps to the socket. + */ +static inline void skb_txtime_consumed(struct sk_buff *skb) +{ + skb->tstamp = ktime_set(0, 0); +} + #endif diff --git a/include/net/psample.h b/include/net/psample.h index 68ae16bb0a4a..e328c5127757 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -14,6 +14,19 @@ struct psample_group { struct rcu_head rcu; }; +struct psample_metadata { + u32 trunc_size; + int in_ifindex; + int out_ifindex; + u16 out_tc; + u64 out_tc_occ; /* bytes */ + u64 latency; /* nanoseconds */ + u8 out_tc_valid:1, + out_tc_occ_valid:1, + latency_valid:1, + unused:5; +}; + struct psample_group *psample_group_get(struct net *net, u32 group_num); void psample_group_take(struct psample_group *group); void psample_group_put(struct psample_group *group); @@ -21,15 +34,13 @@ void psample_group_put(struct psample_group *group); #if IS_ENABLED(CONFIG_PSAMPLE) void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, - u32 trunc_size, int in_ifindex, int out_ifindex, - u32 sample_rate); + u32 sample_rate, const struct psample_metadata *md); #else static inline void psample_sample_packet(struct psample_group *group, - struct sk_buff *skb, u32 trunc_size, - int in_ifindex, int out_ifindex, - u32 sample_rate) + struct sk_buff *skb, u32 sample_rate, + const struct psample_metadata *md) { } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2d6eb60c58c8..f7a6e14491fb 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1242,6 +1242,20 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } +struct psched_pktrate { + u64 rate_pkts_ps; /* packets per second */ + u32 mult; + u8 shift; +}; + +static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r, + unsigned int pkt_num) +{ + return ((u64)pkt_num * r->mult) >> r->shift; +} + +void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64); + /* Mini Qdisc serves for specific needs of ingress/clsact Qdisc. * The fast path only needs to access filter list and to update stats */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index b7fc7d0f54e2..8c3218177136 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,6 +68,7 @@ enum switchdev_obj_id { }; struct switchdev_obj { + struct list_head list; struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h index 6d1e26b709b5..72649512dcdd 100644 --- a/include/net/tc_act/tc_police.h +++ b/include/net/tc_act/tc_police.h @@ -10,10 +10,13 @@ struct tcf_police_params { s64 tcfp_burst; u32 tcfp_mtu; s64 tcfp_mtu_ptoks; + s64 tcfp_pkt_burst; struct psched_ratecfg rate; bool rate_present; struct psched_ratecfg peak; bool peak_present; + struct psched_pktrate ppsrate; + bool pps_present; struct rcu_head rcu; }; @@ -24,6 +27,7 @@ struct tcf_police { spinlock_t tcfp_lock ____cacheline_aligned_in_smp; s64 tcfp_toks; s64 tcfp_ptoks; + s64 tcfp_pkttoks; s64 tcfp_t_c; }; @@ -97,6 +101,54 @@ static inline u32 tcf_police_burst(const struct tc_action *act) return burst; } +static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + return params->ppsrate.rate_pkts_ps; +} + +static inline u32 tcf_police_burst_pkt(const struct tc_action *act) +{ + struct tcf_police *police = to_police(act); + struct tcf_police_params *params; + u32 burst; + + params = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + + /* + * "rate" pkts "burst" nanoseconds + * ------------ * ------------------- + * 1 second 2^6 ticks + * + * ------------------------------------ + * NSEC_PER_SEC nanoseconds + * ------------------------ + * 2^6 ticks + * + * "rate" pkts "burst" nanoseconds 2^6 ticks + * = ------------ * ------------------- * ------------------------ + * 1 second 2^6 ticks NSEC_PER_SEC nanoseconds + * + * "rate" * "burst" + * = ---------------- pkts/nanosecond + * NSEC_PER_SEC^2 + * + * + * "rate" * "burst" + * = ---------------- pkts/second + * NSEC_PER_SEC + */ + burst = div_u64(params->tcfp_pkt_burst * params->ppsrate.rate_pkts_ps, + NSEC_PER_SEC); + + return burst; +} + static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act) { struct tcf_police *police = to_police(act); diff --git a/include/net/tcp.h b/include/net/tcp.h index 963cd86d12dd..075de26f449d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -883,36 +883,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ - struct { - __u32 flags; - struct sock *sk_redir; - void *data_end; - } bpf; }; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) -static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) -{ - TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); -} - -static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb) -{ - return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS; -} - -static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb) -{ - return TCP_SKB_CB(skb)->bpf.sk_redir; -} - -static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb) -{ - TCP_SKB_CB(skb)->bpf.sk_redir = NULL; -} - extern const struct inet_connection_sock_af_ops ipv4_specific; #if IS_ENABLED(CONFIG_IPV6) @@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p, __MODULE_INFO(alias, alias_userspace, name); \ __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) +#ifdef CONFIG_NET_SOCK_MSG struct sk_msg; struct sk_psock; -#ifdef CONFIG_BPF_STREAM_PARSER +#ifdef CONFIG_BPF_SYSCALL struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); -#else -static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) -{ -} -#endif /* CONFIG_BPF_STREAM_PARSER */ +#endif /* CONFIG_BPF_SYSCALL */ -#ifdef CONFIG_NET_SOCK_MSG int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, int flags); int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); #endif /* CONFIG_NET_SOCK_MSG */ +#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG) +static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) +{ +} +#endif + #ifdef CONFIG_CGROUP_BPF static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops, struct sk_buff *skb, diff --git a/include/net/udp.h b/include/net/udp.h index a132a02b2f2c..d4d064c59232 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, return segs; } -#ifdef CONFIG_BPF_STREAM_PARSER +#ifdef CONFIG_BPF_SYSCALL struct sk_psock; struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); -#endif /* BPF_STREAM_PARSER */ +#endif #endif /* _UDP_H */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index cc17bc957548..9c0722c6d7ac 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(void); -static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, - u32 key) -{ - struct xsk_map *m = container_of(map, struct xsk_map, map); - struct xdp_sock *xs; - - if (key >= map->max_entries) - return NULL; - - xs = READ_ONCE(m->xsk_map[key]); - return xs; -} - #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) @@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void) { } -static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} - #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 425ff29d9389..68cdc7ceaf4d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -51,6 +51,7 @@ */ /* Reserve some destination PGIDs at the end of the range: + * PGID_BLACKHOLE: used for not forwarding the frames * PGID_CPU: used for whitelisting certain MAC addresses, such as the addresses * of the switch port net devices, towards the CPU port module. * PGID_UC: the flooding destinations for unknown unicast traffic. @@ -59,6 +60,7 @@ * PGID_MCIPV6: the flooding destinations for IPv6 multicast traffic. * PGID_BC: the flooding destinations for broadcast traffic. */ +#define PGID_BLACKHOLE 57 #define PGID_CPU 58 #define PGID_UC 59 #define PGID_MC 60 @@ -73,7 +75,7 @@ #define for_each_nonreserved_multicast_dest_pgid(ocelot, pgid) \ for ((pgid) = (ocelot)->num_phys_ports + 1; \ - (pgid) < PGID_CPU; \ + (pgid) < PGID_BLACKHOLE; \ (pgid)++) #define for_each_aggr_pgid(ocelot, pgid) \ @@ -611,6 +613,11 @@ struct ocelot_port { struct net_device *bond; bool lag_tx_active; + + u16 mrp_ring_id; + + struct net_device *bridge; + u8 stp_state; }; struct ocelot { @@ -630,10 +637,6 @@ struct ocelot { int num_frame_refs; int num_mact_rows; - struct net_device *hw_bridge_dev; - u16 bridge_mask; - u16 bridge_fwd_mask; - struct ocelot_port **ports; u8 base_mac[ETH_ALEN]; @@ -679,12 +682,6 @@ struct ocelot { /* Protects the PTP clock */ spinlock_t ptp_clock_lock; struct ptp_pin_desc ptp_pins[OCELOT_PTP_PINS_NUM]; - -#if IS_ENABLED(CONFIG_BRIDGE_MRP) - u16 mrp_ring_id; - struct net_device *mrp_p_port; - struct net_device *mrp_s_port; -#endif }; struct ocelot_policer { @@ -806,10 +803,10 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); -int ocelot_port_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge); -int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, +void ocelot_port_bridge_join(struct ocelot *ocelot, int port, struct net_device *bridge); +void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, + struct net_device *bridge); int ocelot_fdb_dump(struct ocelot *ocelot, int port, dsa_fdb_dump_cb_t *cb, void *data); int ocelot_fdb_add(struct ocelot *ocelot, int port, diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 76a97176ab81..fcad3645a70b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -86,19 +86,15 @@ struct _bpf_dtab_netdev { }; #endif /* __DEVMAP_OBJ_TYPE */ -#define devmap_ifindex(tgt, map) \ - (((map->map_type == BPF_MAP_TYPE_DEVMAP || \ - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \ - ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0) - DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), + enum bpf_map_type map_type, + u32 map_id, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index), TP_STRUCT__entry( __field(int, prog_id) @@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, ), TP_fast_assign( + u32 ifindex = 0, map_index = index; + + if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex; + } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { + ifindex = index; + map_index = 0; + } + __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; - __entry->to_ifindex = map ? devmap_ifindex(tgt, map) : - index; - __entry->map_id = map ? map->id : 0; - __entry->map_index = map ? index : 0; + __entry->to_ifindex = ifindex; + __entry->map_id = map_id; + __entry->map_index = map_index; ), TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" @@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); -#define _trace_xdp_redirect(dev, xdp, to) \ - trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to) +#define _trace_xdp_redirect(dev, xdp, to) \ + trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) -#define _trace_xdp_redirect_err(dev, xdp, to, err) \ - trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to) +#define _trace_xdp_redirect_err(dev, xdp, to, err) \ + trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to) -#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \ - trace_xdp_redirect(dev, xdp, to, 0, map, index) +#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \ + trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index) -#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \ - trace_xdp_redirect_err(dev, xdp, to, err, map, index) +#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \ + trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index) /* not used anymore, but kept around so as not to break old programs */ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, const void *tgt, int err, - const struct bpf_map *map, u32 index), - TP_ARGS(dev, xdp, tgt, err, map, index) + enum bpf_map_type map_type, + u32 map_id, u32 index), + TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index) ); TRACE_EVENT(xdp_cpumap_kthread, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4ba4ef0ff63a..008edc1dc8c1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -93,7 +93,717 @@ union bpf_iter_link_info { } map; }; -/* BPF syscall commands, see bpf(2) man-page for details. */ +/* BPF syscall commands, see bpf(2) man-page for more details. */ +/** + * DOC: eBPF Syscall Preamble + * + * The operation to be performed by the **bpf**\ () system call is determined + * by the *cmd* argument. Each operation takes an accompanying argument, + * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see + * below). The size argument is the size of the union pointed to by *attr*. + */ +/** + * DOC: eBPF Syscall Commands + * + * BPF_MAP_CREATE + * Description + * Create a map and return a file descriptor that refers to the + * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) + * is automatically enabled for the new file descriptor. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_MAP_CREATE** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_LOOKUP_ELEM + * Description + * Look up an element with a given *key* in the map referred to + * by the file descriptor *map_fd*. + * + * The *flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_ELEM + * Description + * Create or update an element (key/value pair) in a specified map. + * + * The *flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create a new element or update an existing element. + * **BPF_NOEXIST** + * Create a new element only if it did not exist. + * **BPF_EXIST** + * Update an existing element. + * **BPF_F_LOCK** + * Update a spin_lock-ed map element. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, + * **E2BIG**, **EEXIST**, or **ENOENT**. + * + * **E2BIG** + * The number of elements in the map reached the + * *max_entries* limit specified at map creation time. + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_ELEM + * Description + * Look up and delete an element by key in a specified map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_GET_NEXT_KEY + * Description + * Look up an element by key in a specified map and return the key + * of the next element. Can be used to iterate over all elements + * in the map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * The following cases can be used to iterate over all elements of + * the map: + * + * * If *key* is not found, the operation returns zero and sets + * the *next_key* pointer to the key of the first element. + * * If *key* is found, the operation returns zero and sets the + * *next_key* pointer to the key of the next element. + * * If *key* is the last element, returns -1 and *errno* is set + * to **ENOENT**. + * + * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or + * **EINVAL** on error. + * + * BPF_PROG_LOAD + * Description + * Verify and load an eBPF program, returning a new file + * descriptor associated with the program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). + * + * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is + * automatically enabled for the new file descriptor. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_PIN + * Description + * Pin an eBPF program or map referred by the specified *bpf_fd* + * to the provided *pathname* on the filesystem. + * + * The *pathname* argument must not contain a dot ("."). + * + * On success, *pathname* retains a reference to the eBPF object, + * preventing deallocation of the object when the original + * *bpf_fd* is closed. This allow the eBPF object to live beyond + * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent + * process. + * + * Applying **unlink**\ (2) or similar calls to the *pathname* + * unpins the object from the filesystem, removing the reference. + * If no other file descriptors or filesystem nodes refer to the + * same object, it will be deallocated (see NOTES). + * + * The filesystem type for the parent directory of *pathname* must + * be **BPF_FS_MAGIC**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_OBJ_GET + * Description + * Open a file descriptor for the eBPF object pinned to the + * specified *pathname*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ATTACH + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook. + * + * The *attach_type* specifies the eBPF attachment point to + * attach the program to, and must be one of *bpf_attach_type* + * (see below). + * + * The *attach_bpf_fd* must be a valid file descriptor for a + * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap + * or sock_ops type corresponding to the specified *attach_type*. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_TYPE_SK_SKB**, + * **BPF_PROG_TYPE_SK_MSG** + * + * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_DETACH + * Description + * Detach the eBPF program associated with the *target_fd* at the + * hook specified by *attach_type*. The program must have been + * previously attached using **BPF_PROG_ATTACH**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_TEST_RUN + * Description + * Run the eBPF program associated with the *prog_fd* a *repeat* + * number of times against a provided program context *ctx_in* and + * data *data_in*, and return the modified program context + * *ctx_out*, *data_out* (for example, packet data), result of the + * execution *retval*, and *duration* of the test run. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * **ENOSPC** + * Either *data_size_out* or *ctx_size_out* is too small. + * **ENOTSUPP** + * This command is not supported by the program type of + * the program referred to by *prog_fd*. + * + * BPF_PROG_GET_NEXT_ID + * Description + * Fetch the next eBPF program currently loaded into the kernel. + * + * Looks for the eBPF program with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF programs + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_GET_NEXT_ID + * Description + * Fetch the next eBPF map currently loaded into the kernel. + * + * Looks for the eBPF map with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF maps + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_PROG_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF program corresponding to + * *prog_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF map corresponding to + * *map_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_GET_INFO_BY_FD + * Description + * Obtain information about the eBPF object corresponding to + * *bpf_fd*. + * + * Populates up to *info_len* bytes of *info*, which will be in + * one of the following formats depending on the eBPF object type + * of *bpf_fd*: + * + * * **struct bpf_prog_info** + * * **struct bpf_map_info** + * * **struct bpf_btf_info** + * * **struct bpf_link_info** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_QUERY + * Description + * Obtain information about eBPF programs associated with the + * specified *attach_type* hook. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_QUERY** always fetches the number of programs + * attached and the *attach_flags* which were used to attach those + * programs. Additionally, if *prog_ids* is nonzero and the number + * of attached programs is less than *prog_cnt*, populates + * *prog_ids* with the eBPF program ids of the programs attached + * at *target_fd*. + * + * The following flags may alter the result: + * + * **BPF_F_QUERY_EFFECTIVE** + * Only return information regarding programs which are + * currently effective at the specified *target_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_RAW_TRACEPOINT_OPEN + * Description + * Attach an eBPF program to a tracepoint *name* to access kernel + * internal arguments of the tracepoint in their raw form. + * + * The *prog_fd* must be a valid file descriptor associated with + * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. + * + * No ABI guarantees are made about the content of tracepoint + * arguments exposed to the corresponding eBPF program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_LOAD + * Description + * Verify and load BPF Type Format (BTF) metadata into the kernel, + * returning a new file descriptor associated with the metadata. + * BTF is described in more detail at + * https://www.kernel.org/doc/html/latest/bpf/btf.html. + * + * The *btf* parameter must point to valid memory providing + * *btf_size* bytes of BTF binary metadata. + * + * The returned file descriptor can be passed to other **bpf**\ () + * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to + * associate the BTF with those objects. + * + * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional + * parameters to specify a *btf_log_buf*, *btf_log_size* and + * *btf_log_level* which allow the kernel to return freeform log + * output regarding the BTF verification process. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_GET_FD_BY_ID + * Description + * Open a file descriptor for the BPF Type Format (BTF) + * corresponding to *btf_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_TASK_FD_QUERY + * Description + * Obtain information about eBPF programs associated with the + * target process identified by *pid* and *fd*. + * + * If the *pid* and *fd* are associated with a tracepoint, kprobe + * or uprobe perf event, then the *prog_id* and *fd_type* will + * be populated with the eBPF program id and file descriptor type + * of type **bpf_task_fd_type**. If associated with a kprobe or + * uprobe, the *probe_offset* and *probe_addr* will also be + * populated. Optionally, if *buf* is provided, then up to + * *buf_len* bytes of *buf* will be populated with the name of + * the tracepoint, kprobe or uprobe. + * + * The resulting *prog_id* may be introspected in deeper detail + * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_LOOKUP_AND_DELETE_ELEM + * Description + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. + * The *key* and *key_len* parameters should be zeroed when + * issuing this operation for these map types. + * + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_FREEZE + * Description + * Freeze the permissions of the specified map. + * + * Write permissions may be frozen by passing zero *flags*. + * Upon success, no future syscall invocations may alter the + * map state of *map_fd*. Write operations from eBPF programs + * are still possible for a frozen map. + * + * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_BTF_GET_NEXT_ID + * Description + * Fetch the next BPF Type Format (BTF) object currently loaded + * into the kernel. + * + * Looks for the BTF object with an id greater than *start_id* + * and updates *next_id* on success. If no other BTF objects + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_LOOKUP_BATCH + * Description + * Iterate and fetch multiple elements in a map. + * + * Two opaque values are used to manage batch operations, + * *in_batch* and *out_batch*. Initially, *in_batch* must be set + * to NULL to begin the batched operation. After each subsequent + * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant + * *out_batch* as the *in_batch* for the next operation to + * continue iteration from the current point. + * + * The *keys* and *values* are output parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are copied into the + * user buffer, with the keys copied into *keys* and the values + * copied into the corresponding indices in *values*. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **ENOSPC** to indicate that *keys* or + * *values* is too small to dump an entire bucket during + * iteration of a hash-based map type. + * + * BPF_MAP_LOOKUP_AND_DELETE_BATCH + * Description + * Iterate and delete all elements in a map. + * + * This operation has the same behavior as + * **BPF_MAP_LOOKUP_BATCH** with two exceptions: + * + * * Every element that is successfully returned is also deleted + * from the map. This is at least *count* elements. Note that + * *count* is both an input and an output parameter. + * * Upon returning with *errno* set to **EFAULT**, up to + * *count* elements may be deleted without returning the keys + * and values of the deleted elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_BATCH + * Description + * Update multiple elements in a map by *key*. + * + * The *keys* and *values* are input parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * Each element specified in *keys* is sequentially updated to the + * value in the corresponding index in *values*. The *in_batch* + * and *out_batch* parameters are ignored and should be zeroed. + * + * The *elem_flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create new elements or update a existing elements. + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * **BPF_EXIST** + * Update existing elements. + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or + * **E2BIG**. **E2BIG** indicates that the number of elements in + * the map reached the *max_entries* limit specified at map + * creation time. + * + * May set *errno* to one of the following error codes under + * specific circumstances: + * + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_BATCH + * Description + * Delete multiple elements in a map by *key*. + * + * The *keys* parameter is an input parameter which must point + * to memory large enough to hold *count* items based on the key + * size of the map *map_fd*, that is, *key_size* * *count*. + * + * Each element specified in *keys* is sequentially deleted. The + * *in_batch*, *out_batch*, and *values* parameters are ignored + * and should be zeroed. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. If + * *errno* is **EFAULT**, up to *count* elements may be been + * deleted. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_CREATE + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook and return a file descriptor handle for + * managing the link. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_UPDATE + * Description + * Update the eBPF program in the specified *link_fd* to + * *new_prog_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF Link corresponding to + * *link_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_GET_NEXT_ID + * Description + * Fetch the next eBPF link currently loaded into the kernel. + * + * Looks for the eBPF link with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF links + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_ENABLE_STATS + * Description + * Enable eBPF runtime statistics gathering. + * + * Runtime statistics gathering for the eBPF runtime is disabled + * by default to minimize the corresponding performance overhead. + * This command enables statistics globally. + * + * Multiple programs may independently enable statistics. + * After gathering the desired statistics, eBPF runtime statistics + * may be disabled again by calling **close**\ (2) for the file + * descriptor returned by this function. Statistics will only be + * disabled system-wide when all outstanding file descriptors + * returned by prior calls for this subcommand are closed. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_ITER_CREATE + * Description + * Create an iterator on top of the specified *link_fd* (as + * previously created using **BPF_LINK_CREATE**) and return a + * file descriptor that can be used to trigger the iteration. + * + * If the resulting file descriptor is pinned to the filesystem + * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls + * for that path will trigger the iterator to read kernel state + * using the eBPF program attached to *link_fd*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_DETACH + * Description + * Forcefully detach the specified *link_fd* from its + * corresponding attachment point. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_BIND_MAP + * Description + * Bind a map to the lifetime of an eBPF program. + * + * The map identified by *map_fd* is bound to the program + * identified by *prog_fd* and only released when *prog_fd* is + * released. This may be used in cases where metadata should be + * associated with a program which otherwise does not contain any + * references to the map (for example, embedded in the eBPF + * program instructions). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * NOTES + * eBPF objects (maps and programs) can be shared between processes. + * + * * After **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. + * * File descriptors referring to eBPF objects can be transferred over + * **unix**\ (7) domain sockets. + * * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. + * * File descriptors referring to eBPF objects can be pinned to the + * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). + * + * An eBPF object is deallocated only after all file descriptors referring + * to the object have been closed and no references remain pinned to the + * filesystem or attached (for example, bound to a program or device). + */ enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, @@ -393,6 +1103,15 @@ enum bpf_link_type { * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -720,7 +1439,7 @@ union bpf_attr { * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 @@ -1765,6 +2484,10 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -3915,6 +4638,34 @@ union bpf_attr { * * **BPF_MTU_CHK_RET_FRAG_NEEDED** * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * The following are a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** return 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4081,6 +4832,7 @@ union bpf_attr { FN(ima_inode_hash), \ FN(sock_from_file), \ FN(check_mtu), \ + FN(for_each_map_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4174,6 +4926,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { @@ -5211,7 +5964,10 @@ struct bpf_pidns_info { /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; __u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 5a667107ad2c..d27b1708efe9 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -52,7 +52,7 @@ struct btf_type { }; }; -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_INFO_KFLAG(info) ((info) >> 31) @@ -72,7 +72,8 @@ struct btf_type { #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ #define BTF_KIND_VAR 14 /* Variable */ #define BTF_KIND_DATASEC 15 /* Section */ -#define BTF_KIND_MAX BTF_KIND_DATASEC +#define BTF_KIND_FLOAT 16 /* Floating point */ +#define BTF_KIND_MAX BTF_KIND_FLOAT #define NR_BTF_KINDS (BTF_KIND_MAX + 1) /* For some specific BTF_KIND, "struct btf_type" is immediately diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h index 7239aa9c0766..8df2d9934bcd 100644 --- a/include/uapi/linux/if_fddi.h +++ b/include/uapi/linux/if_fddi.h @@ -9,7 +9,7 @@ * Version: @(#)if_fddi.h 1.0.3 Oct 6 2018 * * Author: Lawrence V. Stefani, <stefani@yahoo.com> - * Maintainer: Maciej W. Rozycki, <macro@linux-mips.org> + * Maintainer: Maciej W. Rozycki, <macro@orcam.me.uk> * * if_fddi.h is based on previous if_ether.h and if_tr.h work by * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 2d4a1e784cf0..d8ffa8c9ca78 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -21,7 +21,10 @@ struct nexthop_grp { }; enum { - NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */ + NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group + * default type if not specified + */ + NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */ __NEXTHOP_GRP_TYPE_MAX, }; @@ -52,8 +55,50 @@ enum { NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ + /* nested; resilient nexthop group attributes */ + NHA_RES_GROUP, + /* nested; nexthop bucket attributes */ + NHA_RES_BUCKET, + __NHA_MAX, }; #define NHA_MAX (__NHA_MAX - 1) + +enum { + NHA_RES_GROUP_UNSPEC, + /* Pad attribute for 64-bit alignment. */ + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, + + /* u16; number of nexthop buckets in a resilient nexthop group */ + NHA_RES_GROUP_BUCKETS, + /* clock_t as u32; nexthop bucket idle timer (per-group) */ + NHA_RES_GROUP_IDLE_TIMER, + /* clock_t as u32; nexthop unbalanced timer */ + NHA_RES_GROUP_UNBALANCED_TIMER, + /* clock_t as u64; nexthop unbalanced time */ + NHA_RES_GROUP_UNBALANCED_TIME, + + __NHA_RES_GROUP_MAX, +}; + +#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1) + +enum { + NHA_RES_BUCKET_UNSPEC, + /* Pad attribute for 64-bit alignment. */ + NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC, + + /* u16; nexthop bucket index */ + NHA_RES_BUCKET_INDEX, + /* clock_t as u64; nexthop bucket idle time */ + NHA_RES_BUCKET_IDLE_TIME, + /* u32; nexthop id assigned to the nexthop bucket */ + NHA_RES_BUCKET_NH_ID, + + __NHA_RES_BUCKET_MAX, +}; + +#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7ea59cfe1fa7..025c40fef93d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -190,6 +190,8 @@ enum { TCA_POLICE_PAD, TCA_POLICE_RATE64, TCA_POLICE_PEAKRATE64, + TCA_POLICE_PKTRATE64, + TCA_POLICE_PKTBURST64, __TCA_POLICE_MAX #define TCA_POLICE_RESULT TCA_POLICE_RESULT }; diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index bff5032c98df..e585db5bf2d2 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -13,6 +13,13 @@ enum { PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_TUNNEL, + PSAMPLE_ATTR_PAD, + PSAMPLE_ATTR_OUT_TC, /* u16 */ + PSAMPLE_ATTR_OUT_TC_OCC, /* u64, bytes */ + PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ + PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ + PSAMPLE_ATTR_PROTO, /* u16 */ + __PSAMPLE_ATTR_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 91e4ca064d61..5888492a5257 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -178,6 +178,13 @@ enum { RTM_GETVLAN, #define RTM_GETVLAN RTM_GETVLAN + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -283,6 +290,7 @@ enum { #define RTPROT_MROUTED 17 /* Multicast daemon */ #define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ #define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ #define RTPROT_BGP 186 /* BGP Routes */ #define RTPROT_ISIS 187 /* ISIS Routes */ #define RTPROT_OSPF 188 /* OSPF Routes */ |