diff options
Diffstat (limited to 'include/net')
74 files changed, 1082 insertions, 344 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 82f3c912a5b1..1d716449209e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -36,13 +36,12 @@ struct tc_action { struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; struct gnet_stats_queue tcfa_qstats; - struct gnet_stats_rate_est64 tcfa_rate_est; + struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfa_act #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -120,6 +119,8 @@ struct tc_action_ops { int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); + int (*get_dev)(const struct tc_action *a, struct net *net, + struct net_device **mirred_dev); }; struct tc_action_net { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f2d072787947..8f998afc1384 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,6 +174,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); +void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 0a1e21d7bce1..01487192f628 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -197,7 +197,7 @@ typedef struct { #define BDADDR_LE_PUBLIC 0x01 #define BDADDR_LE_RANDOM 0x02 -static inline bool bdaddr_type_is_valid(__u8 type) +static inline bool bdaddr_type_is_valid(u8 type) { switch (type) { case BDADDR_BREDR: @@ -209,7 +209,7 @@ static inline bool bdaddr_type_is_valid(__u8 type) return false; } -static inline bool bdaddr_type_is_le(__u8 type) +static inline bool bdaddr_type_is_le(u8 type) { switch (type) { case BDADDR_LE_PUBLIC: @@ -279,15 +279,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct l2cap_ctrl { - __u8 sframe:1, + u8 sframe:1, poll:1, final:1, fcs:1, sar:2, super:2; - __u16 reqseq; - __u16 txseq; - __u8 retries; + + u16 reqseq; + u16 txseq; + u8 retries; __le16 psm; bdaddr_t bdaddr; struct l2cap_chan *chan; @@ -303,7 +304,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { - __u16 opcode; + u16 opcode; u8 req_flags; u8 req_event; union { @@ -313,10 +314,10 @@ struct hci_ctrl { }; struct bt_skb_cb { - __u8 pkt_type; - __u8 force_active; - __u16 expect; - __u8 incoming:1; + u8 pkt_type; + u8 force_active; + u16 expect; + u8 incoming:1; union { struct l2cap_ctrl l2cap; struct hci_ctrl hci; @@ -366,7 +367,7 @@ out: return NULL; } -int bt_to_errno(__u16 code); +int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f00bf667ec33..554671c81f4a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1018,7 +1018,7 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) } struct hci_dev *hci_dev_get(int index); -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src); +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); diff --git a/include/net/bonding.h b/include/net/bonding.h index f32f7ef8a23a..3c857778a6ca 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -681,7 +681,7 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) } /* exported from bond_main.c */ -extern int bond_net_id; +extern unsigned int bond_net_id; extern const struct bond_parm_tbl bond_lacp_tbl[]; extern const struct bond_parm_tbl xmit_hashtype_tbl[]; extern const struct bond_parm_tbl arp_validate_tbl[]; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 2fbeb1313c0f..d73b849e29a6 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -58,10 +58,9 @@ static inline unsigned long busy_loop_end_time(void) return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); } -static inline bool sk_can_busy_loop(struct sock *sk) +static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && sk->sk_napi_id && - !need_resched() && !signal_pending(current); + return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current); } @@ -81,11 +80,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, skb->napi_id = napi->napi_id; } -/* used in the protocol hanlder to propagate the napi_id to the socket */ -static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) -{ - sk->sk_napi_id = skb->napi_id; -} #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -108,10 +102,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, { } -static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) -{ -} - static inline bool busy_loop_timeout(unsigned long end_time) { return true; @@ -123,4 +113,23 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) } #endif /* CONFIG_NET_RX_BUSY_POLL */ + +/* used in the protocol hanlder to propagate the napi_id to the socket */ +static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + sk->sk_napi_id = skb->napi_id; +#endif +} + +/* variant used for unconnected sockets */ +static inline void sk_mark_napi_id_once(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + if (!sk->sk_napi_id) + sk->sk_napi_id = skb->napi_id; +#endif +} + #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bd19faad0d96..814be4b4200c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -772,6 +772,30 @@ struct cfg80211_csa_settings { }; /** + * struct iface_combination_params - input parameters for interface combinations + * + * Used to pass interface combination parameters + * + * @num_different_channels: the number of different channels we want + * to use for verification + * @radar_detect: a bitmap where each bit corresponds to a channel + * width where radar detection is needed, as in the definition of + * &struct ieee80211_iface_combination.@radar_detect_widths + * @iftype_num: array with the number of interfaces of each interface + * type. The index is the interface type as specified in &enum + * nl80211_iftype. + * @new_beacon_int: set this to the beacon interval of a new interface + * that's not operating yet, if such is to be checked as part of + * the verification + */ +struct iface_combination_params { + int num_different_channels; + u8 radar_detect; + int iftype_num[NUM_NL80211_IFTYPES]; + u32 new_beacon_int; +}; + +/** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability @@ -1761,9 +1785,11 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication - * @sae_data: Non-IE data to use with SAE or %NULL. This starts with - * Authentication transaction sequence number field. - * @sae_data_len: Length of sae_data buffer in octets + * @auth_data: Fields and elements in Authentication frames. This contains + * the authentication frame body (non-IE and IE data), excluding the + * Authentication algorithm number, i.e., starting at the Authentication + * transaction sequence number field. + * @auth_data_len: Length of auth_data buffer in octets */ struct cfg80211_auth_request { struct cfg80211_bss *bss; @@ -1772,8 +1798,8 @@ struct cfg80211_auth_request { enum nl80211_auth_type auth_type; const u8 *key; u8 key_len, key_idx; - const u8 *sae_data; - size_t sae_data_len; + const u8 *auth_data; + size_t auth_data_len; }; /** @@ -1814,6 +1840,12 @@ enum cfg80211_assoc_req_flags { * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use + * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or + * %NULL if FILS is not used. + * @fils_kek_len: Length of fils_kek in octets + * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association + * Request/Response frame or %NULL if FILS is not used. This field starts + * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1825,6 +1857,9 @@ struct cfg80211_assoc_request { struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; + const u8 *fils_kek; + size_t fils_kek_len; + const u8 *fils_nonces; }; /** @@ -2016,6 +2051,18 @@ struct cfg80211_connect_params { }; /** + * enum cfg80211_connect_params_changed - Connection parameters being updated + * + * This enum provides information of all connect parameters that + * have to be updated as part of update_connect_params() call. + * + * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated + */ +enum cfg80211_connect_params_changed { + UPDATE_ASSOC_IES = BIT(0), +}; + +/** * enum wiphy_params_flags - set_wiphy_params bitfield values * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed * @WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed @@ -2536,9 +2583,18 @@ struct cfg80211_nan_func { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. Once done, call - * cfg80211_disconnected(). + * @update_connect_params: Update the connect parameters while connected to a + * BSS. The updated parameters can be used by driver/firmware for + * subsequent BSS selection (roaming) decisions and to form the + * Authentication/(Re)Association Request frames. This call does not + * request an immediate disassociation or reassociation with the current + * BSS, i.e., this impacts only subsequent (re)associations. The bits in + * changed are defined in &enum cfg80211_connect_params_changed. * (invoked with the wireless_dev mutex held) + * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if + * connection is in progress. Once done, call cfg80211_disconnected() in + * case connection was already established (invoked with the + * wireless_dev mutex held), otherwise call cfg80211_connect_timeout(). * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due @@ -2706,6 +2762,8 @@ struct cfg80211_nan_func { * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. + * + * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2820,6 +2878,10 @@ struct cfg80211_ops { int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); + int (*update_connect_params)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_connect_params *sme, + u32 changed); int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, u16 reason_code); @@ -2982,6 +3044,10 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); + + int (*set_multicast_to_unicast)(struct wiphy *wiphy, + struct net_device *dev, + const bool enabled); }; /* @@ -3080,6 +3146,12 @@ struct ieee80211_iface_limit { * only in special cases. * @radar_detect_widths: bitmap of channel widths supported for radar detection * @radar_detect_regions: bitmap of regions supported for radar detection + * @beacon_int_min_gcd: This interface combination supports different + * beacon intervals. + * = 0 - all beacon intervals for different interface must be same. + * > 0 - any beacon interval for the interface part of this combination AND + * *GCD* of all beacon intervals from beaconing interfaces of this + * combination must be greater or equal to this value. * * With this structure the driver can describe which interface * combinations it supports concurrently. @@ -3145,6 +3217,7 @@ struct ieee80211_iface_combination { bool beacon_int_infra_match; u8 radar_detect_widths; u8 radar_detect_regions; + u32 beacon_int_min_gcd; }; struct ieee80211_txrx_stypes { @@ -3752,8 +3825,8 @@ struct cfg80211_cached_keys; * @beacon_interval: beacon interval used on this device for transmitting * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL - * @p2p_started: true if this is a P2P Device that has been started - * @nan_started: true if this is a NAN interface that has been started + * @is_running: true if this is a non-netdev device that has been started, e.g. + * the P2P Device. * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3785,7 +3858,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started, nan_started; + bool use_4addr, is_running; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -3842,6 +3915,13 @@ static inline u8 *wdev_address(struct wireless_dev *wdev) return wdev->address; } +static inline bool wdev_running(struct wireless_dev *wdev) +{ + if (wdev->netdev) + return netif_running(wdev->netdev); + return wdev->is_running; +} + /** * wdev_priv - return wiphy priv from wireless_dev * @@ -4047,14 +4127,29 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); */ /** + * ieee80211_data_to_8023_exthdr - convert an 802.11 data frame to 802.3 + * @skb: the 802.11 data frame + * @ehdr: pointer to a &struct ethhdr that will get the header, instead + * of it being pushed into the SKB + * @addr: the device MAC address + * @iftype: the virtual interface type + * Return: 0 on success. Non-zero on error. + */ +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype); + +/** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame * @addr: the device MAC address * @iftype: the virtual interface type * Return: 0 on success. Non-zero on error. */ -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype); +static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + enum nl80211_iftype iftype) +{ + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); +} /** * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 @@ -4072,22 +4167,23 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * - * Decode an IEEE 802.11n A-MSDU frame and convert it to a list of - * 802.3 frames. The @list will be empty if the decode fails. The - * @skb is consumed after the function returns. + * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames. + * The @list will be empty if the decode fails. The @skb must be fully + * header-less before being passed in here; it is freed in this function. * - * @skb: The input IEEE 802.11n A-MSDU frame. + * @skb: The input A-MSDU frame without any headers. * @list: The output list of 802.3 frames. It must be allocated and * initialized by by the caller. * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. - * @has_80211_header: Set it true if SKB is with IEEE 802.11 header. + * @check_da: DA to check in the inner ethernet header, or NULL + * @check_sa: SA to check in the inner ethernet header, or NULL */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, - bool has_80211_header); + const u8 *check_da, const u8 *check_sa); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame @@ -4147,6 +4243,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) } /** + * cfg80211_find_ext_ie - find information element with EID Extension in data + * + * @ext_eid: element ID Extension + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the extended element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data), or a pointer to the first byte of the requested + * element, that is the byte containing the element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. + */ +static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len, + &ext_eid, 1, 2); +} + +/** * cfg80211_find_vendor_ie - find vendor specific information element in data * * @oui: vendor OUI @@ -4546,7 +4663,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * moves to cfg80211 in this call * @buf: authentication frame (header + body) * @len: length of the frame data - * @uapsd_queues: bitmap of ACs configured to uapsd. -1 if n/a. + * @uapsd_queues: bitmap of queues configured for uapsd. Same format + * as the AC bitmap in the QoS info field * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -4568,6 +4686,17 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); /** + * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * @dev: network device + * @bss: The BSS entry with which association was abandoned. + * + * Call this whenever - for reasons reported through other API, like deauth RX, + * an association attempt was abandoned. + * This function may sleep. The caller must hold the corresponding wdev's mutex. + */ +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); + +/** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device * @buf: 802.11 frame (header + body) @@ -5582,36 +5711,20 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); * cfg80211_check_combinations - check interface combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * * This function can be called by the driver to check whether a * combination of interfaces and their types are allowed according to * the interface combinations. */ int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]); + struct iface_combination_params *params); /** * cfg80211_iter_combinations - iterate over matching combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * @iter: function to call for each matching combination * @data: pointer to pass to iter function * @@ -5620,9 +5733,7 @@ int cfg80211_check_combinations(struct wiphy *wiphy, * purposes. */ int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data); diff --git a/include/net/devlink.h b/include/net/devlink.h index 211bd3c37028..d29e5fc82582 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -92,6 +92,8 @@ struct devlink_ops { int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); + int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); + int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 6965c8f68ade..701fc814d0af 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -115,6 +115,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) @@ -127,7 +128,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, - 0, 0, 0, tunnel_id, flags); + 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } @@ -139,12 +140,13 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, - flags, tunnel_id, md_size); + 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, @@ -162,7 +164,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; - info->key.tp_dst = 0; + info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; @@ -183,7 +185,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, - ip6_flowlabel(ip6h), flags, tunnel_id, + 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 456e4a6006ab..8dbfdf728cd8 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include <net/flow.h> #include <net/rtnetlink.h> +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,6 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -92,7 +98,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..6984f1913dc1 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include <linux/in6.h> #include <linux/atomic.h> #include <net/flow_dissector.h> +#include <linux/uidgid.h> /* * ifindex generation is per-net namespace, and loopback is @@ -37,6 +38,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -74,6 +76,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -93,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -104,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -131,6 +136,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_tos is encoded in flowlabel, too. */ @@ -176,6 +182,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) @@ -239,6 +246,7 @@ struct flow_cache_object *flow_cache_lookup(struct net *net, void *ctx); int flow_cache_init(struct net *net); void flow_cache_fini(struct net *net); +void flow_cache_hp_init(void); void flow_cache_flush(struct net *net); void flow_cache_flush_deferred(struct net *net); diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d9534927d93b..d896a33e00d4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -104,6 +104,22 @@ struct flow_dissector_key_ports { }; }; +/** + * flow_dissector_key_icmp: + * @ports: type and code of ICMP header + * icmp: ICMP type (high) and code (low) + * type: ICMP type + * code: ICMP code + */ +struct flow_dissector_key_icmp { + union { + __be16 icmp; + struct { + u8 type; + u8 code; + }; + }; +}; /** * struct flow_dissector_key_eth_addrs: @@ -122,12 +138,18 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ + FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flowcache.h b/include/net/flowcache.h index c8f665ec6e0d..9caf3bfc8d2d 100644 --- a/include/net/flowcache.h +++ b/include/net/flowcache.h @@ -17,7 +17,7 @@ struct flow_cache_percpu { struct flow_cache { u32 hash_shift; struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; + struct hlist_node node; int low_watermark; int high_watermark; struct timer_list rnd_timer; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 231e121cc7d9..8b7aa370e7a4 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -11,6 +11,8 @@ struct gnet_stats_basic_cpu { struct u64_stats_sync syncp; }; +struct net_rate_estimator; + struct gnet_dump { spinlock_t * lock; struct sk_buff * skb; @@ -42,8 +44,7 @@ void __gnet_stats_copy_basic(const seqcount_t *running, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, - const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est64 *r); + struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); @@ -53,16 +54,16 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt); -void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est64 *rate_est); +void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **ptr, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt); -bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est64 *rate_est); +bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); +bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, + struct gnet_stats_rate_est64 *sample); #endif diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8d4608ce8716..a34275be3600 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,7 +20,7 @@ struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family idenfitier + * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -39,16 +39,16 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes - * @family_list: family list - * @mcgrps: multicast groups used by this family (private) - * @n_mcgrps: number of multicast groups (private) + * @attrbuf: buffer to store parsed attributes (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family - * @ops: the operations supported by this family (private) - * @n_ops: number of operations supported by this family (private) + * (private) + * @ops: the operations supported by this family + * @n_ops: number of operations supported by this family */ struct genl_family { - unsigned int id; + int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; @@ -64,15 +64,16 @@ struct genl_family { int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ - const struct genl_ops * ops; /* private */ - const struct genl_multicast_group *mcgrps; /* private */ - unsigned int n_ops; /* private */ - unsigned int n_mcgrps; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; + unsigned int n_ops; + unsigned int n_mcgrps; unsigned int mcgrp_offset; /* private */ - struct list_head family_list; /* private */ struct module *module; }; +struct nlattr **genl_family_attrbuf(const struct genl_family *family); + /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -130,64 +131,13 @@ struct genl_ops { u8 flags; }; -int __genl_register_family(struct genl_family *family); - -static inline int genl_register_family(struct genl_family *family) -{ - family->module = THIS_MODULE; - return __genl_register_family(family); -} - -/** - * genl_register_family_with_ops - register a generic netlink family with ops - * @family: generic netlink family - * @ops: operations to be registered - * @n_ops: number of elements to register - * - * Registers the specified family and operations from the specified table. - * Only one family may be registered with the same family name or identifier. - * - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. - * - * Either a doit or dumpit callback must be specified for every registered - * operation or the function will fail. Only one operation structure per - * command identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * Return 0 on success or a negative error code. - */ -static inline int -_genl_register_family_with_ops_grps(struct genl_family *family, - const struct genl_ops *ops, size_t n_ops, - const struct genl_multicast_group *mcgrps, - size_t n_mcgrps) -{ - family->module = THIS_MODULE; - family->ops = ops; - family->n_ops = n_ops; - family->mcgrps = mcgrps; - family->n_mcgrps = n_mcgrps; - return __genl_register_family(family); -} - -#define genl_register_family_with_ops(family, ops) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - NULL, 0) -#define genl_register_family_with_ops_groups(family, ops, grps) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - (grps), ARRAY_SIZE(grps)) - -int genl_unregister_family(struct genl_family *family); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +int genl_register_family(struct genl_family *family); +int genl_unregister_family(const struct genl_family *family); +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd); + const struct genl_family *family, int flags, u8 cmd); /** * genlmsg_nlhdr - Obtain netlink header from user specified header @@ -196,8 +146,8 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, * * Returns pointer to netlink header. */ -static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr, - struct genl_family *family) +static inline struct nlmsghdr * +genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) { return (struct nlmsghdr *)((char *)user_hdr - family->hdrsize - @@ -233,7 +183,7 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, */ static inline void genl_dump_check_consistent(struct netlink_callback *cb, void *user_hdr, - struct genl_family *family) + const struct genl_family *family) { nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family)); } @@ -250,7 +200,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb, */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, - struct genl_family *family, + const struct genl_family *family, int flags, u8 cmd) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, @@ -287,7 +237,7 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast_netns(struct genl_family *family, +static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -305,7 +255,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast(struct genl_family *family, +static inline int genlmsg_multicast(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -323,7 +273,7 @@ static inline int genlmsg_multicast(struct genl_family *family, * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct genl_family *family, +int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); @@ -407,8 +357,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -static inline int genl_set_err(struct genl_family *family, struct net *net, - u32 portid, u32 group, int code) +static inline int genl_set_err(const struct genl_family *family, + struct net *net, u32 portid, + u32 group, int code) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -416,7 +367,7 @@ static inline int genl_set_err(struct genl_family *family, struct net *net, return netlink_set_err(net->genl_sock, portid, group, code); } -static inline int genl_has_listeners(struct genl_family *family, +static inline int genl_has_listeners(const struct genl_family *family, struct net *net, unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index ba07b9d8ed63..d0e7e3f8e67a 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -333,9 +333,9 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0010 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0030 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 515352c6280a..0fa4c324b713 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -55,6 +55,7 @@ struct inet6_ifaddr { __u8 stable_privacy_retry; __u16 scope; + __u64 dad_nonce; unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ @@ -190,8 +191,8 @@ struct inet6_dev { __u32 if_flags; int dead; + u32 desync_factor; u8 rndid[8]; - struct timer_list regen_timer; struct list_head tempaddr_list; struct in6_addr token; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 197a30d221e9..146054ceea8e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -289,11 +289,6 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk) return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 236a81034fef..c9cff977a7fb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -228,6 +228,7 @@ struct inet_sock { #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) +#define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket diff --git a/include/net/ip.h b/include/net/ip.h index bc43c0fcae12..ab6761a7c883 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,7 +38,7 @@ struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ - unsigned char flags; + u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) @@ -47,11 +47,16 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) +#define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; +static inline bool ipv4_l3mdev_skb(u16 flags) +{ + return !!(flags & IPSKB_L3SLAVE); +} + static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; @@ -173,6 +178,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -572,7 +578,8 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -594,7 +601,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0); + ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index fb961a576abe..a74e2aa40ef4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -230,6 +230,8 @@ struct fib6_table { rwlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; + unsigned int flags; +#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index e0cd318d5103..9dc2c182a263 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,7 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header @@ -139,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 20ed9699fcd4..1b1cf33cbfb0 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b9314b48e39f..5f376af377c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -221,7 +221,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, }; -int register_fib_notifier(struct notifier_block *nb); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); int unregister_fib_notifier(struct notifier_block *nb); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); @@ -243,6 +244,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -396,6 +398,11 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) void free_fib_info(struct fib_info *fi); +static inline void fib_info_hold(struct fib_info *fi) +{ + atomic_inc(&fi->fib_clntref); +} + static inline void fib_info_put(struct fib_info *fi) { if (atomic_dec_and_test(&fi->fib_clntref)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 59557c07904b..e893fe43dd13 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -129,7 +129,7 @@ struct ip_tunnel { #endif struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ - int ip_tnl_net_id; + unsigned int ip_tnl_net_id; struct gro_cells gro_cells; bool collect_md; bool ignore_df; @@ -248,7 +248,7 @@ void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); -int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); @@ -275,7 +275,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); -void ip_tunnel_setup(struct net_device *dev, int net_id); +void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8fed1cd78658..487e57391664 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -932,7 +932,8 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p); + u8 *proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); @@ -970,6 +971,8 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, + int addr_len); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ea3f80f58fd6..d4c1c75b8862 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -24,11 +24,11 @@ enum { struct lwtunnel_state { __u16 type; __u16 flags; + __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); - int len; - __u16 headroom; + struct rcu_head rcu; __u8 data[0]; }; @@ -36,6 +36,7 @@ struct lwtunnel_encap_ops { int (*build_state)(struct net_device *dev, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts); + void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, @@ -46,10 +47,7 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL -static inline void lwtstate_free(struct lwtunnel_state *lws) -{ - kfree(lws); -} +void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) @@ -96,7 +94,8 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { - if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu) + if ((lwtunnel_xmit_redirect(lwtstate) || + lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu) return lwtstate->headroom; return 0; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a810dfcb83c2..5345d358a510 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -811,14 +811,18 @@ enum mac80211_rate_control_flags { * in the control information, and it will be filled by the rate * control algorithm according to what should be sent. For example, * if this array contains, in the format { <idx>, <count> } the - * information + * information:: + * * { 3, 2 }, { 2, 2 }, { 1, 4 }, { -1, 0 }, { -1, 0 } + * * then this means that the frame should be transmitted * up to twice at rate 3, up to twice at rate 2, and up to four * times at rate 1 if it doesn't get acknowledged. Say it gets * acknowledged by the peer after the fifth attempt, the status - * information should then contain + * information should then contain:: + * * { 3, 2 }, { 2, 2 }, { 1, 1 }, { -1, 0 } ... + * * since it was transmitted twice at rate 3, twice at rate 2 * and once at rate 1 after which we received an acknowledgement. */ @@ -1168,8 +1172,8 @@ enum mac80211_rx_vht_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) * @vht_nss: number of streams (VHT only) - * @flag: %RX_FLAG_* - * @vht_flag: %RX_VHT_FLAG_* + * @flag: %RX_FLAG_\* + * @vht_flag: %RX_VHT_FLAG_\* * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1432,13 +1436,13 @@ enum ieee80211_vif_flags { * @probe_req_reg: probe requests should be reported to mac80211 for this * interface. * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *). + * sizeof(void \*). * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) */ struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; - u8 addr[ETH_ALEN]; + u8 addr[ETH_ALEN] __aligned(2); bool p2p; bool csa_active; bool mu_mimo_owner; @@ -1743,9 +1747,10 @@ struct ieee80211_sta_rates { * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *), size is determined in hw information. + * sizeof(void \*), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid - * if wme is supported. + * if wme is supported. The bits order is like in + * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. * @bandwidth: current bandwidth the station can receive with * @rx_nss: in HT/VHT, the maximum number of spatial streams the @@ -2025,6 +2030,10 @@ struct ieee80211_txq { * drivers, mac80211 packet loss mechanism will not be triggered and driver * is completely depending on firmware event for station kickout. * + * @IEEE80211_HW_SUPPORTS_TX_FRAG: Hardware does fragmentation by itself. + * The stack will not do fragmentation. + * The callback for @set_frag_threshold should be set as well. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2066,6 +2075,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, IEEE80211_HW_REPORTS_LOW_ACK, + IEEE80211_HW_SUPPORTS_TX_FRAG, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2146,12 +2156,12 @@ enum ieee80211_hw_flags { * * @radiotap_mcs_details: lists which MCS information can the HW * reports, by default it is set to _MCS, _GI and _BW but doesn't - * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only + * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_\* values, only * adding _BW is supported today. * * @radiotap_vht_details: lists which VHT MCS information the HW reports, * the default is _GI | _BANDWIDTH. - * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. + * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values. * * @radiotap_timestamp: Information for the radiotap timestamp field; if the * 'units_pos' member is set to a non-negative value it must be set to @@ -2486,6 +2496,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * in the software stack cares about, we will, in the future, have mac80211 * tell the driver which information elements are interesting in the sense * that we want to see changes in them. This will include + * * - a list of information element IDs * - a list of OUIs for the vendor information element * @@ -3093,8 +3104,9 @@ enum ieee80211_reconfig_type { * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this - * if the device does fragmentation by itself; if this callback is - * implemented then the stack will not do fragmentation. + * if the device does fragmentation by itself. Note that to prevent the + * stack from doing fragmentation IEEE80211_HW_SUPPORTS_TX_FRAG + * should be set as well. * The callback can sleep. * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) @@ -4087,6 +4099,10 @@ void ieee80211_sta_pspoll(struct ieee80211_sta *sta); * This must be used in conjunction with ieee80211_sta_ps_transition() * and possibly ieee80211_sta_pspoll(); calls to all three must be * serialized. + * %IEEE80211_NUM_TIDS can be passed as the tid if the tid is unknown. + * In this case, mac80211 will not check that this tid maps to an AC + * that is trigger enabled and assume that the caller did the proper + * checks. */ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index be1fe2283254..d562a2fe4860 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -31,6 +31,7 @@ enum { ND_OPT_PREFIX_INFO = 3, /* RFC2461 */ ND_OPT_REDIRECT_HDR = 4, /* RFC2461 */ ND_OPT_MTU = 5, /* RFC2461 */ + ND_OPT_NONCE = 14, /* RFC7527 */ __ND_OPT_ARRAY_MAX, ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ @@ -121,6 +122,7 @@ struct ndisc_options { #define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] #define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] #define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_opts_nonce nd_opt_array[ND_OPT_NONCE] #define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] #define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] @@ -398,7 +400,8 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr); + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fc4f757107df..af8fe8a909dc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -170,7 +170,7 @@ static inline struct net *copy_net_ns(unsigned long flags, extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int pid); +struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); @@ -291,7 +291,7 @@ struct pernet_operations { int (*init)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); - int *id; + unsigned int *id; size_t size; }; diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 981c327374da..919e4e8af327 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -15,6 +15,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +#endif int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h index f01ef208dff6..db405f70e538 100644 --- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -1,6 +1,7 @@ #ifndef _NF_DEFRAG_IPV4_H #define _NF_DEFRAG_IPV4_H -void nf_defrag_ipv4_enable(void); +struct net; +int nf_defrag_ipv4_enable(struct net *); #endif /* _NF_DEFRAG_IPV4_H */ diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index a4c993685795..eaea968f8657 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -6,6 +6,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +#endif #include <linux/sysctl.h> extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index ddf162f7966f..7664efe37974 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -1,7 +1,8 @@ #ifndef _NF_DEFRAG_IPV6_H #define _NF_DEFRAG_IPV6_H -void nf_defrag_ipv6_enable(void); +struct net; +int nf_defrag_ipv6_enable(struct net *); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 50418052a520..5916aa9ab3f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -100,6 +100,9 @@ struct nf_conn { possible_net_t ct_net; +#if IS_ENABLED(CONFIG_NF_NAT) + struct rhlist_head nat_bysource; +#endif /* all members below initialized via memset */ u8 __nfct_init_offset[0]; @@ -117,9 +120,6 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; -#if IS_ENABLED(CONFIG_NF_NAT) - struct rhash_head nat_bysource; -#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; @@ -181,6 +181,10 @@ static inline void nf_ct_put(struct nf_conn *ct) int nf_ct_l3proto_try_module_get(unsigned short l3proto); void nf_ct_l3proto_module_put(unsigned short l3proto); +/* load module; enable/disable conntrack in this namespace */ +int nf_ct_netns_get(struct net *net, u8 nfproto); +void nf_ct_netns_put(struct net *net, u8 nfproto); + /* * Allocate a hashtable of hlist_head (if nulls == 0), * or hlist_nulls_head (if nulls == 1) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 8992e4229da9..e01559b4d781 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -52,6 +52,10 @@ struct nf_conntrack_l3proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); + /* * Calculate size of tuple nlattr */ @@ -63,18 +67,24 @@ struct nf_conntrack_l3proto { size_t nla_size; - /* Init l3proto pernet data */ - int (*init_net)(struct net *net); - /* Module (if any) which this is connected to. */ struct module *me; }; extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +#ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto); +#else +static inline int nf_ct_l3proto_pernet_register(struct net *n, + struct nf_conntrack_l3proto *p) +{ + return 0; +} +#endif + void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index de629f1520df..e7b836590f0b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -98,7 +98,7 @@ struct nf_conntrack_l4proto { const struct nla_policy *nla_policy; } ctnl_timeout; #endif - int *net_id; + unsigned int *net_id; /* Init l4proto pernet data */ int (*init_net)(struct net *net, u_int16_t proto); @@ -125,14 +125,24 @@ struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 498814626e28..1723a67c0b0a 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) if (net->ct.labels_used == 0) return NULL; - return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - sizeof(struct nf_conn_labels), GFP_ATOMIC); + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); #else return NULL; #endif diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index e6937318546c..b0ca402c1f72 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -54,7 +54,7 @@ struct synproxy_net { struct synproxy_stats __percpu *stats; }; -extern int synproxy_net_id; +extern unsigned int synproxy_net_id; static inline struct synproxy_net *synproxy_pernet(struct net *net) { return net_generic(net, synproxy_net_id); diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index 397dcae349f9..3e919356bedf 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -2,5 +2,6 @@ #define _NF_DUP_NETDEV_H_ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); #endif diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 309cd267be4f..450f87f95415 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,5 +109,12 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); +void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix); #endif /* _NF_LOG_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 12f4cc841b6e..3923150f2a1e 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -54,6 +54,15 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; +#ifdef CONFIG_NF_NAT_PROTO_DCCP +extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; +#endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP +extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; +#endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE +extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; +#endif bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 2280cfe86c56..09948d10e38e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -12,6 +12,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_state state; + struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h new file mode 100644 index 000000000000..f2fc39c97d43 --- /dev/null +++ b/include/net/netfilter/nf_socket.h @@ -0,0 +1,27 @@ +#ifndef _NF_SOCK_H_ +#define _NF_SOCK_H_ + +struct net_device; +struct sk_buff; +struct sock; +struct net; + +static inline bool nf_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + default: + return inet_sk(sk)->transparent; + } +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +#endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..924325c46aab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -14,27 +14,43 @@ struct nft_pktinfo { struct sk_buff *skb; - struct net *net; - const struct net_device *in; - const struct net_device *out; - u8 pf; - u8 hook; bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; }; +static inline struct net *nft_net(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->net; +} + +static inline unsigned int nft_hook(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->hook; +} + +static inline u8 nft_pf(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->pf; +} + +static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->in; +} + +static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->out; +} + static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->net = pkt->xt.net = state->net; - pkt->in = pkt->xt.in = state->in; - pkt->out = pkt->xt.out = state->out; - pkt->hook = pkt->xt.hooknum = state->hook; - pkt->pf = pkt->xt.family = state->pf; + pkt->xt.state = state; } static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, @@ -145,7 +161,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); @@ -243,7 +259,8 @@ struct nft_expr; * @lookup: look up an element within the set * @insert: insert new element into set * @activate: activate new element in the next generation - * @deactivate: deactivate element in the next generation + * @deactivate: lookup for element and deactivate it in the next generation + * @deactivate_one: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -278,6 +295,9 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); + bool (*deactivate_one)(const struct net *net, + const struct nft_set *set, + void *priv); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, @@ -310,10 +330,11 @@ void nft_unregister_set(struct nft_set_ops *ops); * @name: name of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) + * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal - * @timeout: default timeout value in msecs + * @timeout: default timeout value in jiffies * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @udlen: user data length @@ -331,6 +352,7 @@ struct nft_set { char name[NFT_SET_MAXNAMELEN]; u32 ktype; u32 dtype; + u32 objtype; u32 size; atomic_t nelems; u32 ndeact; @@ -400,6 +422,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPR: expression assiociated with the element + * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -410,6 +433,7 @@ enum nft_set_extensions { NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPR, + NFT_SET_EXT_OBJREF, NFT_SET_EXT_NUM }; @@ -538,11 +562,17 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_OBJREF); +} + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); -void nft_set_elem_destroy(const struct nft_set *set, void *elem); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch @@ -693,7 +723,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - __module_get(src->ops->type->owner); if (src->ops->clone) { dst->ops = src->ops; err = src->ops->clone(dst, src); @@ -702,6 +731,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) } else { memcpy(dst, src, src->ops->size); } + + __module_get(src->ops->type->owner); return 0; } @@ -857,6 +888,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @list: used internally * @chains: chains in the table * @sets: sets in the table + * @objects: stateful objects in the table * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) @@ -867,6 +899,7 @@ struct nft_table { struct list_head list; struct list_head chains; struct list_head sets; + struct list_head objects; u64 hgenerator; u32 use; u16 flags:14, @@ -917,6 +950,80 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); /** + * struct nft_object - nf_tables stateful object + * + * @list: table stateful object list node + * @table: table this object belongs to + * @type: pointer to object type + * @data: pointer to object data + * @name: name of this stateful object + * @genmask: generation mask + * @use: number of references to this stateful object + * @data: object data, layout depends on type + */ +struct nft_object { + struct list_head list; + char name[NFT_OBJ_MAXNAMELEN]; + struct nft_table *table; + u32 genmask:2, + use:30; + /* runtime data below here */ + const struct nft_object_type *type ____cacheline_aligned; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_obj_data(const struct nft_object *obj) +{ + return (void *)obj->data; +} + +#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask); + +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); + +/** + * struct nft_object_type - stateful object type + * + * @eval: stateful object evaluation function + * @list: list node in list of object types + * @type: stateful object numeric type + * @size: stateful object size + * @owner: module owner + * @maxattr: maximum netlink attribute + * @policy: netlink attribute policy + * @init: initialize object from netlink attributes + * @destroy: release existing stateful object + * @dump: netlink dump stateful object + */ +struct nft_object_type { + void (*eval)(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + struct list_head list; + u32 type; + unsigned int size; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; + int (*init)(const struct nlattr * const tb[], + struct nft_object *obj); + void (*destroy)(struct nft_object *obj); + int (*dump)(struct sk_buff *skb, + struct nft_object *obj, + bool reset); +}; + +int nft_register_obj(struct nft_object_type *obj_type); +void nft_unregister_obj(struct nft_object_type *obj_type); + +/** * struct nft_traceinfo - nft tracing information and state * * @pkt: pktinfo currently processed @@ -963,6 +1070,9 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +#define MODULE_ALIAS_NFT_OBJ(type) \ + MODULE_ALIAS("nft-obj-" __stringify(type)) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@ -1139,4 +1249,11 @@ struct nft_trans_elem { #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +struct nft_trans_obj { + struct nft_object *obj; +}; + +#define nft_trans_obj(trans) \ + (((struct nft_trans_obj *)trans->data)->obj) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 00f4f6b1b1ba..8f690effec37 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -1,12 +1,18 @@ #ifndef _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H +extern struct nft_expr_type nft_imm_type; +extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_lookup_type; +extern struct nft_expr_type nft_bitwise_type; +extern struct nft_expr_type nft_byteorder_type; +extern struct nft_expr_type nft_payload_type; +extern struct nft_expr_type nft_dynset_type; +extern struct nft_expr_type nft_range_type; + int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); -int nft_immediate_module_init(void); -void nft_immediate_module_exit(void); - struct nft_cmp_fast_expr { u32 data; enum nft_registers sreg:8; @@ -25,24 +31,6 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) extern const struct nft_expr_ops nft_cmp_fast_ops; -int nft_cmp_module_init(void); -void nft_cmp_module_exit(void); - -int nft_range_module_init(void); -void nft_range_module_exit(void); - -int nft_lookup_module_init(void); -void nft_lookup_module_exit(void); - -int nft_dynset_module_init(void); -void nft_dynset_module_exit(void); - -int nft_bitwise_module_init(void); -void nft_bitwise_module_exit(void); - -int nft_byteorder_module_init(void); -void nft_byteorder_module_exit(void); - struct nft_payload { enum nft_payload_bases base:8; u8 offset; @@ -57,12 +45,10 @@ struct nft_payload_set { enum nft_registers sreg:8; u8 csum_type; u8 csum_offset; + u8 csum_flags; }; extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_trace_enabled; -int nft_payload_module_init(void); -void nft_payload_module_exit(void); - #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h new file mode 100644 index 000000000000..cbedda077db2 --- /dev/null +++ b/include/net/netfilter/nft_fib.h @@ -0,0 +1,31 @@ +#ifndef _NFT_FIB_H_ +#define _NFT_FIB_H_ + +struct nft_fib { + enum nft_registers dreg:8; + u8 result; + u32 flags; +}; + +extern const struct nla_policy nft_fib_policy[]; + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]); +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index); +#endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 79f45e19f31e..130e58361f99 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -1,19 +1,23 @@ #ifndef _XT_RATEEST_H #define _XT_RATEEST_H +#include <net/gen_stats.h> + struct xt_rateest { /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ struct gnet_stats_basic_packed bstats; spinlock_t lock; - /* keep rstats and lock on same cache line to speedup xt_rateest_mt() */ - struct gnet_stats_rate_est64 rstats; + /* following fields not accessed in hot path */ + unsigned int refcnt; struct hlist_node list; char name[IFNAMSIZ]; - unsigned int refcnt; struct gnet_estimator params; struct rcu_head rcu; + + /* keep this field far away to speedup xt_rateest_mt() */ + struct net_rate_estimator __rcu *rate_est; }; struct xt_rateest *xt_rateest_lookup(const char *name); diff --git a/include/net/netlink.h b/include/net/netlink.h index 254a0fc01800..dd657a33f8c3 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -698,8 +698,7 @@ static inline int nla_len(const struct nlattr *nla) */ static inline int nla_ok(const struct nlattr *nla, int remaining) { - return remaining >= (int) sizeof(*nla) && - nla->nla_len >= sizeof(*nla) && + return nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } @@ -713,7 +712,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining) */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { - int totlen = NLA_ALIGN(nla->nla_len); + unsigned int totlen = NLA_ALIGN(nla->nla_len); *remaining -= totlen; return (struct nlattr *) ((char *) nla + totlen); @@ -1191,6 +1190,16 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) } /** + * nla_memdup - duplicate attribute memory (kmemdup) + * @src: netlink attribute to duplicate from + * @gfp: GFP mask + */ +static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) +{ + return kmemdup(nla_data(src), nla_len(src), gfp); +} + +/** * nla_nest_start - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e469e85de3f9..cf799fc3fdec 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -6,6 +6,12 @@ #include <linux/atomic.h> #include <linux/workqueue.h> #include <linux/netfilter/nf_conntrack_tcp.h> +#ifdef CONFIG_NF_CT_PROTO_DCCP +#include <linux/netfilter/nf_conntrack_dccp.h> +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +#include <linux/netfilter/nf_conntrack_sctp.h> +#endif #include <linux/seqlock.h> struct ctl_table_header; @@ -48,12 +54,49 @@ struct nf_icmp_net { unsigned int timeout; }; +#ifdef CONFIG_NF_CT_PROTO_DCCP +struct nf_dccp_net { + struct nf_proto_net pn; + int dccp_loose; + unsigned int dccp_timeout[CT_DCCP_MAX + 1]; +}; +#endif + +#ifdef CONFIG_NF_CT_PROTO_SCTP +struct nf_sctp_net { + struct nf_proto_net pn; + unsigned int timeouts[SCTP_CONNTRACK_MAX]; +}; +#endif + +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +enum udplite_conntrack { + UDPLITE_CT_UNREPLIED, + UDPLITE_CT_REPLIED, + UDPLITE_CT_MAX +}; + +struct nf_udplite_net { + struct nf_proto_net pn; + unsigned int timeouts[UDPLITE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct nf_dccp_net dccp; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct nf_sctp_net sctp; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + struct nf_udplite_net udplite; +#endif }; struct ct_pcpu { @@ -91,7 +134,6 @@ struct netns_ct { struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; - u8 label_words; #endif }; #endif diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 70e158551704..f15daaa89385 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -25,20 +25,24 @@ */ struct net_generic { - unsigned int len; - struct rcu_head rcu; - - void *ptr[0]; + union { + struct { + unsigned int len; + struct rcu_head rcu; + } s; + + void *ptr[0]; + }; }; -static inline void *net_generic(const struct net *net, int id) +static inline void *net_generic(const struct net *net, unsigned int id) { struct net_generic *ng; void *ptr; rcu_read_lock(); ng = rcu_dereference(net->gen); - ptr = ng->ptr[id - 1]; + ptr = ng->ptr[id]; rcu_read_unlock(); return ptr; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7adf4386ac8f..f0cf5a1b777e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,9 @@ struct netns_ipv4 { #ifdef CONFIG_IP_ROUTE_MULTIPATH int sysctl_fib_multipath_use_neigh; #endif + + unsigned int fib_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 10d0848f5b8a..de7745e2edcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,6 +85,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t fib6_sernum; + struct seg6_pernet_data *seg6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 58487b1cc99a..cea396b53a60 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,5 +17,11 @@ struct netns_nf { struct ctl_table_header *nf_log_dir_header; #endif struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + bool defrag_ipv4; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + bool defrag_ipv6; +#endif }; #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 767b03a3fe67..f0a051480c6c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -171,6 +171,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); +int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, + struct net_device **hw_dev); /** * struct tcf_pkt_info - packet information @@ -425,16 +427,14 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(const struct net_device *dev, - const struct tcf_proto *tp, u32 flags) +static inline bool tc_can_offload(const struct net_device *dev, + const struct tcf_proto *tp) { const struct Qdisc *sch = tp->q; const struct Qdisc_class_ops *cops = sch->ops->cl_ops; if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) - return false; if (!dev->netdev_ops->ndo_setup_tc) return false; if (cops && cops->tcf_cl_offload) @@ -443,6 +443,19 @@ static inline bool tc_should_offload(const struct net_device *dev, return true; } +static inline bool tc_skip_hw(u32 flags) +{ + return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; +} + +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) +{ + if (tc_skip_hw(flags)) + return false; + return tc_can_offload(dev, tp); +} + static inline bool tc_skip_sw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd334c9584e9..f1b76b8e6d2d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -6,6 +6,8 @@ #include <linux/if_vlan.h> #include <net/sch_generic.h> +#define DEFAULT_TX_QUEUE_LEN 1000 + struct qdisc_walker { int stop; int skip; diff --git a/include/net/raw.h b/include/net/raw.h index 3e789008394d..57c33dd22ec4 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -23,6 +23,12 @@ extern struct proto raw_prot; +extern struct raw_hashinfo raw_v4_hashinfo; +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, + unsigned short num, __be32 raddr, + __be32 laddr, int dif); + +int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); int raw_local_deliver(struct sk_buff *, int); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 87783dea0791..cbe4e9de1894 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,6 +3,13 @@ #include <net/protocol.h> +extern struct raw_hashinfo raw_v6_hashinfo; +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, + unsigned short num, const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif); + +int raw_abort(struct sock *sk, int err); + void raw6_icmp_error(struct sk_buff *, int nexthdr, u8 type, u8 code, int inner_offset, __be32); bool raw6_local_deliver(struct sk_buff *, int); diff --git a/include/net/route.h b/include/net/route.h index 0429d47cad25..c0874c87c173 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e6aa0a249672..498f81b229a4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -76,7 +76,7 @@ struct Qdisc { struct netdev_queue *dev_queue; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 87a7f42e7639..f0dcaebebddb 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_transport *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, @@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); int sctp_transport_hashtable_init(void); void sctp_transport_hashtable_destroy(void); -void sctp_hash_transport(struct sctp_transport *t); +int sctp_hash_transport(struct sctp_transport *t); void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 11c3bf262a85..92daabdc007d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -124,7 +124,7 @@ extern struct sctp_globals { /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; /* This is the hash of all transports. */ - struct rhashtable transport_hashtable; + struct rhltable transport_hashtable; /* Sizes of above hashtables. */ int ep_hashsize; @@ -530,7 +530,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_abandon:1, /* can chunks from this message can be abandoned. */ can_delay; /* should this message be Nagle delayed */ }; @@ -641,7 +640,6 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ - resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ @@ -656,6 +654,7 @@ struct sctp_chunk { fast_retransmit:2; /* Is this chunk fast retransmitted? */ }; +#define sctp_chunk_retransmitted(chunk) (chunk->sent_count > 1) void sctp_chunk_hold(struct sctp_chunk *); void sctp_chunk_put(struct sctp_chunk *); int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, @@ -762,7 +761,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet) struct sctp_transport { /* A list of transports. */ struct list_head transports; - struct rhash_head node; + struct rhlist_head node; /* Reference counting. */ atomic_t refcnt; diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 3f36d45b714a..0caee631a836 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,10 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); +u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 *tsoff); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/seg6.h b/include/net/seg6.h new file mode 100644 index 000000000000..4e0357517d79 --- /dev/null +++ b/include/net/seg6.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_H +#define _NET_SEG6_H + +#include <linux/net.h> +#include <linux/ipv6.h> +#include <net/lwtunnel.h> +#include <linux/seg6.h> +#include <linux/rhashtable.h> + +static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, + __be32 to) +{ + __be32 diff[] = { ~from, to }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, + __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +struct seg6_pernet_data { + struct mutex lock; + struct in6_addr __rcu *tun_src; +#ifdef CONFIG_IPV6_SEG6_HMAC + struct rhashtable hmac_infos; +#endif +}; + +static inline struct seg6_pernet_data *seg6_pernet(struct net *net) +{ + return net->ipv6.seg6_data; +} + +extern int seg6_init(void); +extern void seg6_exit(void); +extern int seg6_iptunnel_init(void); +extern void seg6_iptunnel_exit(void); + +extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); + +#endif diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h new file mode 100644 index 000000000000..69c3a106056b --- /dev/null +++ b/include/net/seg6_hmac.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_HMAC_H +#define _NET_SEG6_HMAC_H + +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <net/sock.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/route.h> +#include <net/seg6.h> +#include <linux/seg6_hmac.h> +#include <linux/rhashtable.h> + +#define SEG6_HMAC_MAX_DIGESTSIZE 160 +#define SEG6_HMAC_RING_SIZE 256 + +struct seg6_hmac_info { + struct rhash_head node; + struct rcu_head rcu; + + u32 hmackeyid; + char secret[SEG6_HMAC_SECRET_LEN]; + u8 slen; + u8 alg_id; +}; + +struct seg6_hmac_algo { + u8 alg_id; + char name[64]; + struct crypto_shash * __percpu *tfms; + struct shash_desc * __percpu *shashs; +}; + +extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, + struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, + u8 *output); +extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key); +extern int seg6_hmac_info_add(struct net *net, u32 key, + struct seg6_hmac_info *hinfo); +extern int seg6_hmac_info_del(struct net *net, u32 key); +extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh); +extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +extern int seg6_hmac_init(void); +extern void seg6_hmac_exit(void); +extern int seg6_hmac_net_init(struct net *net); +extern void seg6_hmac_net_exit(struct net *net); + +#endif diff --git a/include/net/sock.h b/include/net/sock.h index ebf75db08e06..e17aa3de2b4d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -252,6 +252,7 @@ struct sock_common { * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes + * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -302,7 +303,8 @@ struct sock_common { * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container - */ + * @sk_rcu: used during RCU grace period + */ struct sock { /* * Now struct inet_timewait_sock also uses sock_common, so please just @@ -341,6 +343,9 @@ struct sock { #define sk_rxhash __sk_common.skc_rxhash socket_lock_t sk_lock; + atomic_t sk_drops; + int sk_rcvlowat; + struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -357,14 +362,13 @@ struct sock { struct sk_buff *tail; } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - __u32 sk_txhash; + int sk_forward_alloc; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int sk_napi_id; unsigned int sk_ll_usec; + /* ===== mostly read cache line ===== */ + unsigned int sk_napi_id; #endif - atomic_t sk_drops; int sk_rcvbuf; struct sk_filter __rcu *sk_filter; @@ -377,16 +381,50 @@ struct sock { #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - /* Note: 32bit hole on 64bit arches */ - atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; + + /* ===== cache line for TX ===== */ + int sk_wmem_queued; + atomic_t sk_wmem_alloc; + unsigned long sk_tsq_flags; + struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; + __s32 sk_peek_off; + int sk_write_pending; + long sk_sndtimeo; + struct timer_list sk_timer; + __u32 sk_priority; + __u32 sk_mark; + u32 sk_pacing_rate; /* bytes per second */ + u32 sk_max_pacing_rate; + struct page_frag sk_frag; + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; + gfp_t sk_allocation; + __u32 sk_txhash; /* * Because of non atomicity rules, all * changes are protected by socket lock. */ + unsigned int __sk_flags_offset[0]; +#ifdef __BIG_ENDIAN_BITFIELD +#define SK_FL_PROTO_SHIFT 16 +#define SK_FL_PROTO_MASK 0x00ff0000 + +#define SK_FL_TYPE_SHIFT 0 +#define SK_FL_TYPE_MASK 0x0000ffff +#else +#define SK_FL_PROTO_SHIFT 8 +#define SK_FL_PROTO_MASK 0x0000ff00 + +#define SK_FL_TYPE_SHIFT 16 +#define SK_FL_TYPE_MASK 0xffff0000 +#endif + kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 2, sk_no_check_tx : 1, @@ -397,41 +435,24 @@ struct sock { #define SK_PROTOCOL_MAX U8_MAX kmemcheck_bitfield_end(flags); - int sk_wmem_queued; - gfp_t sk_allocation; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; - netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - int sk_gso_type; - unsigned int sk_gso_max_size; u16 sk_gso_max_segs; - int sk_rcvlowat; unsigned long sk_lingertime; - struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - __u32 sk_priority; - __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; - long sk_sndtimeo; - struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; - struct page_frag sk_frag; - struct sk_buff *sk_send_head; - __s32 sk_peek_off; - int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; #endif @@ -892,7 +913,20 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - sock_rps_record_flow_hash(sk->sk_rxhash); + if (static_key_false(&rfs_needed)) { + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) + sock_rps_record_flow_hash(sk->sk_rxhash); + } #endif } @@ -912,14 +946,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) #endif } -#define sk_wait_event(__sk, __timeo, __condition) \ +#define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ - *(__timeo) = schedule_timeout(*(__timeo)); \ + *(__timeo) = wait_woken(__wait, \ + TASK_INTERRUPTIBLE, \ + *(__timeo)); \ } \ - sched_annotate_sleep(); \ + sched_annotate_sleep(); \ lock_sock(__sk); \ __rc = __condition; \ __rc; \ @@ -1160,11 +1196,6 @@ static inline void sk_enter_memory_pressure(struct sock *sk) sk->sk_prot->enter_memory_pressure(sk); } -static inline long sk_prot_mem_limits(const struct sock *sk, int index) -{ - return sk->sk_prot->sysctl_mem[index]; -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1274,14 +1305,32 @@ static inline struct inode *SOCK_INODE(struct socket *socket) /* * Functions for memory accounting */ +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind); int __sk_mem_schedule(struct sock *sk, int size, int kind); +void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -#define SK_MEM_QUANTUM ((int)PAGE_SIZE) +/* We used to have PAGE_SIZE here, but systems with 64KB pages + * do not necessarily have 16x time more memory than 4KB ones. + */ +#define SK_MEM_QUANTUM 4096 #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 +/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ +static inline long sk_prot_mem_limits(const struct sock *sk, int index) +{ + long val = sk->sk_prot->sysctl_mem[index]; + +#if PAGE_SIZE > SK_MEM_QUANTUM + val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; +#elif PAGE_SIZE < SK_MEM_QUANTUM + val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT; +#endif + return val; +} + static inline int sk_mem_pages(int amt) { return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; @@ -1594,11 +1643,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) @@ -1649,6 +1698,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1656,6 +1706,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); @@ -1950,6 +2005,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); @@ -2106,7 +2163,8 @@ struct sock_skb_cb { static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { - SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); + SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ? + atomic_read(&sk->sk_drops) : 0; } static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 62770add15bd..604bc31e23ab 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,13 +8,13 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; int tcfm_ifindex; - int tcfm_ok_push; + bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) -static inline bool is_tcf_mirred_redirect(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) @@ -23,7 +23,7 @@ static inline bool is_tcf_mirred_redirect(const struct tc_action *a) return false; } -static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 5767e9dbcf92..19cd3d345804 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ struct tcf_skbedit { u32 flags; u32 priority; u32 mark; + u32 mask; u16 queue_mapping; u16 ptype; }; diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 253f8da6c2a6..efef0b4b1b2b 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -12,6 +12,8 @@ #define __NET_TC_TUNNEL_KEY_H #include <net/act_api.h> +#include <linux/tc_act/tc_tunnel_key.h> +#include <net/dst_metadata.h> struct tcf_tunnel_key_params { struct rcu_head rcu; @@ -27,4 +29,39 @@ struct tcf_tunnel_key { #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) +static inline bool is_tcf_tunnel_set(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; +#endif + return false; +} + +static inline bool is_tcf_tunnel_release(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; +#endif + return false; +} + +static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + return ¶ms->tcft_enc_metadata->u.tun_info; +#else + return NULL; +#endif +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index f83b7f220a65..207147b4c6b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -794,12 +794,23 @@ struct tcp_skb_cb { */ static inline int tcp_v6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags); + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } #endif +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return true; +#endif + return false; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ @@ -947,6 +958,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); +u32 tcp_reno_undo_cwnd(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; @@ -1209,6 +1221,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE @@ -1503,11 +1516,26 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +/* Latencies incurred by various limits for a sender. They are + * chronograph-like stats that are mutually exclusive. + */ +enum tcp_chrono { + TCP_CHRONO_UNSPEC, + TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */ + TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */ + TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */ + __TCP_CHRONO_MAX, +}; + +void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); +void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); @@ -1566,8 +1594,10 @@ static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff * static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) { - if (sk->sk_send_head == skb_unlinked) + if (sk->sk_send_head == skb_unlinked) { sk->sk_send_head = NULL; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + } if (tcp_sk(sk)->highest_sack == skb_unlinked) tcp_sk(sk)->highest_sack = NULL; } @@ -1589,6 +1619,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb /* Queue it, remembering where we must start sending. */ if (sk->sk_send_head == NULL) { sk->sk_send_head = skb; + tcp_chrono_start(sk, TCP_CHRONO_BUSY); if (tcp_sk(sk)->highest_sack == NULL) tcp_sk(sk)->highest_sack = skb; @@ -1796,7 +1827,7 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req, bool *strict); - __u32 (*init_seq)(const struct sk_buff *skb); + __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/include/net/udp.h b/include/net/udp.h index ea53a87d880f..1661791e8ca1 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -246,6 +246,25 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* net/ipv4/udp.c */ +void udp_destruct_sock(struct sock *sk); +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); +static inline struct sk_buff * +__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, + int *off, int *err) +{ + return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + udp_skb_destructor, peeked, off, err); +} +static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *err) +{ + int peeked, off = 0; + + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); +} + void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, @@ -258,6 +277,8 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int udp_init_sock(struct sock *sk); +int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, diff --git a/include/net/udplite.h b/include/net/udplite.h index 80761938b9a7..36097d388219 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -27,6 +27,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, static inline int udplite_sk_init(struct sock *sk) { udp_sk(sk)->pcflag = UDPLITE_BIT; + sk->sk_destruct = udp_destruct_sock; return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0255613a54a4..49a59202f85e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -225,9 +225,9 @@ struct vxlan_config { struct vxlan_dev { struct hlist_node hlist; /* vni hash table */ struct list_head next; /* vxlan's per namespace list */ - struct vxlan_sock *vn4_sock; /* listening socket for IPv4 */ + struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) - struct vxlan_sock *vn6_sock; /* listening socket for IPv6 */ + struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */ #endif struct net_device *dev; struct net *net; /* netns for packet i/o */ @@ -281,16 +281,6 @@ struct vxlan_dev { struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); -static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan, - unsigned short family) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6) - return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport; -#endif - return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport; -} - static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) { |