From 47846c9b0c10808d9337d2e7d09361f3e0a0a71a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 25 Nov 2009 17:46:19 +0100 Subject: mac80211: reduce reliance on netdev For bluetooth 3, we will most likely not have a netdev for a virtual interface (sdata), so prepare for that by reducing the reliance on having a netdev. This patch moves the name and address fields into the sdata struct and uses them from there all over. Some work is needed to keep them sync'ed, but that's not a lot of work and in slow paths anyway. In doing so, this also reduces the number of pointer dereferences in many places, because of things like sdata->dev->dev_addr becoming sdata->vif.addr. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2aff4906b2ae..e94cc526b0f6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -659,12 +659,14 @@ struct ieee80211_conf { * @type: type of this virtual interface * @bss_conf: BSS configuration for this interface, either our own * or the BSS we're associated to + * @addr: address of this interface * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; + u8 addr[ETH_ALEN]; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; -- cgit v1.2.3 From eaf85ca7fecb218fc41ff57c1642ca73b097aabb Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Tue, 1 Dec 2009 10:18:37 +0800 Subject: wireless: add ieee80211_amsdu_to_8023s Move the A-MSDU handling code from mac80211 to cfg80211 so that more drivers can use it. The new created function ieee80211_amsdu_to_8023s converts an A-MSDU frame to a list of 802.3 frames. Cc: Johannes Berg Signed-off-by: Zhu Yi Signed-off-by: John W. Linville --- include/net/cfg80211.h | 22 +++++++++- net/mac80211/rx.c | 106 ++++++++++--------------------------------------- net/wireless/util.c | 99 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 137 insertions(+), 90 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0884b9a0f778..542a477a94da 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1578,7 +1578,7 @@ unsigned int ieee80211_hdrlen(__le16 fc); * @addr: the device MAC address * @iftype: the virtual interface type */ -int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, +int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype); /** @@ -1589,9 +1589,27 @@ int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, * @bssid: the network bssid (used only for iftype STATION and ADHOC) * @qos: build 802.11 QoS data frame */ -int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, +int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, u8 *bssid, bool qos); +/** + * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame + * + * Decode an IEEE 802.11n A-MSDU frame and convert it to a list of + * 802.3 frames. The @list will be empty if the decode fails. The + * @skb is consumed after the function returns. + * + * @skb: The input IEEE 802.11n A-MSDU frame. + * @list: The output list of 802.3 frames. It must be allocated and + * initialized by by the caller. + * @addr: The device MAC address. + * @iftype: The device interface type. + * @extra_headroom: The hardware extra headroom for SKBs in the @list. + */ +void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, + const u8 *addr, enum nl80211_iftype iftype, + const unsigned int extra_headroom); + /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame * @skb: the data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dbfd684e3e2e..a182e423109b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1541,16 +1541,10 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->sdata->dev; - struct ieee80211_local *local = rx->local; - u16 ethertype; - u8 *payload; - struct sk_buff *skb = rx->skb, *frame = NULL; + struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; - const struct ethhdr *eth; - int remaining, err; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; + struct sk_buff_head frame_list; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -1561,94 +1555,34 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - err = __ieee80211_data_to_8023(rx); - if (unlikely(err)) + if (ieee80211_has_a4(hdr->frame_control) && + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + !rx->sdata->u.vlan.sta) return RX_DROP_UNUSABLE; - skb->dev = dev; - - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; - - /* skip the wrapping header */ - eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr)); - if (!eth) + if (is_multicast_ether_addr(hdr->addr1) && + ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + rx->sdata->u.vlan.sta) || + (rx->sdata->vif.type == NL80211_IFTYPE_STATION && + rx->sdata->u.mgd.use_4addr))) return RX_DROP_UNUSABLE; - while (skb != frame) { - u8 padding; - __be16 len = eth->h_proto; - unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len); - - remaining = skb->len; - memcpy(dst, eth->h_dest, ETH_ALEN); - memcpy(src, eth->h_source, ETH_ALEN); + skb->dev = dev; + __skb_queue_head_init(&frame_list); - padding = ((4 - subframe_len) & 0x3); - /* the last MSDU has no padding */ - if (subframe_len > remaining) - return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, + rx->sdata->vif.type, + rx->local->hw.extra_tx_headroom); - skb_pull(skb, sizeof(struct ethhdr)); - /* if last subframe reuse skb */ - if (remaining <= subframe_len + padding) - frame = skb; - else { - /* - * Allocate and reserve two bytes more for payload - * alignment since sizeof(struct ethhdr) is 14. - */ - frame = dev_alloc_skb( - ALIGN(local->hw.extra_tx_headroom, 4) + - subframe_len + 2); - - if (frame == NULL) - return RX_DROP_UNUSABLE; - - skb_reserve(frame, - ALIGN(local->hw.extra_tx_headroom, 4) + - sizeof(struct ethhdr) + 2); - memcpy(skb_put(frame, ntohs(len)), skb->data, - ntohs(len)); - - eth = (struct ethhdr *) skb_pull(skb, ntohs(len) + - padding); - if (!eth) { - dev_kfree_skb(frame); - return RX_DROP_UNUSABLE; - } - } - - skb_reset_network_header(frame); - frame->dev = dev; - frame->priority = skb->priority; - rx->skb = frame; - - payload = frame->data; - ethertype = (payload[6] << 8) | payload[7]; - - if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - compare_ether_addr(payload, - bridge_tunnel_header) == 0)) { - /* remove RFC1042 or Bridge-Tunnel - * encapsulation and replace EtherType */ - skb_pull(frame, 6); - memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); - } else { - memcpy(skb_push(frame, sizeof(__be16)), - &len, sizeof(__be16)); - memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); - } + while (!skb_queue_empty(&frame_list)) { + rx->skb = __skb_dequeue(&frame_list); if (!ieee80211_frame_allowed(rx, fc)) { - if (skb == frame) /* last frame */ - return RX_DROP_UNUSABLE; - dev_kfree_skb(frame); + dev_kfree_skb(rx->skb); continue; } + dev->stats.rx_packets++; + dev->stats.rx_bytes += rx->skb->len; ieee80211_deliver_skb(rx); } diff --git a/net/wireless/util.c b/net/wireless/util.c index a3c841a255db..23557c1d0a9c 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -285,7 +285,7 @@ static int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) } } -int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, +int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -383,7 +383,7 @@ int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, } EXPORT_SYMBOL(ieee80211_data_to_8023); -int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, +int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, u8 *bssid, bool qos) { struct ieee80211_hdr hdr; @@ -497,6 +497,101 @@ int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr, } EXPORT_SYMBOL(ieee80211_data_from_8023); + +void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, + const u8 *addr, enum nl80211_iftype iftype, + const unsigned int extra_headroom) +{ + struct sk_buff *frame = NULL; + u16 ethertype; + u8 *payload; + const struct ethhdr *eth; + int remaining, err; + u8 dst[ETH_ALEN], src[ETH_ALEN]; + + err = ieee80211_data_to_8023(skb, addr, iftype); + if (err) + goto out; + + /* skip the wrapping header */ + eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr)); + if (!eth) + goto out; + + while (skb != frame) { + u8 padding; + __be16 len = eth->h_proto; + unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len); + + remaining = skb->len; + memcpy(dst, eth->h_dest, ETH_ALEN); + memcpy(src, eth->h_source, ETH_ALEN); + + padding = (4 - subframe_len) & 0x3; + /* the last MSDU has no padding */ + if (subframe_len > remaining) + goto purge; + + skb_pull(skb, sizeof(struct ethhdr)); + /* reuse skb for the last subframe */ + if (remaining <= subframe_len + padding) + frame = skb; + else { + unsigned int hlen = ALIGN(extra_headroom, 4); + /* + * Allocate and reserve two bytes more for payload + * alignment since sizeof(struct ethhdr) is 14. + */ + frame = dev_alloc_skb(hlen + subframe_len + 2); + if (!frame) + goto purge; + + skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2); + memcpy(skb_put(frame, ntohs(len)), skb->data, + ntohs(len)); + + eth = (struct ethhdr *)skb_pull(skb, ntohs(len) + + padding); + if (!eth) { + dev_kfree_skb(frame); + goto purge; + } + } + + skb_reset_network_header(frame); + frame->dev = skb->dev; + frame->priority = skb->priority; + + payload = frame->data; + ethertype = (payload[6] << 8) | payload[7]; + + if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && + ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || + compare_ether_addr(payload, + bridge_tunnel_header) == 0)) { + /* remove RFC1042 or Bridge-Tunnel + * encapsulation and replace EtherType */ + skb_pull(frame, 6); + memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); + } else { + memcpy(skb_push(frame, sizeof(__be16)), &len, + sizeof(__be16)); + memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); + } + __skb_queue_tail(list, frame); + } + + return; + + purge: + __skb_queue_purge(list); + out: + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); + /* Given a data frame determine the 802.1p/1d tag to use. */ unsigned int cfg80211_classify8021d(struct sk_buff *skb) { -- cgit v1.2.3 From 0f78231bffb868a30e8533aace142213266bb811 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Dec 2009 13:37:02 +0100 Subject: mac80211: enable spatial multiplexing powersave Enable spatial multiplexing in mac80211 by telling the driver what to do and, where necessary, sending action frames to the AP to update the requested SMPS mode. Also includes a trivial implementation for hwsim that just logs the requested mode. For now, the userspace interface is in debugfs only, and let you toggle the requested mode at any time. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 24 ++++++-- include/linux/ieee80211.h | 25 +++++++- include/net/mac80211.h | 59 ++++++++++++++++++ net/mac80211/cfg.c | 49 +++++++++++++++ net/mac80211/debugfs_netdev.c | 111 +++++++++++++++++++++++++++++++++- net/mac80211/driver-trace.h | 2 + net/mac80211/ht.c | 47 ++++++++++++++ net/mac80211/ieee80211_i.h | 14 +++++ net/mac80211/main.c | 24 ++++++++ net/mac80211/mlme.c | 63 +++++++++++++++++-- net/mac80211/status.c | 38 ++++++++++++ net/mac80211/util.c | 74 +++++++++++++++++++++++ 12 files changed, 518 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 88e41176e7fd..92c669ebb358 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -618,12 +618,26 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) { struct mac80211_hwsim_data *data = hw->priv; struct ieee80211_conf *conf = &hw->conf; - - printk(KERN_DEBUG "%s:%s (freq=%d idle=%d ps=%d)\n", + static const char *chantypes[4] = { + [NL80211_CHAN_NO_HT] = "noht", + [NL80211_CHAN_HT20] = "ht20", + [NL80211_CHAN_HT40MINUS] = "ht40-", + [NL80211_CHAN_HT40PLUS] = "ht40+", + }; + static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { + [IEEE80211_SMPS_AUTOMATIC] = "auto", + [IEEE80211_SMPS_OFF] = "off", + [IEEE80211_SMPS_STATIC] = "static", + [IEEE80211_SMPS_DYNAMIC] = "dynamic", + }; + + printk(KERN_DEBUG "%s:%s (freq=%d/%s idle=%d ps=%d smps=%s)\n", wiphy_name(hw->wiphy), __func__, conf->channel->center_freq, + chantypes[conf->channel_type], !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS)); + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); data->idle = !!(conf->flags & IEEE80211_CONF_IDLE); @@ -1082,7 +1096,9 @@ static int __init init_mac80211_hwsim(void) BIT(NL80211_IFTYPE_MESH_POINT); hw->flags = IEEE80211_HW_MFP_CAPABLE | - IEEE80211_HW_SIGNAL_DBM; + IEEE80211_HW_SIGNAL_DBM | + IEEE80211_HW_SUPPORTS_STATIC_SMPS | + IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS; /* ask mac80211 to reserve space for magic */ hw->vif_data_size = sizeof(struct hwsim_vif_priv); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9724a28c0c2..e8d43d0ff2c3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -707,6 +707,10 @@ struct ieee80211_mgmt { u8 action; u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN]; } __attribute__ ((packed)) sa_query; + struct { + u8 action; + u8 smps_control; + } __attribute__ ((packed)) ht_smps; } u; } __attribute__ ((packed)) action; } u; @@ -824,6 +828,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_LDPC_CODING 0x0001 #define IEEE80211_HT_CAP_SUP_WIDTH_20_40 0x0002 #define IEEE80211_HT_CAP_SM_PS 0x000C +#define IEEE80211_HT_CAP_SM_PS_SHIFT 2 #define IEEE80211_HT_CAP_GRN_FLD 0x0010 #define IEEE80211_HT_CAP_SGI_20 0x0020 #define IEEE80211_HT_CAP_SGI_40 0x0040 @@ -839,6 +844,7 @@ struct ieee80211_ht_cap { /* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C +#define IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT 2 /* * Maximum length of AMPDU that the STA can receive. @@ -922,12 +928,17 @@ struct ieee80211_ht_info { #define IEEE80211_MAX_AMPDU_BUF 0x40 -/* Spatial Multiplexing Power Save Modes */ +/* Spatial Multiplexing Power Save Modes (for capability) */ #define WLAN_HT_CAP_SM_PS_STATIC 0 #define WLAN_HT_CAP_SM_PS_DYNAMIC 1 #define WLAN_HT_CAP_SM_PS_INVALID 2 #define WLAN_HT_CAP_SM_PS_DISABLED 3 +/* for SM power control field lower two bits */ +#define WLAN_HT_SMPS_CONTROL_DISABLED 0 +#define WLAN_HT_SMPS_CONTROL_STATIC 1 +#define WLAN_HT_SMPS_CONTROL_DYNAMIC 3 + /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 @@ -1150,6 +1161,18 @@ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; +/* HT action codes */ +enum ieee80211_ht_actioncode { + WLAN_HT_ACTION_NOTIFY_CHANWIDTH = 0, + WLAN_HT_ACTION_SMPS = 1, + WLAN_HT_ACTION_PSMP = 2, + WLAN_HT_ACTION_PCO_PHASE = 3, + WLAN_HT_ACTION_CSI = 4, + WLAN_HT_ACTION_NONCOMPRESSED_BF = 5, + WLAN_HT_ACTION_COMPRESSED_BF = 6, + WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7, +}; + /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e94cc526b0f6..e6b6bf81d5b9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -597,8 +597,10 @@ enum ieee80211_conf_flags { * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed + * @IEEE80211_CONF_CHANGE_SMPS: Spatial multiplexing powersave mode changed */ enum ieee80211_conf_changed { + IEEE80211_CONF_CHANGE_SMPS = BIT(1), IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_MONITOR = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), @@ -608,6 +610,21 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_IDLE = BIT(8), }; +/** + * enum ieee80211_smps_mode - spatial multiplexing power save mode + * + * @ + */ +enum ieee80211_smps_mode { + IEEE80211_SMPS_AUTOMATIC, + IEEE80211_SMPS_OFF, + IEEE80211_SMPS_STATIC, + IEEE80211_SMPS_DYNAMIC, + + /* keep last */ + IEEE80211_SMPS_NUM_MODES, +}; + /** * struct ieee80211_conf - configuration of the device * @@ -636,6 +653,10 @@ enum ieee80211_conf_changed { * @short_frame_max_tx_count: Maximum number of transmissions for a "short" * frame, called "dot11ShortRetryLimit" in 802.11, but actually means the * number of transmissions not the number of retries + * + * @smps_mode: spatial multiplexing powersave mode; note that + * %IEEE80211_SMPS_STATIC is used when the device is not + * configured for an HT channel */ struct ieee80211_conf { u32 flags; @@ -648,6 +669,7 @@ struct ieee80211_conf { struct ieee80211_channel *channel; enum nl80211_channel_type channel_type; + enum ieee80211_smps_mode smps_mode; }; /** @@ -930,6 +952,16 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_BEACON_FILTER: * Hardware supports dropping of irrelevant beacon frames to * avoid waking up cpu. + * + * @IEEE80211_HW_SUPPORTS_STATIC_SMPS: + * Hardware supports static spatial multiplexing powersave, + * ie. can turn off all but one chain even on HT connections + * that should be using more chains. + * + * @IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS: + * Hardware supports dynamic spatial multiplexing powersave, + * ie. can turn off all but one chain and then wake the rest + * up as required after, for example, rts/cts handshake. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -947,6 +979,8 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_DYNAMIC_PS = 1<<12, IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_BEACON_FILTER = 1<<14, + IEEE80211_HW_SUPPORTS_STATIC_SMPS = 1<<15, + IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS = 1<<16, }; /** @@ -1214,6 +1248,31 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * signal strength threshold checking. */ +/** + * DOC: Spatial multiplexing power save + * + * SMPS (Spatial multiplexing power save) is a mechanism to conserve + * power in an 802.11n implementation. For details on the mechanism + * and rationale, please refer to 802.11 (as amended by 802.11n-2009) + * "11.2.3 SM power save". + * + * The mac80211 implementation is capable of sending action frames + * to update the AP about the station's SMPS mode, and will instruct + * the driver to enter the specific mode. It will also announce the + * requested SMPS mode during the association handshake. Hardware + * support for this feature is required, and can be indicated by + * hardware flags. + * + * The default mode will be "automatic", which nl80211/cfg80211 + * defines to be dynamic SMPS in (regular) powersave, and SMPS + * turned off otherwise. + * + * To support this feature, the driver must set the appropriate + * hardware support flags, and handle the SMPS flag to the config() + * operation. It will then with this mechanism be instructed to + * enter the requested SMPS mode while associated to an HT AP. + */ + /** * DOC: Frame filtering * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fcfa1bf776a7..8c35418d1c96 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1318,6 +1318,50 @@ static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) } #endif +int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) +{ + const u8 *ap; + enum ieee80211_smps_mode old_req; + int err; + + old_req = sdata->u.mgd.req_smps; + sdata->u.mgd.req_smps = smps_mode; + + if (old_req == smps_mode && + smps_mode != IEEE80211_SMPS_AUTOMATIC) + return 0; + + /* + * If not associated, or current association is not an HT + * association, there's no need to send an action frame. + */ + if (!sdata->u.mgd.associated || + sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) { + mutex_lock(&sdata->local->iflist_mtx); + ieee80211_recalc_smps(sdata->local, sdata); + mutex_unlock(&sdata->local->iflist_mtx); + return 0; + } + + ap = sdata->u.mgd.associated->cbss.bssid; + + if (smps_mode == IEEE80211_SMPS_AUTOMATIC) { + if (sdata->u.mgd.powersave) + smps_mode = IEEE80211_SMPS_DYNAMIC; + else + smps_mode = IEEE80211_SMPS_OFF; + } + + /* send SM PS frame to AP */ + err = ieee80211_send_smps_action(sdata, smps_mode, + ap, ap); + if (err) + sdata->u.mgd.req_smps = old_req; + + return err; +} + static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout) { @@ -1335,6 +1379,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.powersave = enabled; conf->dynamic_ps_timeout = timeout; + /* no change, but if automatic follow powersave */ + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); + mutex_unlock(&sdata->u.mgd.mtx); + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5d9c797635a9..355983503885 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -41,6 +41,30 @@ static ssize_t ieee80211_if_read( return ret; } +static ssize_t ieee80211_if_write( + struct ieee80211_sub_if_data *sdata, + const char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) +{ + u8 *buf; + ssize_t ret = -ENODEV; + + buf = kzalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + rtnl_lock(); + if (sdata->dev->reg_state == NETREG_REGISTERED) + ret = (*write)(sdata, buf, count); + rtnl_unlock(); + + return ret; +} + #define IEEE80211_IF_FMT(name, field, format_string) \ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, char *buf, \ @@ -71,7 +95,7 @@ static ssize_t ieee80211_if_fmt_##name( \ return scnprintf(buf, buflen, "%pM\n", sdata->field); \ } -#define __IEEE80211_IF_FILE(name) \ +#define __IEEE80211_IF_FILE(name, _write) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ @@ -82,12 +106,24 @@ static ssize_t ieee80211_if_read_##name(struct file *file, \ } \ static const struct file_operations name##_ops = { \ .read = ieee80211_if_read_##name, \ + .write = (_write), \ .open = mac80211_open_file_generic, \ } +#define __IEEE80211_IF_FILE_W(name) \ +static ssize_t ieee80211_if_write_##name(struct file *file, \ + const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_write(file->private_data, userbuf, count, \ + ppos, ieee80211_if_parse_##name); \ +} \ +__IEEE80211_IF_FILE(name, ieee80211_if_write_##name) + + #define IEEE80211_IF_FILE(name, field, format) \ IEEE80211_IF_FMT_##format(name, field) \ - __IEEE80211_IF_FILE(name) + __IEEE80211_IF_FILE(name, NULL) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); @@ -99,6 +135,70 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); IEEE80211_IF_FILE(capab, u.mgd.capab, HEX); +static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_local *local = sdata->local; + int err; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS) && + smps_mode == IEEE80211_SMPS_STATIC) + return -EINVAL; + + /* auto should be dynamic if in PS mode */ + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) && + (smps_mode == IEEE80211_SMPS_DYNAMIC || + smps_mode == IEEE80211_SMPS_AUTOMATIC)) + return -EINVAL; + + /* supported only on managed interfaces for now */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + mutex_lock(&local->iflist_mtx); + err = __ieee80211_request_smps(sdata, smps_mode); + mutex_unlock(&local->iflist_mtx); + + return err; +} + +static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { + [IEEE80211_SMPS_AUTOMATIC] = "auto", + [IEEE80211_SMPS_OFF] = "off", + [IEEE80211_SMPS_STATIC] = "static", + [IEEE80211_SMPS_DYNAMIC] = "dynamic", +}; + +static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + return snprintf(buf, buflen, "request: %s\nused: %s\n", + smps_modes[sdata->u.mgd.req_smps], + smps_modes[sdata->u.mgd.ap_smps]); +} + +static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, + const char *buf, int buflen) +{ + enum ieee80211_smps_mode mode; + + for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) { + if (strncmp(buf, smps_modes[mode], buflen) == 0) { + int err = ieee80211_set_smps(sdata, mode); + if (!err) + return buflen; + return err; + } + } + + return -EINVAL; +} + +__IEEE80211_IF_FILE_W(smps); + /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -109,7 +209,7 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( return scnprintf(buf, buflen, "%u\n", skb_queue_len(&sdata->u.ap.ps_bc_buf)); } -__IEEE80211_IF_FILE(num_buffered_multicast); +__IEEE80211_IF_FILE(num_buffered_multicast, NULL); /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); @@ -158,6 +258,10 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, debugfs_create_file(#name, 0400, sdata->debugfs.dir, \ sdata, &name##_ops); +#define DEBUGFS_ADD_MODE(name, mode) \ + debugfs_create_file(#name, mode, sdata->debugfs.dir, \ + sdata, &name##_ops); + static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, sta); @@ -167,6 +271,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); DEBUGFS_ADD(capab, sta); + DEBUGFS_ADD_MODE(smps, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ee2d19a25ce1..7a849b920165 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -140,6 +140,7 @@ TRACE_EVENT(drv_config, __field(u8, short_frame_max_tx_count) __field(int, center_freq) __field(int, channel_type) + __field(int, smps) ), TP_fast_assign( @@ -155,6 +156,7 @@ TRACE_EVENT(drv_config, __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; __entry->center_freq = local->hw.conf.channel->center_freq; __entry->channel_type = local->hw.conf.channel_type; + __entry->smps = local->hw.conf.smps_mode; ), TP_printk( diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 45ebd062a2fb..63b8f86b7f16 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -166,3 +166,50 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&sta->lock); } } + +int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps, const u8 *da, + const u8 *bssid) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *action_frame; + + /* 27 = header + category + action + smps mode */ + skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, local->hw.extra_tx_headroom); + action_frame = (void *)skb_put(skb, 27); + memcpy(action_frame->da, da, ETH_ALEN); + memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(action_frame->bssid, bssid, ETH_ALEN); + action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + action_frame->u.action.category = WLAN_CATEGORY_HT; + action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; + switch (smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_DISABLED; + break; + case IEEE80211_SMPS_STATIC: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_STATIC; + break; + case IEEE80211_SMPS_DYNAMIC: + action_frame->u.action.u.ht_smps.smps_control = + WLAN_HT_SMPS_CONTROL_DYNAMIC; + break; + } + + /* we'll do more on status of this frame */ + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); + + return 0; +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 178e329f9257..e63aecbddfbe 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -297,6 +297,8 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ + enum ieee80211_smps_mode req_smps, /* requested smps mode */ + ap_smps; /* smps mode AP thinks we're in */ unsigned long request; @@ -587,6 +589,9 @@ struct ieee80211_local { /* used for uploading changed mc list */ struct work_struct reconfig_filter; + /* used to reconfigure hardware SM PS */ + struct work_struct recalc_smps; + /* aggregated multicast list */ struct dev_addr_list *mc_list; int mc_count; @@ -760,6 +765,8 @@ struct ieee80211_local { int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ + enum ieee80211_smps_mode smps_mode; + struct work_struct restart_work; #ifdef CONFIG_MAC80211_DEBUGFS @@ -978,6 +985,9 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); +int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps, const u8 *da, + const u8 *bssid); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u16 initiator, u16 reason); @@ -1088,6 +1098,10 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, u32 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band); +int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode); +void ieee80211_recalc_smps(struct ieee80211_local *local, + struct ieee80211_sub_if_data *forsdata); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 98320a94c270..e1293e8ed83a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -113,6 +113,18 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_CHANNEL; } + if (!conf_is_ht(&local->hw.conf)) { + /* + * mac80211.h documents that this is only valid + * when the channel is set to an HT type, and + * that otherwise STATIC is used. + */ + local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC; + } else if (local->hw.conf.smps_mode != local->smps_mode) { + local->hw.conf.smps_mode = local->smps_mode; + changed |= IEEE80211_CONF_CHANGE_SMPS; + } + if (scan_chan) power = chan->max_power; else @@ -297,6 +309,16 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_restart_hw); +static void ieee80211_recalc_smps_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, recalc_smps); + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_smps(local, NULL); + mutex_unlock(&local->iflist_mtx); +} + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -370,6 +392,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_WORK(&local->restart_work, ieee80211_restart_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); + INIT_WORK(&local->recalc_smps, ieee80211_recalc_smps_work); + local->smps_mode = IEEE80211_SMPS_OFF; INIT_WORK(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cd5dcc3d8c2b..0a762a9ba4df 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -398,6 +398,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, __le16 tmp; u32 flags = local->hw.conf.channel->flags; + /* determine capability flags */ + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_HT40PLUS) { @@ -413,17 +415,64 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, break; } - tmp = cpu_to_le16(cap); - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2); + /* set SM PS mode properly */ + cap &= ~IEEE80211_HT_CAP_SM_PS; + /* new association always uses requested smps mode */ + if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + ifmgd->ap_smps = IEEE80211_SMPS_DYNAMIC; + else + ifmgd->ap_smps = IEEE80211_SMPS_OFF; + } else + ifmgd->ap_smps = ifmgd->req_smps; + + switch (ifmgd->ap_smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + case IEEE80211_SMPS_OFF: + cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_STATIC: + cap |= WLAN_HT_CAP_SM_PS_STATIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_DYNAMIC: + cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + } + + /* reserve and fill IE */ + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + + /* capability flags */ + tmp = cpu_to_le16(cap); memcpy(pos, &tmp, sizeof(u16)); pos += sizeof(u16); - /* TODO: needs a define here for << 2 */ + + /* AMPDU parameters */ *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << 2); + (sband->ht_cap.ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); + + /* MCS set */ memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); + pos += sizeof(sband->ht_cap.mcs); + + /* extended capabilities */ + pos += sizeof(__le16); + + /* BF capabilities */ + pos += sizeof(__le32); + + /* antenna selection */ + pos += sizeof(u8); } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -932,6 +981,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); + ieee80211_recalc_smps(local, sdata); mutex_unlock(&local->iflist_mtx); netif_start_queue(sdata->dev); @@ -2327,6 +2377,11 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; mutex_init(&ifmgd->mtx); + + if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) + ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; + else + ifmgd->req_smps = IEEE80211_SMPS_OFF; } /* scan finished notification */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b4608f11a40f..0c0850d37dda 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -134,6 +134,40 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, dev_kfree_skb(skb); } +static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *) skb->data; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + + if (ieee80211_is_action(mgmt->frame_control) && + sdata->vif.type == NL80211_IFTYPE_STATION && + mgmt->u.action.category == WLAN_CATEGORY_HT && + mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS) { + /* + * This update looks racy, but isn't -- if we come + * here we've definitely got a station that we're + * talking to, and on a managed interface that can + * only be the AP. And the only other place updating + * this variable is before we're associated. + */ + switch (mgmt->u.action.u.ht_smps.smps_control) { + case WLAN_HT_SMPS_CONTROL_DYNAMIC: + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_SMPS_CONTROL_STATIC: + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_SMPS_CONTROL_DISABLED: + default: /* shouldn't happen since we don't send that */ + sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_OFF; + break; + } + + ieee80211_queue_work(&local->hw, &local->recalc_smps); + } +} + void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) { struct sk_buff *skb2; @@ -210,6 +244,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rate_control_tx_status(local, sband, sta, skb); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) ieee80211s_update_metric(local, sta, skb); + + if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && + (info->flags & IEEE80211_TX_STAT_ACK)) + ieee80211_frame_acked(sta, skb); } rcu_read_unlock(); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d54dbe8e09ba..086ef6257b4b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1170,3 +1170,77 @@ int ieee80211_reconfig(struct ieee80211_local *local) return 0; } +static int check_mgd_smps(struct ieee80211_if_managed *ifmgd, + enum ieee80211_smps_mode *smps_mode) +{ + if (ifmgd->associated) { + *smps_mode = ifmgd->ap_smps; + + if (*smps_mode == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + *smps_mode = IEEE80211_SMPS_DYNAMIC; + else + *smps_mode = IEEE80211_SMPS_OFF; + } + + return 1; + } + + return 0; +} + +/* must hold iflist_mtx */ +void ieee80211_recalc_smps(struct ieee80211_local *local, + struct ieee80211_sub_if_data *forsdata) +{ + struct ieee80211_sub_if_data *sdata; + enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF; + int count = 0; + + if (forsdata) + WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx)); + + WARN_ON(!mutex_is_locked(&local->iflist_mtx)); + + /* + * This function could be improved to handle multiple + * interfaces better, but right now it makes any + * non-station interfaces force SM PS to be turned + * off. If there are multiple station interfaces it + * could also use the best possible mode, e.g. if + * one is in static and the other in dynamic then + * dynamic is ok. + */ + + list_for_each_entry(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + if (sdata->vif.type != NL80211_IFTYPE_STATION) + goto set; + if (sdata != forsdata) { + /* + * This nested is ok -- we are holding the iflist_mtx + * so can't get here twice or so. But it's required + * since normally we acquire it first and then the + * iflist_mtx. + */ + mutex_lock_nested(&sdata->u.mgd.mtx, SINGLE_DEPTH_NESTING); + count += check_mgd_smps(&sdata->u.mgd, &smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); + } else + count += check_mgd_smps(&sdata->u.mgd, &smps_mode); + + if (count > 1) { + smps_mode = IEEE80211_SMPS_OFF; + break; + } + } + + if (smps_mode == local->smps_mode) + return; + + set: + local->smps_mode = smps_mode; + /* changed flag is auto-detected for this */ + ieee80211_hw_config(local, 0); +} -- cgit v1.2.3 From 12d50c46dc0f7fd2e625c4befaa5fa5740a7a594 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Tue, 8 Dec 2009 22:26:13 +0000 Subject: tcp: Remove check in __tcp_push_pending_frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_push checks tcp_send_head and calls __tcp_push_pending_frames, which again checks tcp_send_head, and this unnecessary check is done for every other caller of __tcp_push_pending_frames. Remove tcp_send_head check in __tcp_push_pending_frames and add the check to tcp_push_pending_frames. Other functions call __tcp_push_pending_frames only when tcp_send_head would evaluate to true. Signed-off-by: Krishna Kumar Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 16 +++++++++------- net/ipv4/tcp_output.c | 5 ----- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 34f5cc24d903..185e22baecb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -856,13 +856,6 @@ static inline void tcp_check_probe_timer(struct sock *sk) icsk->icsk_rto, TCP_RTO_MAX); } -static inline void tcp_push_pending_frames(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); -} - static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) { tp->snd_wl1 = seq; @@ -1342,6 +1335,15 @@ static inline int tcp_write_queue_empty(struct sock *sk) return skb_queue_empty(&sk->sk_write_queue); } +static inline void tcp_push_pending_frames(struct sock *sk) +{ + if (tcp_send_head(sk)) { + struct tcp_sock *tp = tcp_sk(sk); + + __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); + } +} + /* Start sequence of the highest skb with SACKed bit, valid only if * sacked > 0 or when the caller has ensured validity by itself. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383ce237640f..12b2af36eab8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1794,11 +1794,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle) { - struct sk_buff *skb = tcp_send_head(sk); - - if (!skb) - return; - /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and * all will be happy. -- cgit v1.2.3 From 31d12926e37291970dd4f6e9940df3897766a81d Mon Sep 17 00:00:00 2001 From: laurent chavey Date: Tue, 15 Dec 2009 11:15:28 +0000 Subject: net: Add rtnetlink init_rcvwnd to set the TCP initial receive window Add rtnetlink init_rcvwnd to set the TCP initial receive window size advertised by passive and active TCP connections. The current Linux TCP implementation limits the advertised TCP initial receive window to the one prescribed by slow start. For short lived TCP connections used for transaction type of traffic (i.e. http requests), bounding the advertised TCP initial receive window results in increased latency to complete the transaction. Support for setting initial congestion window is already supported using rtnetlink init_cwnd, but the feature is useless without the ability to set a larger TCP initial receive window. The rtnetlink init_rcvwnd allows increasing the TCP initial receive window, allowing TCP connection to advertise larger TCP receive window than the ones bounded by slow start. Signed-off-by: Laurent Chavey Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ include/net/dst.h | 2 -- include/net/tcp.h | 3 ++- net/ipv4/syncookies.c | 3 ++- net/ipv4/tcp_output.c | 17 +++++++++++++---- net/ipv6/syncookies.c | 3 ++- 6 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 05330fc5b436..9590364fe8b5 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -362,6 +362,8 @@ enum { #define RTAX_FEATURES RTAX_FEATURES RTAX_RTO_MIN, #define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND __RTAX_MAX }; diff --git a/include/net/dst.h b/include/net/dst.h index 39c4a5963e12..ce078cda6b74 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -83,8 +83,6 @@ struct dst_entry { * (L1_CACHE_SIZE would be too much) */ #ifdef CONFIG_64BIT - long __pad_to_align_refcnt[2]; -#else long __pad_to_align_refcnt[1]; #endif /* diff --git a/include/net/tcp.h b/include/net/tcp.h index 185e22baecb1..788c99f98597 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -965,7 +965,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale); + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd); static inline int tcp_win_from_space(int space) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 66fd80ef2473..5c24db4a3c91 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(&rt->u.dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 12b2af36eab8..4a1605d3f909 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale) + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); @@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss, init_cwnd = 2; else if (mss > 1460) init_cwnd = 3; - if (*rcv_wnd > init_cwnd * mss) + /* when initializing use the value from init_rcv_wnd + * rather than the default from above + */ + if (init_rcv_wnd && + (*rcv_wnd > init_rcv_wnd * mss)) + *rcv_wnd = init_rcv_wnd * mss; + else if (*rcv_wnd > init_cwnd * mss) *rcv_wnd = init_cwnd * mss; } @@ -2417,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; } @@ -2544,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk) &tp->rcv_wnd, &tp->window_clamp, sysctl_tcp_window_scaling, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7208a06576c6..34d1f0690d7e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -269,7 +269,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; -- cgit v1.2.3 From e5cd6fe391aa8c93560bb7ffdfe334cf4d0a02e4 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:00 +0000 Subject: llc: add support for LLC_OPT_PKTINFO Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/llc.h | 7 +++++++ include/net/llc_conn.h | 1 + net/llc/af_llc.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) (limited to 'include/net') diff --git a/include/linux/llc.h b/include/linux/llc.h index 7733585603f1..ad7074ba81af 100644 --- a/include/linux/llc.h +++ b/include/linux/llc.h @@ -36,6 +36,7 @@ enum llc_sockopts { LLC_OPT_BUSY_TMR_EXP, /* busy state expire time (secs). */ LLC_OPT_TX_WIN, /* tx window size. */ LLC_OPT_RX_WIN, /* rx window size. */ + LLC_OPT_PKTINFO, /* ancillary packet information. */ LLC_OPT_MAX }; @@ -70,6 +71,12 @@ enum llc_sockopts { #define LLC_SAP_RM 0xD4 /* Resource Management */ #define LLC_SAP_GLOBAL 0xFF /* Global SAP. */ +struct llc_pktinfo { + int lpi_ifindex; + unsigned char lpi_sap; + unsigned char lpi_mac[IFHWADDRLEN]; +}; + #ifdef __KERNEL__ #define LLC_SAP_DYN_START 0xC0 #define LLC_SAP_DYN_STOP 0xDE diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index e2374e34989f..fe982fd94c4a 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -76,6 +76,7 @@ struct llc_sock { u32 rx_pdu_hdr; /* used for saving header of last pdu received and caused sending FRMR. Used for resending FRMR */ + u32 cmsg_flags; }; static inline struct llc_sock *llc_sk(const struct sock *sk) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 3a66546cad06..ac691fe08076 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -47,6 +47,10 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); #define dprintk(args...) #endif +/* Maybe we'll add some more in the future. */ +#define LLC_CMSG_PKTINFO 1 + + /** * llc_ui_next_link_no - return the next unused link number for a sap * @sap: Address of sap to get link number from. @@ -591,6 +595,20 @@ static int llc_wait_data(struct sock *sk, long timeo) return rc; } +static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) +{ + struct llc_sock *llc = llc_sk(skb->sk); + + if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { + struct llc_pktinfo info; + + info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; + llc_pdu_decode_dsap(skb, &info.lpi_sap); + llc_pdu_decode_da(skb, info.lpi_mac); + put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info); + } +} + /** * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. @@ -812,6 +830,8 @@ copy_uaddr: memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); } + if (llc_sk(sk)->cmsg_flags) + llc_cmsg_rcv(msg, skb); goto out; } @@ -1030,6 +1050,12 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, goto out; llc->rw = opt; break; + case LLC_OPT_PKTINFO: + if (opt) + llc->cmsg_flags |= LLC_CMSG_PKTINFO; + else + llc->cmsg_flags &= ~LLC_CMSG_PKTINFO; + break; default: rc = -ENOPROTOOPT; goto out; @@ -1083,6 +1109,9 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname, val = llc->k; break; case LLC_OPT_RX_WIN: val = llc->rw; break; + case LLC_OPT_PKTINFO: + val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0; + break; default: rc = -ENOPROTOOPT; goto out; -- cgit v1.2.3 From b76f5a8427ac2928c07fa4ff2144bb8db072c240 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:02 +0000 Subject: llc: convert the socket list to RCU locking For the reclamation phase we use the SLAB_DESTROY_BY_RCU mechanism, which require some extra checks in the lookup code: a) If the current socket was released, reallocated & inserted in another list it will short circuit the iteration for the current list, thus we need to restart the lookup. b) If the current socket was released, reallocated & inserted in the same list we just need to recheck it matches the look-up criteria and if not we can skip to the next element. In this case there is no need to restart the lookup, since sockets are inserted at the start of the list and the worst that will happen is that we will iterate throught some of the list elements more then once. Note that the /proc and multicast delivery was not yet converted to RCU, it still uses spinlocks for protection. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/llc.h | 7 ++-- net/llc/af_llc.c | 1 + net/llc/llc_conn.c | 93 ++++++++++++++++++++++++++++++++++++------------------ net/llc/llc_core.c | 5 +-- net/llc/llc_proc.c | 22 ++++++------- net/llc/llc_sap.c | 66 ++++++++++++++++++++++++-------------- 6 files changed, 123 insertions(+), 71 deletions(-) (limited to 'include/net') diff --git a/include/net/llc.h b/include/net/llc.h index 7940da1606e7..1559cf10e874 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -53,10 +54,8 @@ struct llc_sap { struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; - struct { - rwlock_t lock; - struct hlist_head list; - } sk_list; + spinlock_t sk_lock; + struct hlist_nulls_head sk_list; }; #define LLC_DEST_INVALID 0 /* Invalid LLC PDU type */ diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c4d1a1da813c..f49f3dd6fbd3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -140,6 +140,7 @@ static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, }; /** diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c6bab39b018e..77bb3816655e 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -468,6 +468,19 @@ static int llc_exec_conn_trans_actions(struct sock *sk, return rc; } +static inline bool llc_estab_match(const struct llc_sap *sap, + const struct llc_addr *daddr, + const struct llc_addr *laddr, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return llc->laddr.lsap == laddr->lsap && + llc->daddr.lsap == daddr->lsap && + llc_mac_match(llc->laddr.mac, laddr->mac) && + llc_mac_match(llc->daddr.mac, daddr->mac); +} + /** * __llc_lookup_established - Finds connection for the remote/local sap/mac * @sap: SAP @@ -484,23 +497,26 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, struct llc_addr *laddr) { struct sock *rc; - struct hlist_node *node; - - read_lock(&sap->sk_list.lock); - sk_for_each(rc, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(rc); - - if (llc->laddr.lsap == laddr->lsap && - llc->daddr.lsap == daddr->lsap && - llc_mac_match(llc->laddr.mac, laddr->mac) && - llc_mac_match(llc->daddr.mac, daddr->mac)) { - sock_hold(rc); + struct hlist_nulls_node *node; + + rcu_read_lock(); +again: + sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + if (llc_estab_match(sap, daddr, laddr, rc)) { + /* Extra checks required by SLAB_DESTROY_BY_RCU */ + if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + goto again; + if (unlikely(llc_sk(rc)->sap != sap || + !llc_estab_match(sap, daddr, laddr, rc))) { + sock_put(rc); + continue; + } goto found; } } rc = NULL; found: - read_unlock(&sap->sk_list.lock); + rcu_read_unlock(); return rc; } @@ -516,6 +532,18 @@ struct sock *llc_lookup_established(struct llc_sap *sap, return sk; } +static inline bool llc_listener_match(const struct llc_sap *sap, + const struct llc_addr *laddr, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN && + llc->laddr.lsap == laddr->lsap && + (llc_mac_match(llc->laddr.mac, laddr->mac) || + llc_mac_null(llc->laddr.mac)); +} + /** * llc_lookup_listener - Finds listener for local MAC + SAP * @sap: SAP @@ -530,23 +558,26 @@ static struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) { struct sock *rc; - struct hlist_node *node; - - read_lock(&sap->sk_list.lock); - sk_for_each(rc, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(rc); - - if (rc->sk_type == SOCK_STREAM && rc->sk_state == TCP_LISTEN && - llc->laddr.lsap == laddr->lsap && - (llc_mac_match(llc->laddr.mac, laddr->mac) || - llc_mac_null(llc->laddr.mac))) { - sock_hold(rc); + struct hlist_nulls_node *node; + + rcu_read_lock(); +again: + sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + if (llc_listener_match(sap, laddr, rc)) { + /* Extra checks required by SLAB_DESTROY_BY_RCU */ + if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + goto again; + if (unlikely(llc_sk(rc)->sap != sap || + !llc_listener_match(sap, laddr, rc))) { + sock_put(rc); + continue; + } goto found; } } rc = NULL; found: - read_unlock(&sap->sk_list.lock); + rcu_read_unlock(); return rc; } @@ -652,10 +683,10 @@ static int llc_find_offset(int state, int ev_type) void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { llc_sap_hold(sap); - write_lock_bh(&sap->sk_list.lock); + spin_lock_bh(&sap->sk_lock); llc_sk(sk)->sap = sap; - sk_add_node(sk, &sap->sk_list.list); - write_unlock_bh(&sap->sk_list.lock); + sk_nulls_add_node_rcu(sk, &sap->sk_list); + spin_unlock_bh(&sap->sk_lock); } /** @@ -663,14 +694,14 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) * @sap: SAP * @sk: socket * - * This function removes a connection from sk_list.list of a SAP if + * This function removes a connection from sk_list of a SAP if * the connection was in this list. */ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) { - write_lock_bh(&sap->sk_list.lock); - sk_del_node_init(sk); - write_unlock_bh(&sap->sk_list.lock); + spin_lock_bh(&sap->sk_lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock_bh(&sap->sk_lock); llc_sap_put(sap); } diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index ff4c0ab96a69..5276b9722077 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -37,7 +37,8 @@ static struct llc_sap *llc_sap_alloc(void) if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; - rwlock_init(&sap->sk_list.lock); + spin_lock_init(&sap->sk_lock); + INIT_HLIST_NULLS_HEAD(&sap->sk_list, 0); atomic_set(&sap->refcnt, 1); } return sap; @@ -142,7 +143,7 @@ out: */ void llc_sap_close(struct llc_sap *sap) { - WARN_ON(!hlist_empty(&sap->sk_list.list)); + WARN_ON(!hlist_nulls_empty(&sap->sk_list)); llc_del_sap(sap); kfree(sap); } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index be47ac427f6b..6b3d033b3236 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -34,19 +34,19 @@ static struct sock *llc_get_sk_idx(loff_t pos) { struct list_head *sap_entry; struct llc_sap *sap; - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *sk = NULL; list_for_each(sap_entry, &llc_sap_list) { sap = list_entry(sap_entry, struct llc_sap, node); - read_lock_bh(&sap->sk_list.lock); - sk_for_each(sk, node, &sap->sk_list.list) { + spin_lock_bh(&sap->sk_lock); + sk_nulls_for_each(sk, node, &sap->sk_list) { if (!pos) goto found; --pos; } - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } sk = NULL; found: @@ -73,25 +73,25 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) goto out; } sk = v; - next = sk_next(sk); + next = sk_nulls_next(sk); if (next) { sk = next; goto out; } llc = llc_sk(sk); sap = llc->sap; - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); sk = NULL; for (;;) { if (sap->node.next == &llc_sap_list) break; sap = list_entry(sap->node.next, struct llc_sap, node); - read_lock_bh(&sap->sk_list.lock); - if (!hlist_empty(&sap->sk_list.list)) { - sk = sk_head(&sap->sk_list.list); + spin_lock_bh(&sap->sk_lock); + if (!hlist_nulls_empty(&sap->sk_list)) { + sk = sk_nulls_head(&sap->sk_list); break; } - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } out: return sk; @@ -104,7 +104,7 @@ static void llc_seq_stop(struct seq_file *seq, void *v) struct llc_sock *llc = llc_sk(sk); struct llc_sap *sap = llc->sap; - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } read_unlock_bh(&llc_sap_list_lock); } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 008de1fc42ca..39760d013ce2 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -297,6 +297,17 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, llc_sap_state_process(sap, skb); } +static inline bool llc_dgram_match(const struct llc_sap *sap, + const struct llc_addr *laddr, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return sk->sk_type == SOCK_DGRAM && + llc->laddr.lsap == laddr->lsap && + llc_mac_match(llc->laddr.mac, laddr->mac); +} + /** * llc_lookup_dgram - Finds dgram socket for the local sap/mac * @sap: SAP @@ -309,25 +320,41 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, const struct llc_addr *laddr) { struct sock *rc; - struct hlist_node *node; - - read_lock_bh(&sap->sk_list.lock); - sk_for_each(rc, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(rc); - - if (rc->sk_type == SOCK_DGRAM && - llc->laddr.lsap == laddr->lsap && - llc_mac_match(llc->laddr.mac, laddr->mac)) { - sock_hold(rc); + struct hlist_nulls_node *node; + + rcu_read_lock_bh(); +again: + sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + if (llc_dgram_match(sap, laddr, rc)) { + /* Extra checks required by SLAB_DESTROY_BY_RCU */ + if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) + goto again; + if (unlikely(llc_sk(rc)->sap != sap || + !llc_dgram_match(sap, laddr, rc))) { + sock_put(rc); + continue; + } goto found; } } rc = NULL; found: - read_unlock_bh(&sap->sk_list.lock); + rcu_read_unlock_bh(); return rc; } +static inline bool llc_mcast_match(const struct llc_sap *sap, + const struct llc_addr *laddr, + const struct sk_buff *skb, + const struct sock *sk) +{ + struct llc_sock *llc = llc_sk(sk); + + return sk->sk_type == SOCK_DGRAM && + llc->laddr.lsap == laddr->lsap && + llc->dev == skb->dev; +} + /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP @@ -341,20 +368,13 @@ static void llc_sap_mcast(struct llc_sap *sap, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; - read_lock_bh(&sap->sk_list.lock); - sk_for_each(sk, node, &sap->sk_list.list) { - struct llc_sock *llc = llc_sk(sk); + spin_lock_bh(&sap->sk_lock); + sk_nulls_for_each_rcu(sk, node, &sap->sk_list) { struct sk_buff *skb1; - if (sk->sk_type != SOCK_DGRAM) - continue; - - if (llc->laddr.lsap != laddr->lsap) - continue; - - if (llc->dev != skb->dev) + if (!llc_mcast_match(sap, laddr, skb, sk)) continue; skb1 = skb_clone(skb, GFP_ATOMIC); @@ -365,7 +385,7 @@ static void llc_sap_mcast(struct llc_sap *sap, llc_sap_rcv(sap, skb1, sk); sock_put(sk); } - read_unlock_bh(&sap->sk_list.lock); + spin_unlock_bh(&sap->sk_lock); } -- cgit v1.2.3 From 6d2e3ea284463d5ab34e9cf2a41d0b8627b95d02 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:04 +0000 Subject: llc: use a device based hash table to speed up multicast delivery This patch adds a per SAP device based hash table to solve the multicast delivery scalability issue when we have large number of interfaces and a large number of sockets bound to the same SAP. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/llc.h | 11 +++++++++++ include/net/llc_conn.h | 1 + net/llc/llc_conn.c | 10 +++++++++- net/llc/llc_sap.c | 8 ++++++-- 4 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/llc.h b/include/net/llc.h index 1559cf10e874..dbef5917905b 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -32,6 +32,9 @@ struct llc_addr { #define LLC_SAP_STATE_INACTIVE 1 #define LLC_SAP_STATE_ACTIVE 2 +#define LLC_SK_DEV_HASH_BITS 6 +#define LLC_SK_DEV_HASH_ENTRIES (1<sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; +} + + #define LLC_DEST_INVALID 0 /* Invalid LLC PDU type */ #define LLC_DEST_SAP 1 /* Type 1 goes here */ #define LLC_DEST_CONN 2 /* Type 2 goes here */ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index fe982fd94c4a..2f97d8ddce92 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -77,6 +77,7 @@ struct llc_sock { received and caused sending FRMR. Used for resending FRMR */ u32 cmsg_flags; + struct hlist_node dev_hash_node; }; static inline struct llc_sock *llc_sk(const struct sock *sk) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 77bb3816655e..10cdfe2db830 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -682,10 +682,15 @@ static int llc_find_offset(int state, int ev_type) */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { + struct llc_sock *llc = llc_sk(sk); + struct hlist_head *dev_hb = llc_sk_dev_hash(sap, llc->dev->ifindex); + llc_sap_hold(sap); - spin_lock_bh(&sap->sk_lock); llc_sk(sk)->sap = sap; + + spin_lock_bh(&sap->sk_lock); sk_nulls_add_node_rcu(sk, &sap->sk_list); + hlist_add_head(&llc->dev_hash_node, dev_hb); spin_unlock_bh(&sap->sk_lock); } @@ -699,8 +704,11 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) */ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) { + struct llc_sock *llc = llc_sk(sk); + spin_lock_bh(&sap->sk_lock); sk_nulls_del_node_init_rcu(sk); + hlist_del(&llc->dev_hash_node); spin_unlock_bh(&sap->sk_lock); llc_sap_put(sap); } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 94790e60d072..94cb706f6cc4 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -387,10 +387,14 @@ static void llc_sap_mcast(struct llc_sap *sap, { int i = 0, count = 256 / sizeof(struct sock *); struct sock *sk, *stack[count]; - struct hlist_nulls_node *node; + struct hlist_node *node; + struct llc_sock *llc; + struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); - sk_nulls_for_each_rcu(sk, node, &sap->sk_list) { + hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) { + + sk = &llc->sk; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; -- cgit v1.2.3 From 52d58aef5ee460fedd7f250f05e79081019f2c79 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:05 +0000 Subject: llc: replace the socket list with a local address based hash For the cases where a lot of interfaces are used in conjunction with a lot of LLC sockets bound to the same SAP, the iteration of the socket list becomes prohibitively expensive. Replacing the list with a a local address based hash significantly improves the bind and listener lookup operations as well as the datagram delivery. Connected sockets delivery is also improved, but this patch does not address the case where we have lots of sockets with the same local address connected to different remote addresses. In order to keep the socket sanity checks alive and fast a socket counter was added to the SAP structure. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/llc.h | 21 +++++++++++++++- net/llc/llc_conn.c | 70 +++++++++++++++++++++++++++++++++++++++--------------- net/llc/llc_core.c | 6 +++-- net/llc/llc_proc.c | 44 +++++++++++++++++++++++----------- net/llc/llc_sap.c | 11 ++++++++- 5 files changed, 115 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/llc.h b/include/net/llc.h index dbef5917905b..709a9b37e239 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include @@ -35,6 +37,9 @@ struct llc_addr { #define LLC_SK_DEV_HASH_BITS 6 #define LLC_SK_DEV_HASH_ENTRIES (1<sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; } +static inline +u32 llc_sk_laddr_hashfn(struct llc_sap *sap, const struct llc_addr *laddr) +{ + return hash_32(jhash(laddr->mac, sizeof(laddr->mac), 0), + LLC_SK_LADDR_HASH_BITS); +} + +static inline +struct hlist_nulls_head *llc_sk_laddr_hash(struct llc_sap *sap, + const struct llc_addr *laddr) +{ + return &sap->sk_laddr_hash[llc_sk_laddr_hashfn(sap, laddr)]; +} #define LLC_DEST_INVALID 0 /* Invalid LLC PDU type */ #define LLC_DEST_SAP 1 /* Type 1 goes here */ diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 10cdfe2db830..a8dde9b010da 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -498,10 +498,12 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, { struct sock *rc; struct hlist_nulls_node *node; + int slot = llc_sk_laddr_hashfn(sap, laddr); + struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock(); again: - sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { /* Extra checks required by SLAB_DESTROY_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) @@ -515,6 +517,13 @@ again: } } rc = NULL; + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (unlikely(get_nulls_value(node) != slot)) + goto again; found: rcu_read_unlock(); return rc; @@ -540,29 +549,20 @@ static inline bool llc_listener_match(const struct llc_sap *sap, return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN && llc->laddr.lsap == laddr->lsap && - (llc_mac_match(llc->laddr.mac, laddr->mac) || - llc_mac_null(llc->laddr.mac)); + llc_mac_match(llc->laddr.mac, laddr->mac); } -/** - * llc_lookup_listener - Finds listener for local MAC + SAP - * @sap: SAP - * @laddr: address of local LLC (MAC + SAP) - * - * Search connection list of the SAP and finds connection listening on - * local mac, and local sap. Returns pointer for parent socket found, - * %NULL otherwise. - * Caller has to make sure local_bh is disabled. - */ -static struct sock *llc_lookup_listener(struct llc_sap *sap, - struct llc_addr *laddr) +static struct sock *__llc_lookup_listener(struct llc_sap *sap, + struct llc_addr *laddr) { struct sock *rc; struct hlist_nulls_node *node; + int slot = llc_sk_laddr_hashfn(sap, laddr); + struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock(); again: - sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { /* Extra checks required by SLAB_DESTROY_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) @@ -576,11 +576,40 @@ again: } } rc = NULL; + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (unlikely(get_nulls_value(node) != slot)) + goto again; found: rcu_read_unlock(); return rc; } +/** + * llc_lookup_listener - Finds listener for local MAC + SAP + * @sap: SAP + * @laddr: address of local LLC (MAC + SAP) + * + * Search connection list of the SAP and finds connection listening on + * local mac, and local sap. Returns pointer for parent socket found, + * %NULL otherwise. + * Caller has to make sure local_bh is disabled. + */ +static struct sock *llc_lookup_listener(struct llc_sap *sap, + struct llc_addr *laddr) +{ + static struct llc_addr null_addr; + struct sock *rc = __llc_lookup_listener(sap, laddr); + + if (!rc) + rc = __llc_lookup_listener(sap, &null_addr); + + return rc; +} + static struct sock *__llc_lookup(struct llc_sap *sap, struct llc_addr *daddr, struct llc_addr *laddr) @@ -678,18 +707,20 @@ static int llc_find_offset(int state, int ev_type) * @sap: SAP * @sk: socket * - * This function adds a socket to sk_list of a SAP. + * This function adds a socket to the hash tables of a SAP. */ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) { struct llc_sock *llc = llc_sk(sk); struct hlist_head *dev_hb = llc_sk_dev_hash(sap, llc->dev->ifindex); + struct hlist_nulls_head *laddr_hb = llc_sk_laddr_hash(sap, &llc->laddr); llc_sap_hold(sap); llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); - sk_nulls_add_node_rcu(sk, &sap->sk_list); + sap->sk_count++; + sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); spin_unlock_bh(&sap->sk_lock); } @@ -699,7 +730,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) * @sap: SAP * @sk: socket * - * This function removes a connection from sk_list of a SAP if + * This function removes a connection from the hash tables of a SAP if * the connection was in this list. */ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) @@ -709,6 +740,7 @@ void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk) spin_lock_bh(&sap->sk_lock); sk_nulls_del_node_init_rcu(sk); hlist_del(&llc->dev_hash_node); + sap->sk_count--; spin_unlock_bh(&sap->sk_lock); llc_sap_put(sap); } diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 5276b9722077..0c9ef8bc7655 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -33,12 +33,14 @@ DEFINE_RWLOCK(llc_sap_list_lock); static struct llc_sap *llc_sap_alloc(void) { struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); + int i; if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; spin_lock_init(&sap->sk_lock); - INIT_HLIST_NULLS_HEAD(&sap->sk_list, 0); + for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) + INIT_HLIST_NULLS_HEAD(&sap->sk_laddr_hash[i], i); atomic_set(&sap->refcnt, 1); } return sap; @@ -143,7 +145,7 @@ out: */ void llc_sap_close(struct llc_sap *sap) { - WARN_ON(!hlist_nulls_empty(&sap->sk_list)); + WARN_ON(sap->sk_count); llc_del_sap(sap); kfree(sap); } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 6b3d033b3236..09dec6307206 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -34,17 +34,22 @@ static struct sock *llc_get_sk_idx(loff_t pos) { struct list_head *sap_entry; struct llc_sap *sap; - struct hlist_nulls_node *node; struct sock *sk = NULL; + int i; list_for_each(sap_entry, &llc_sap_list) { sap = list_entry(sap_entry, struct llc_sap, node); spin_lock_bh(&sap->sk_lock); - sk_nulls_for_each(sk, node, &sap->sk_list) { - if (!pos) - goto found; - --pos; + for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) { + struct hlist_nulls_head *head = &sap->sk_laddr_hash[i]; + struct hlist_nulls_node *node; + + sk_nulls_for_each(sk, node, head) { + if (!pos) + goto found; /* keep the lock */ + --pos; + } } spin_unlock_bh(&sap->sk_lock); } @@ -61,6 +66,19 @@ static void *llc_seq_start(struct seq_file *seq, loff_t *pos) return l ? llc_get_sk_idx(--l) : SEQ_START_TOKEN; } +static struct sock *laddr_hash_next(struct llc_sap *sap, int bucket) +{ + struct hlist_nulls_node *node; + struct sock *sk = NULL; + + while (++bucket < LLC_SK_LADDR_HASH_ENTRIES) + sk_nulls_for_each(sk, node, &sap->sk_laddr_hash[bucket]) + goto out; + +out: + return sk; +} + static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock* sk, *next; @@ -80,17 +98,15 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } llc = llc_sk(sk); sap = llc->sap; + sk = laddr_hash_next(sap, llc_sk_laddr_hashfn(sap, &llc->laddr)); + if (sk) + goto out; spin_unlock_bh(&sap->sk_lock); - sk = NULL; - for (;;) { - if (sap->node.next == &llc_sap_list) - break; - sap = list_entry(sap->node.next, struct llc_sap, node); + list_for_each_entry_continue(sap, &llc_sap_list, node) { spin_lock_bh(&sap->sk_lock); - if (!hlist_nulls_empty(&sap->sk_list)) { - sk = sk_nulls_head(&sap->sk_list); - break; - } + sk = laddr_hash_next(sap, -1); + if (sk) + break; /* keep the lock */ spin_unlock_bh(&sap->sk_lock); } out: diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 94cb706f6cc4..ad6e6e1cf22f 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -321,10 +321,12 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, { struct sock *rc; struct hlist_nulls_node *node; + int slot = llc_sk_laddr_hashfn(sap, laddr); + struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock_bh(); again: - sk_nulls_for_each_rcu(rc, node, &sap->sk_list) { + sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { /* Extra checks required by SLAB_DESTROY_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) @@ -338,6 +340,13 @@ again: } } rc = NULL; + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (unlikely(get_nulls_value(node) != slot)) + goto again; found: rcu_read_unlock_bh(); return rc; -- cgit v1.2.3 From 8beb9ab6c2df203e8d68cb1f48cf42604a6bed86 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sat, 26 Dec 2009 11:51:06 +0000 Subject: llc: convert llc_sap_list to RCU Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/llc.h | 2 +- net/llc/llc_core.c | 46 ++++++++++++++-------------------------------- net/llc/llc_proc.c | 11 ++++------- 3 files changed, 19 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/net/llc.h b/include/net/llc.h index 709a9b37e239..5503b74ab170 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -93,7 +93,7 @@ struct hlist_nulls_head *llc_sk_laddr_hash(struct llc_sap *sap, #define LLC_DEST_CONN 2 /* Type 2 goes here */ extern struct list_head llc_sap_list; -extern rwlock_t llc_sap_list_lock; +extern spinlock_t llc_sap_list_lock; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 0c9ef8bc7655..78167e81dfeb 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -23,7 +23,7 @@ #include LIST_HEAD(llc_sap_list); -DEFINE_RWLOCK(llc_sap_list_lock); +DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. @@ -46,30 +46,6 @@ static struct llc_sap *llc_sap_alloc(void) return sap; } -/** - * llc_add_sap - add sap to station list - * @sap: Address of the sap - * - * Adds a sap to the LLC's station sap list. - */ -static void llc_add_sap(struct llc_sap *sap) -{ - list_add_tail(&sap->node, &llc_sap_list); -} - -/** - * llc_del_sap - del sap from station list - * @sap: Address of the sap - * - * Removes a sap to the LLC's station sap list. - */ -static void llc_del_sap(struct llc_sap *sap) -{ - write_lock_bh(&llc_sap_list_lock); - list_del(&sap->node); - write_unlock_bh(&llc_sap_list_lock); -} - static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap* sap; @@ -93,13 +69,13 @@ out: */ struct llc_sap *llc_sap_find(unsigned char sap_value) { - struct llc_sap* sap; + struct llc_sap *sap; - read_lock_bh(&llc_sap_list_lock); + rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); if (sap) llc_sap_hold(sap); - read_unlock_bh(&llc_sap_list_lock); + rcu_read_unlock_bh(); return sap; } @@ -120,7 +96,7 @@ struct llc_sap *llc_sap_open(unsigned char lsap, { struct llc_sap *sap = NULL; - write_lock_bh(&llc_sap_list_lock); + spin_lock_bh(&llc_sap_list_lock); if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; sap = llc_sap_alloc(); @@ -128,9 +104,9 @@ struct llc_sap *llc_sap_open(unsigned char lsap, goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; - llc_add_sap(sap); + list_add_tail_rcu(&sap->node, &llc_sap_list); out: - write_unlock_bh(&llc_sap_list_lock); + spin_unlock_bh(&llc_sap_list_lock); return sap; } @@ -146,7 +122,13 @@ out: void llc_sap_close(struct llc_sap *sap) { WARN_ON(sap->sk_count); - llc_del_sap(sap); + + spin_lock_bh(&llc_sap_list_lock); + list_del_rcu(&sap->node); + spin_unlock_bh(&llc_sap_list_lock); + + synchronize_rcu(); + kfree(sap); } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 09dec6307206..7af1ff2d1f19 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -32,14 +32,11 @@ static void llc_ui_format_mac(struct seq_file *seq, u8 *addr) static struct sock *llc_get_sk_idx(loff_t pos) { - struct list_head *sap_entry; struct llc_sap *sap; struct sock *sk = NULL; int i; - list_for_each(sap_entry, &llc_sap_list) { - sap = list_entry(sap_entry, struct llc_sap, node); - + list_for_each_entry_rcu(sap, &llc_sap_list, node) { spin_lock_bh(&sap->sk_lock); for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) { struct hlist_nulls_head *head = &sap->sk_laddr_hash[i]; @@ -62,7 +59,7 @@ static void *llc_seq_start(struct seq_file *seq, loff_t *pos) { loff_t l = *pos; - read_lock_bh(&llc_sap_list_lock); + rcu_read_lock_bh(); return l ? llc_get_sk_idx(--l) : SEQ_START_TOKEN; } @@ -102,7 +99,7 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (sk) goto out; spin_unlock_bh(&sap->sk_lock); - list_for_each_entry_continue(sap, &llc_sap_list, node) { + list_for_each_entry_continue_rcu(sap, &llc_sap_list, node) { spin_lock_bh(&sap->sk_lock); sk = laddr_hash_next(sap, -1); if (sk) @@ -122,7 +119,7 @@ static void llc_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&sap->sk_lock); } - read_unlock_bh(&llc_sap_list_lock); + rcu_read_unlock_bh(); } static int llc_seq_socket_show(struct seq_file *seq, void *v) -- cgit v1.2.3 From a80f7c0b088187c8471b441d461e937991870661 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:32 +0100 Subject: mac80211: introduce flush operation We've long lacked a good confirmation that frames have really gone out, e.g. before going off-channel for a scan. Add a flush() operation that drivers can implement to provide that confirmation, and use it in a few places: * before scanning sends the nullfunc frames * after scanning sends the nullfunc frames, if any * when going idle, to send any pending frames Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 11 +++++++++++ include/net/mac80211.h | 5 +++++ net/mac80211/driver-ops.h | 7 +++++++ net/mac80211/driver-trace.h | 21 +++++++++++++++++++++ net/mac80211/iface.c | 2 ++ net/mac80211/scan.c | 13 +++++++++++-- 6 files changed, 57 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 718a5f198c30..4dee69a38c1d 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -896,6 +896,16 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw, return 0; } +static void mac80211_hwsim_flush(struct ieee80211_hw *hw, bool drop) +{ + /* + * In this special case, there's nothing we need to + * do because hwsim does transmission synchronously. + * In the future, when it does transmissions via + * userspace, we may need to do something. + */ +} + static const struct ieee80211_ops mac80211_hwsim_ops = { @@ -912,6 +922,7 @@ static const struct ieee80211_ops mac80211_hwsim_ops = .conf_tx = mac80211_hwsim_conf_tx, CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd) .ampdu_action = mac80211_hwsim_ampdu_action, + .flush = mac80211_hwsim_flush, }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 494ac69ff477..77ea34b03285 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1545,6 +1545,10 @@ enum ieee80211_ampdu_mlme_action { * and need to call wiphy_rfkill_set_hw_state() in the callback. * * @testmode_cmd: Implement a cfg80211 test mode command. + * + * @flush: Flush all pending frames from the hardware queue, making sure + * that the hardware queues are empty. If the parameter @drop is set + * to %true, pending frames may be dropped. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1601,6 +1605,7 @@ struct ieee80211_ops { #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct ieee80211_hw *hw, void *data, int len); #endif + void (*flush)(struct ieee80211_hw *hw, bool drop); }; /** diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 727e4cf7b8a6..cbe133bcdf34 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -259,4 +259,11 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local) if (local->ops->rfkill_poll) local->ops->rfkill_poll(&local->hw); } + +static inline void drv_flush(struct ieee80211_local *local, bool drop) +{ + trace_drv_flush(local, drop); + if (local->ops->flush) + local->ops->flush(&local->hw, drop); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 7a849b920165..977cc7528bc6 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -690,6 +690,27 @@ TRACE_EVENT(drv_ampdu_action, LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret ) ); + +TRACE_EVENT(drv_flush, + TP_PROTO(struct ieee80211_local *local, bool drop), + + TP_ARGS(local, drop), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, drop) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->drop = drop; + ), + + TP_printk( + LOCAL_PR_FMT " drop:%d", + LOCAL_PR_ARG, __entry->drop + ) +); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1ceca14331d4..389dc8d880f3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -917,6 +917,8 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) wiphy_name(local->hw.wiphy)); #endif + drv_flush(local, false); + local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae1830056521..d98c45e5528b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -418,9 +418,10 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; + drv_flush(local, false); + ieee80211_configure_filter(local); - /* TODO: start scan as soon as all nullfunc frames are ACKed */ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, IEEE80211_CHANNEL_TIME); @@ -584,8 +585,16 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca __set_bit(SCAN_OFF_CHANNEL, &local->scanning); + /* + * What if the nullfunc frames didn't arrive? + */ + drv_flush(local, false); + if (local->ops->flush) + *next_delay = 0; + else + *next_delay = HZ / 10; + /* advance to the next channel to be scanned */ - *next_delay = HZ / 10; local->next_scan_state = SCAN_SET_CHANNEL; } -- cgit v1.2.3 From 9588bbd5529461a3dacd435bf239c84c3508f569 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 23 Dec 2009 13:15:41 +0100 Subject: cfg80211: add remain-on-channel command Add new commands for requesting the driver to remain awake on a specified channel for the specified amount of time (and another command to cancel such an operation). This can be used to implement userspace-controlled off-channel operations, like Public Action frame exchange on another channel than the operation channel. The off-channel operation should behave similarly to scan, i.e. the local station (if associated) moves into power save mode to request the AP to buffer frames for it and then moves to the other channel to allow the off-channel operation to be completed. The duration parameter can be used to request enough time to receive a response from the target station. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 36 ++++++++ include/net/cfg80211.h | 47 +++++++++++ net/wireless/chan.c | 41 +++++---- net/wireless/core.h | 3 + net/wireless/mlme.c | 27 ++++++ net/wireless/nl80211.c | 218 +++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 11 +++ 7 files changed, 368 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index da8ea2e19273..2bfbe88837ef 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -270,6 +270,31 @@ * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices * associated with this wiphy must be down and will follow. * + * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified + * channel for the specified amount of time. This can be used to do + * off-channel operations like transmit a Public Action frame and wait for + * a response while being associated to an AP on another channel. + * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify which + * radio is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the + * frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be + * optionally used to specify additional channel parameters. + * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds + * to remain on the channel. This command is also used as an event to + * notify when the requested duration starts (it may take a while for the + * driver to schedule this time due to other concurrent needs for the + * radio). + * When called, this operation returns a cookie (%NL80211_ATTR_COOKIE) + * that will be included with any events pertaining to this request; + * the cookie is also used to cancel the request. + * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a + * pending remain-on-channel duration if the desired operation has been + * completed prior to expiration of the originally requested duration. + * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the + * radio. The %NL80211_ATTR_COOKIE attribute must be given as well to + * uniquely identify the request. + * This command is also used as an event to notify when a requested + * remain-on-channel duration has expired. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -353,6 +378,9 @@ enum nl80211_commands { NL80211_CMD_DEL_PMKSA, NL80211_CMD_FLUSH_PMKSA, + NL80211_CMD_REMAIN_ON_CHANNEL, + NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -606,6 +634,10 @@ enum nl80211_commands { * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can * cache, a wiphy attribute. * + * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32. + * + * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -743,6 +775,10 @@ enum nl80211_attrs { NL80211_ATTR_PMKID, NL80211_ATTR_MAX_NUM_PMKIDS, + NL80211_ATTR_DURATION, + + NL80211_ATTR_COOKIE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 542a477a94da..b66beb052054 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -988,6 +988,15 @@ struct cfg80211_pmksa { * * @dump_survey: get site survey information. * + * @remain_on_channel: Request the driver to remain awake on the specified + * channel for the specified duration to complete an off-channel + * operation (e.g., public action frame exchange). When the driver is + * ready on the requested channel, it must indicate this with an event + * notification by calling cfg80211_ready_on_channel(). + * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. + * This allows the operation to be terminated prior to timeout based on + * the duration value. + * * @testmode_cmd: run a test mode command * * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac @@ -1123,6 +1132,16 @@ struct cfg80211_ops { struct cfg80211_pmksa *pmksa); int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); + int (*remain_on_channel)(struct wiphy *wiphy, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, + u64 *cookie); + int (*cancel_remain_on_channel)(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie); + /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); @@ -2147,5 +2166,33 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, void cfg80211_disconnected(struct net_device *dev, u16 reason, u8 *ie, size_t ie_len, gfp_t gfp); +/** + * cfg80211_ready_on_channel - notification of remain_on_channel start + * @dev: network device + * @cookie: the request cookie + * @chan: The current channel (from remain_on_channel request) + * @channel_type: Channel type + * @duration: Duration in milliseconds that the driver intents to remain on the + * channel + * @gfp: allocation flags + */ +void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp); + +/** + * cfg80211_remain_on_channel_expired - remain_on_channel duration expired + * @dev: network device + * @cookie: the request cookie + * @chan: The current channel (from remain_on_channel request) + * @channel_type: Channel type + * @gfp: allocation flags + */ +void cfg80211_remain_on_channel_expired(struct net_device *dev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + gfp_t gfp); #endif /* __NET_CFG80211_H */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a46ac6c9b365..bf1737fc9a7e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -41,44 +41,57 @@ rdev_fixed_channel(struct cfg80211_registered_device *rdev, return result; } -int rdev_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *for_wdev, +struct ieee80211_channel * +rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type) { struct ieee80211_channel *chan; struct ieee80211_sta_ht_cap *ht_cap; - int result; - - if (rdev_fixed_channel(rdev, for_wdev)) - return -EBUSY; - - if (!rdev->ops->set_channel) - return -EOPNOTSUPP; chan = ieee80211_get_channel(&rdev->wiphy, freq); /* Primary channel not allowed */ if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) - return -EINVAL; + return NULL; if (channel_type == NL80211_CHAN_HT40MINUS && chan->flags & IEEE80211_CHAN_NO_HT40MINUS) - return -EINVAL; + return NULL; else if (channel_type == NL80211_CHAN_HT40PLUS && chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - return -EINVAL; + return NULL; ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap; if (channel_type != NL80211_CHAN_NO_HT) { if (!ht_cap->ht_supported) - return -EINVAL; + return NULL; if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) - return -EINVAL; + return NULL; } + return chan; +} + +int rdev_set_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *for_wdev, + int freq, enum nl80211_channel_type channel_type) +{ + struct ieee80211_channel *chan; + int result; + + if (rdev_fixed_channel(rdev, for_wdev)) + return -EBUSY; + + if (!rdev->ops->set_channel) + return -EOPNOTSUPP; + + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (!chan) + return -EINVAL; + result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type); if (result) return result; diff --git a/net/wireless/core.h b/net/wireless/core.h index 35b712127143..30ec95f05b52 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -374,6 +374,9 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); struct ieee80211_channel * rdev_fixed_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev); +struct ieee80211_channel * +rdev_freq_to_chan(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type channel_type); int rdev_set_freq(struct cfg80211_registered_device *rdev, struct wireless_dev *for_wdev, int freq, enum nl80211_channel_type channel_type); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index acaeaa784d68..11f6469b3f98 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -680,3 +680,30 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } } } + +void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_remain_on_channel(rdev, dev, cookie, chan, channel_type, + duration, gfp); +} +EXPORT_SYMBOL(cfg80211_ready_on_channel); + +void cfg80211_remain_on_channel_expired(struct net_device *dev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_remain_on_channel_cancel(rdev, dev, cookie, chan, + channel_type, gfp); +} +EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 60f854377f90..ff857f10cb85 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -141,6 +141,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, .len = WLAN_PMKID_LEN }, + [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, + [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, }; /* policy for the attributes */ @@ -569,6 +571,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); + CMD(remain_on_channel, REMAIN_ON_CHANNEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4283,6 +4286,143 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) } +static int nl80211_remain_on_channel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + struct ieee80211_channel *chan; + struct sk_buff *msg; + void *hdr; + u64 cookie; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + u32 freq, duration; + int err; + + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || + !info->attrs[NL80211_ATTR_DURATION]) + return -EINVAL; + + duration = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + + /* + * We should be on that channel for at least one jiffie, + * and more than 5 seconds seems excessive. + */ + if (!duration || !msecs_to_jiffies(duration) || duration > 5000) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->remain_on_channel) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + err = -EINVAL; + goto out; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (chan == NULL) { + err = -EINVAL; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_REMAIN_ON_CHANNEL); + + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } + + err = rdev->ops->remain_on_channel(&rdev->wiphy, dev, chan, + channel_type, duration, &cookie); + + if (err) + goto free_msg; + + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + + nla_put_failure: + err = -ENOBUFS; + free_msg: + nlmsg_free(msg); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + u64 cookie; + int err; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->cancel_remain_on_channel) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + err = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4545,8 +4685,20 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, - + { + .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, + .doit = nl80211_remain_on_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + .doit = nl80211_cancel_remain_on_channel, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; + static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", }; @@ -5134,6 +5286,70 @@ nla_put_failure: nlmsg_free(msg); } +static void nl80211_send_remain_on_chan_event( + int cmd, struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type); + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL) + NLA_PUT_U32(msg, NL80211_ATTR_DURATION, duration); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp) +{ + nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, + rdev, netdev, cookie, chan, + channel_type, duration, gfp); +} + +void nl80211_send_remain_on_channel_cancel( + struct cfg80211_registered_device *rdev, struct net_device *netdev, + u64 cookie, struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, gfp_t gfp) +{ + nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + rdev, netdev, cookie, chan, + channel_type, 0, gfp); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 44cc2a76a1b0..a5e2de419b7a 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -59,4 +59,15 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp); +void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + u64 cookie, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int duration, gfp_t gfp); +void nl80211_send_remain_on_channel_cancel( + struct cfg80211_registered_device *rdev, struct net_device *netdev, + u64 cookie, struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3 From 98b6218388e345064c3f2d3c161383a18274c638 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:44 +0100 Subject: mac80211/cfg80211: add station events When, for instance, a new IBSS peer is found, userspace wants to be notified. Add events for all new stations that mac80211 learns about. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 12 ++++++++++++ net/mac80211/sta_info.c | 5 +++++ net/wireless/mlme.c | 10 ++++++++++ net/wireless/nl80211.c | 21 ++++++++++++++++++++- net/wireless/nl80211.h | 4 ++++ 5 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b66beb052054..add79930f47d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2195,4 +2195,16 @@ void cfg80211_remain_on_channel_expired(struct net_device *dev, enum nl80211_channel_type channel_type, gfp_t gfp); + +/** + * cfg80211_new_sta - notify userspace about station + * + * @dev: the netdev + * @mac_addr: the station's address + * @sinfo: the station information + * @gfp: allocation flags + */ +void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f039e761aec1..47da552ce8a6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -359,6 +359,7 @@ int sta_info_insert(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct station_info sinfo; unsigned long flags; int err = 0; @@ -408,6 +409,10 @@ int sta_info_insert(struct sta_info *sta) spin_unlock_irqrestore(&local->sta_lock, flags); + sinfo.filled = 0; + sinfo.generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_ATOMIC); + #ifdef CONFIG_MAC80211_DEBUGFS /* * Debugfs entry adding might sleep, so schedule process diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 11f6469b3f98..3dba19e17271 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -707,3 +707,13 @@ void cfg80211_remain_on_channel_expired(struct net_device *dev, channel_type, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); + +void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); +} +EXPORT_SYMBOL(cfg80211_new_sta); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ff857f10cb85..e3bee3cecdfa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1642,7 +1642,7 @@ static int parse_station_flags(struct genl_info *info, static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, - u8 *mac_addr, struct station_info *sinfo) + const u8 *mac_addr, struct station_info *sinfo) { void *hdr; struct nlattr *sinfoattr, *txrate; @@ -5350,6 +5350,25 @@ void nl80211_send_remain_on_channel_cancel( channel_type, 0, gfp); } +void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + if (nl80211_send_station(msg, 0, 0, 0, dev, mac_addr, sinfo) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); +} + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index a5e2de419b7a..14855b8fb430 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -70,4 +70,8 @@ void nl80211_send_remain_on_channel_cancel( u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp); +void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3 From 1ed32e4fc8cfc9656cc1101e7f9617d485fcbe7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Dec 2009 13:15:45 +0100 Subject: mac80211: remove struct ieee80211_if_init_conf All its members (vif, mac_addr, type) are now available in the vif struct directly, so we can pass that instead of the conf struct. I generated this patch (except the mac80211 and header file changes) with this semantic patch: @@ identifier conf, fn, hw; type tp; @@ tp fn(struct ieee80211_hw *hw, -struct ieee80211_if_init_conf *conf) +struct ieee80211_vif *vif) { <... ( -conf->type +vif->type | -conf->mac_addr +vif->addr | -conf->vif +vif ) ...> } Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/adm8211.c | 12 +++---- drivers/net/wireless/at76c50x-usb.c | 6 ++-- drivers/net/wireless/ath/ar9170/main.c | 8 ++--- drivers/net/wireless/ath/ath5k/base.c | 18 +++++----- drivers/net/wireless/ath/ath9k/main.c | 28 +++++++-------- drivers/net/wireless/b43/main.c | 26 +++++++------- drivers/net/wireless/b43legacy/main.c | 24 ++++++------- drivers/net/wireless/iwlwifi/iwl-core.c | 20 +++++------ drivers/net/wireless/iwlwifi/iwl-core.h | 4 +-- drivers/net/wireless/libertas_tf/main.c | 10 +++--- drivers/net/wireless/mac80211_hwsim.c | 18 +++++----- drivers/net/wireless/mwl8k.c | 14 ++++---- drivers/net/wireless/p54/main.c | 12 +++---- drivers/net/wireless/rt2x00/rt2x00.h | 4 +-- drivers/net/wireless/rt2x00/rt2x00mac.c | 26 +++++++------- drivers/net/wireless/rtl818x/rtl8180_dev.c | 12 +++---- drivers/net/wireless/rtl818x/rtl8187_dev.c | 10 +++--- drivers/net/wireless/wl12xx/wl1251_main.c | 14 ++++---- drivers/net/wireless/wl12xx/wl1271_main.c | 10 +++--- drivers/net/wireless/zd1211rw/zd_mac.c | 10 +++--- include/net/mac80211.h | 57 ++++++++---------------------- net/mac80211/driver-ops.h | 12 +++---- net/mac80211/iface.c | 14 ++------ net/mac80211/pm.c | 6 +--- net/mac80211/util.c | 9 ++--- 25 files changed, 170 insertions(+), 214 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 39410016b4ff..e1f04bb437e3 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1400,15 +1400,15 @@ static void adm8211_configure_filter(struct ieee80211_hw *dev, } static int adm8211_add_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct adm8211_priv *priv = dev->priv; if (priv->mode != NL80211_IFTYPE_MONITOR) return -EOPNOTSUPP; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: - priv->mode = conf->type; + priv->mode = vif->type; break; default: return -EOPNOTSUPP; @@ -1416,8 +1416,8 @@ static int adm8211_add_interface(struct ieee80211_hw *dev, ADM8211_IDLE(); - ADM8211_CSR_WRITE(PAR0, le32_to_cpu(*(__le32 *)conf->mac_addr)); - ADM8211_CSR_WRITE(PAR1, le16_to_cpu(*(__le16 *)(conf->mac_addr + 4))); + ADM8211_CSR_WRITE(PAR0, le32_to_cpu(*(__le32 *)vif->addr)); + ADM8211_CSR_WRITE(PAR1, le16_to_cpu(*(__le16 *)(vif->addr + 4))); adm8211_update_mode(dev); @@ -1427,7 +1427,7 @@ static int adm8211_add_interface(struct ieee80211_hw *dev, } static void adm8211_remove_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct adm8211_priv *priv = dev->priv; priv->mode = NL80211_IFTYPE_MONITOR; diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 2517364d3ebe..0fb419936dff 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1789,7 +1789,7 @@ static void at76_mac80211_stop(struct ieee80211_hw *hw) } static int at76_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct at76_priv *priv = hw->priv; int ret = 0; @@ -1798,7 +1798,7 @@ static int at76_add_interface(struct ieee80211_hw *hw, mutex_lock(&priv->mtx); - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: priv->iw_mode = IW_MODE_INFRA; break; @@ -1814,7 +1814,7 @@ exit: } static void at76_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { at76_dbg(DBG_MAC80211, "%s()", __func__); } diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 20f04ab2b13e..4d27f7f67c76 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1939,7 +1939,7 @@ err_free: } static int ar9170_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct ar9170 *ar = hw->priv; struct ath_common *common = &ar->common; @@ -1952,8 +1952,8 @@ static int ar9170_op_add_interface(struct ieee80211_hw *hw, goto unlock; } - ar->vif = conf->vif; - memcpy(common->macaddr, conf->mac_addr, ETH_ALEN); + ar->vif = vif; + memcpy(common->macaddr, vif->addr, ETH_ALEN); if (modparam_nohwcrypt || (ar->vif->type != NL80211_IFTYPE_STATION)) { ar->rx_software_decryption = true; @@ -1973,7 +1973,7 @@ unlock: } static void ar9170_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct ar9170 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index a4c086f069b1..20c7e5b450aa 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -225,9 +225,9 @@ static int ath5k_reset_wake(struct ath5k_softc *sc); static int ath5k_start(struct ieee80211_hw *hw); static void ath5k_stop(struct ieee80211_hw *hw); static int ath5k_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); static void ath5k_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); static int ath5k_config(struct ieee80211_hw *hw, u32 changed); static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw, int mc_count, struct dev_addr_list *mc_list); @@ -2785,7 +2785,7 @@ static void ath5k_stop(struct ieee80211_hw *hw) } static int ath5k_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct ath5k_softc *sc = hw->priv; int ret; @@ -2796,22 +2796,22 @@ static int ath5k_add_interface(struct ieee80211_hw *hw, goto end; } - sc->vif = conf->vif; + sc->vif = vif; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_MONITOR: - sc->opmode = conf->type; + sc->opmode = vif->type; break; default: ret = -EOPNOTSUPP; goto end; } - ath5k_hw_set_lladdr(sc->ah, conf->mac_addr); + ath5k_hw_set_lladdr(sc->ah, vif->addr); ath5k_mode_setup(sc); ret = 0; @@ -2822,13 +2822,13 @@ end: static void ath5k_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct ath5k_softc *sc = hw->priv; u8 mac[ETH_ALEN] = {}; mutex_lock(&sc->lock); - if (sc->vif != conf->vif) + if (sc->vif != vif) goto end; ath5k_hw_set_lladdr(sc->ah, mac); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 3f5b887d0fcd..446bd23756e5 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2534,12 +2534,12 @@ static void ath9k_stop(struct ieee80211_hw *hw) } static int ath9k_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct ath_wiphy *aphy = hw->priv; struct ath_softc *sc = aphy->sc; struct ath_common *common = ath9k_hw_common(sc->sc_ah); - struct ath_vif *avp = (void *)conf->vif->drv_priv; + struct ath_vif *avp = (void *)vif->drv_priv; enum nl80211_iftype ic_opmode = NL80211_IFTYPE_UNSPECIFIED; int ret = 0; @@ -2551,7 +2551,7 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, goto out; } - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: ic_opmode = NL80211_IFTYPE_STATION; break; @@ -2562,11 +2562,11 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, ret = -ENOBUFS; goto out; } - ic_opmode = conf->type; + ic_opmode = vif->type; break; default: ath_print(common, ATH_DBG_FATAL, - "Interface type %d not yet supported\n", conf->type); + "Interface type %d not yet supported\n", vif->type); ret = -EOPNOTSUPP; goto out; } @@ -2598,18 +2598,18 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, * Enable MIB interrupts when there are hardware phy counters. * Note we only do this (at the moment) for station mode. */ - if ((conf->type == NL80211_IFTYPE_STATION) || - (conf->type == NL80211_IFTYPE_ADHOC) || - (conf->type == NL80211_IFTYPE_MESH_POINT)) { + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_ADHOC) || + (vif->type == NL80211_IFTYPE_MESH_POINT)) { sc->imask |= ATH9K_INT_MIB; sc->imask |= ATH9K_INT_TSFOOR; } ath9k_hw_set_interrupts(sc->sc_ah, sc->imask); - if (conf->type == NL80211_IFTYPE_AP || - conf->type == NL80211_IFTYPE_ADHOC || - conf->type == NL80211_IFTYPE_MONITOR) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC || + vif->type == NL80211_IFTYPE_MONITOR) ath_start_ani(common); out: @@ -2618,12 +2618,12 @@ out: } static void ath9k_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct ath_wiphy *aphy = hw->priv; struct ath_softc *sc = aphy->sc; struct ath_common *common = ath9k_hw_common(sc->sc_ah); - struct ath_vif *avp = (void *)conf->vif->drv_priv; + struct ath_vif *avp = (void *)vif->drv_priv; int i; ath_print(common, ATH_DBG_CONFIG, "Detach Interface\n"); @@ -2644,7 +2644,7 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, sc->sc_flags &= ~SC_OP_BEACONS; for (i = 0; i < ARRAY_SIZE(sc->beacon.bslot); i++) { - if (sc->beacon.bslot[i] == conf->vif) { + if (sc->beacon.bslot[i] == vif) { printk(KERN_DEBUG "%s: vif had allocated beacon " "slot\n", __func__); sc->beacon.bslot[i] = NULL; diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b0b5ce950008..6634a77fc766 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4391,7 +4391,7 @@ err_busdown: } static int b43_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev; @@ -4399,24 +4399,24 @@ static int b43_op_add_interface(struct ieee80211_hw *hw, /* TODO: allow WDS/AP devices to coexist */ - if (conf->type != NL80211_IFTYPE_AP && - conf->type != NL80211_IFTYPE_MESH_POINT && - conf->type != NL80211_IFTYPE_STATION && - conf->type != NL80211_IFTYPE_WDS && - conf->type != NL80211_IFTYPE_ADHOC) + if (vif->type != NL80211_IFTYPE_AP && + vif->type != NL80211_IFTYPE_MESH_POINT && + vif->type != NL80211_IFTYPE_STATION && + vif->type != NL80211_IFTYPE_WDS && + vif->type != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; mutex_lock(&wl->mutex); if (wl->operating) goto out_mutex_unlock; - b43dbg(wl, "Adding Interface type %d\n", conf->type); + b43dbg(wl, "Adding Interface type %d\n", vif->type); dev = wl->current_dev; wl->operating = 1; - wl->vif = conf->vif; - wl->if_type = conf->type; - memcpy(wl->mac_addr, conf->mac_addr, ETH_ALEN); + wl->vif = vif; + wl->if_type = vif->type; + memcpy(wl->mac_addr, vif->addr, ETH_ALEN); b43_adjust_opmode(dev); b43_set_pretbtt(dev); @@ -4431,17 +4431,17 @@ static int b43_op_add_interface(struct ieee80211_hw *hw, } static void b43_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev = wl->current_dev; - b43dbg(wl, "Removing Interface type %d\n", conf->type); + b43dbg(wl, "Removing Interface type %d\n", vif->type); mutex_lock(&wl->mutex); B43_WARN_ON(!wl->operating); - B43_WARN_ON(wl->vif != conf->vif); + B43_WARN_ON(wl->vif != vif); wl->vif = NULL; wl->operating = 0; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index ab6a18c2e9d9..494017e4fcc9 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3361,7 +3361,7 @@ err_kfree_lo_control: } static int b43legacy_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); struct b43legacy_wldev *dev; @@ -3370,23 +3370,23 @@ static int b43legacy_op_add_interface(struct ieee80211_hw *hw, /* TODO: allow WDS/AP devices to coexist */ - if (conf->type != NL80211_IFTYPE_AP && - conf->type != NL80211_IFTYPE_STATION && - conf->type != NL80211_IFTYPE_WDS && - conf->type != NL80211_IFTYPE_ADHOC) + if (vif->type != NL80211_IFTYPE_AP && + vif->type != NL80211_IFTYPE_STATION && + vif->type != NL80211_IFTYPE_WDS && + vif->type != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; mutex_lock(&wl->mutex); if (wl->operating) goto out_mutex_unlock; - b43legacydbg(wl, "Adding Interface type %d\n", conf->type); + b43legacydbg(wl, "Adding Interface type %d\n", vif->type); dev = wl->current_dev; wl->operating = 1; - wl->vif = conf->vif; - wl->if_type = conf->type; - memcpy(wl->mac_addr, conf->mac_addr, ETH_ALEN); + wl->vif = vif; + wl->if_type = vif->type; + memcpy(wl->mac_addr, vif->addr, ETH_ALEN); spin_lock_irqsave(&wl->irq_lock, flags); b43legacy_adjust_opmode(dev); @@ -3403,18 +3403,18 @@ static int b43legacy_op_add_interface(struct ieee80211_hw *hw, } static void b43legacy_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); struct b43legacy_wldev *dev = wl->current_dev; unsigned long flags; - b43legacydbg(wl, "Removing Interface type %d\n", conf->type); + b43legacydbg(wl, "Removing Interface type %d\n", vif->type); mutex_lock(&wl->mutex); B43legacy_WARN_ON(!wl->operating); - B43legacy_WARN_ON(wl->vif != conf->vif); + B43legacy_WARN_ON(wl->vif != vif); wl->vif = NULL; wl->operating = 0; diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index e3b96b48b7fe..14f482960d7f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2584,12 +2584,12 @@ int iwl_set_mode(struct iwl_priv *priv, int mode) EXPORT_SYMBOL(iwl_set_mode); int iwl_mac_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct iwl_priv *priv = hw->priv; unsigned long flags; - IWL_DEBUG_MAC80211(priv, "enter: type %d\n", conf->type); + IWL_DEBUG_MAC80211(priv, "enter: type %d\n", vif->type); if (priv->vif) { IWL_DEBUG_MAC80211(priv, "leave - vif != NULL\n"); @@ -2597,19 +2597,19 @@ int iwl_mac_add_interface(struct ieee80211_hw *hw, } spin_lock_irqsave(&priv->lock, flags); - priv->vif = conf->vif; - priv->iw_mode = conf->type; + priv->vif = vif; + priv->iw_mode = vif->type; spin_unlock_irqrestore(&priv->lock, flags); mutex_lock(&priv->mutex); - if (conf->mac_addr) { - IWL_DEBUG_MAC80211(priv, "Set %pM\n", conf->mac_addr); - memcpy(priv->mac_addr, conf->mac_addr, ETH_ALEN); + if (vif->addr) { + IWL_DEBUG_MAC80211(priv, "Set %pM\n", vif->addr); + memcpy(priv->mac_addr, vif->addr, ETH_ALEN); } - if (iwl_set_mode(priv, conf->type) == -EAGAIN) + if (iwl_set_mode(priv, vif->type) == -EAGAIN) /* we are not ready, will run again when ready */ set_bit(STATUS_MODE_PENDING, &priv->status); @@ -2621,7 +2621,7 @@ int iwl_mac_add_interface(struct ieee80211_hw *hw, EXPORT_SYMBOL(iwl_mac_add_interface); void iwl_mac_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct iwl_priv *priv = hw->priv; @@ -2634,7 +2634,7 @@ void iwl_mac_remove_interface(struct ieee80211_hw *hw, priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK; iwlcore_commit_rxon(priv); } - if (priv->vif == conf->vif) { + if (priv->vif == vif) { priv->vif = NULL; memset(priv->bssid, 0, ETH_ALEN); } diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h index f7acbb32900a..1728f961dcba 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.h +++ b/drivers/net/wireless/iwlwifi/iwl-core.h @@ -332,9 +332,9 @@ int iwl_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb); int iwl_commit_rxon(struct iwl_priv *priv); int iwl_set_mode(struct iwl_priv *priv, int mode); int iwl_mac_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); void iwl_mac_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); int iwl_mac_config(struct ieee80211_hw *hw, u32 changed); void iwl_config_ap(struct iwl_priv *priv); int iwl_mac_get_tx_stats(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 019431d2f8a9..15a367680f59 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -318,14 +318,14 @@ static void lbtf_op_stop(struct ieee80211_hw *hw) } static int lbtf_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct lbtf_private *priv = hw->priv; if (priv->vif != NULL) return -EOPNOTSUPP; - priv->vif = conf->vif; - switch (conf->type) { + priv->vif = vif; + switch (vif->type) { case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_AP: lbtf_set_mode(priv, LBTF_AP_MODE); @@ -337,12 +337,12 @@ static int lbtf_op_add_interface(struct ieee80211_hw *hw, priv->vif = NULL; return -EOPNOTSUPP; } - lbtf_set_mac_address(priv, (u8 *) conf->mac_addr); + lbtf_set_mac_address(priv, (u8 *) vif->addr); return 0; } static void lbtf_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct lbtf_private *priv = hw->priv; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4dee69a38c1d..84df3fcf37b3 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -584,24 +584,24 @@ static void mac80211_hwsim_stop(struct ieee80211_hw *hw) static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { printk(KERN_DEBUG "%s:%s (type=%d mac_addr=%pM)\n", - wiphy_name(hw->wiphy), __func__, conf->type, - conf->mac_addr); - hwsim_set_magic(conf->vif); + wiphy_name(hw->wiphy), __func__, vif->type, + vif->addr); + hwsim_set_magic(vif); return 0; } static void mac80211_hwsim_remove_interface( - struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf) + struct ieee80211_hw *hw, struct ieee80211_vif *vif) { printk(KERN_DEBUG "%s:%s (type=%d mac_addr=%pM)\n", - wiphy_name(hw->wiphy), __func__, conf->type, - conf->mac_addr); - hwsim_check_magic(conf->vif); - hwsim_clear_magic(conf->vif); + wiphy_name(hw->wiphy), __func__, vif->type, + vif->addr); + hwsim_check_magic(vif); + hwsim_clear_magic(vif); } diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 97a95decf9a8..c1c6ecd0c5b3 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -2835,7 +2835,7 @@ static void mwl8k_stop(struct ieee80211_hw *hw) } static int mwl8k_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct mwl8k_priv *priv = hw->priv; struct mwl8k_vif *mwl8k_vif; @@ -2849,7 +2849,7 @@ static int mwl8k_add_interface(struct ieee80211_hw *hw, /* * We only support managed interfaces for now. */ - if (conf->type != NL80211_IFTYPE_STATION) + if (vif->type != NL80211_IFTYPE_STATION) return -EINVAL; /* @@ -2865,24 +2865,24 @@ static int mwl8k_add_interface(struct ieee80211_hw *hw, } /* Clean out driver private area */ - mwl8k_vif = MWL8K_VIF(conf->vif); + mwl8k_vif = MWL8K_VIF(vif); memset(mwl8k_vif, 0, sizeof(*mwl8k_vif)); /* Set and save the mac address */ - mwl8k_cmd_set_mac_addr(hw, conf->mac_addr); - memcpy(mwl8k_vif->mac_addr, conf->mac_addr, ETH_ALEN); + mwl8k_cmd_set_mac_addr(hw, vif->addr); + memcpy(mwl8k_vif->mac_addr, vif->addr, ETH_ALEN); /* Set Initial sequence number to zero */ mwl8k_vif->seqno = 0; - priv->vif = conf->vif; + priv->vif = vif; priv->current_channel = NULL; return 0; } static void mwl8k_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct mwl8k_priv *priv = hw->priv; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 18012dbfb45d..26428e4c9c60 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -216,7 +216,7 @@ static void p54_stop(struct ieee80211_hw *dev) } static int p54_add_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct p54_common *priv = dev->priv; @@ -226,28 +226,28 @@ static int p54_add_interface(struct ieee80211_hw *dev, return -EOPNOTSUPP; } - priv->vif = conf->vif; + priv->vif = vif; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - priv->mode = conf->type; + priv->mode = vif->type; break; default: mutex_unlock(&priv->conf_mutex); return -EOPNOTSUPP; } - memcpy(priv->mac_addr, conf->mac_addr, ETH_ALEN); + memcpy(priv->mac_addr, vif->addr, ETH_ALEN); p54_setup_mac(priv); mutex_unlock(&priv->conf_mutex); return 0; } static void p54_remove_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct p54_common *priv = dev->priv; diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index a664a999b2b0..b4c6e0a6d7e0 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -1019,9 +1019,9 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb); int rt2x00mac_start(struct ieee80211_hw *hw); void rt2x00mac_stop(struct ieee80211_hw *hw); int rt2x00mac_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); void rt2x00mac_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed); void rt2x00mac_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index de549c244ed8..00f1f939f1bb 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -187,10 +187,10 @@ void rt2x00mac_stop(struct ieee80211_hw *hw) EXPORT_SYMBOL_GPL(rt2x00mac_stop); int rt2x00mac_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct rt2x00_dev *rt2x00dev = hw->priv; - struct rt2x00_intf *intf = vif_to_intf(conf->vif); + struct rt2x00_intf *intf = vif_to_intf(vif); struct data_queue *queue = rt2x00queue_get_queue(rt2x00dev, QID_BEACON); struct queue_entry *entry = NULL; unsigned int i; @@ -203,7 +203,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, !test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags)) return -ENODEV; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_AP: /* * We don't support mixed combinations of @@ -263,7 +263,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, * increase interface count and start initialization. */ - if (conf->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP) rt2x00dev->intf_ap_count++; else rt2x00dev->intf_sta_count++; @@ -273,16 +273,16 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, mutex_init(&intf->beacon_skb_mutex); intf->beacon = entry; - if (conf->type == NL80211_IFTYPE_AP) - memcpy(&intf->bssid, conf->mac_addr, ETH_ALEN); - memcpy(&intf->mac, conf->mac_addr, ETH_ALEN); + if (vif->type == NL80211_IFTYPE_AP) + memcpy(&intf->bssid, vif->addr, ETH_ALEN); + memcpy(&intf->mac, vif->addr, ETH_ALEN); /* * The MAC adddress must be configured after the device * has been initialized. Otherwise the device can reset * the MAC registers. */ - rt2x00lib_config_intf(rt2x00dev, intf, conf->type, intf->mac, NULL); + rt2x00lib_config_intf(rt2x00dev, intf, vif->type, intf->mac, NULL); /* * Some filters depend on the current working mode. We can force @@ -296,10 +296,10 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, EXPORT_SYMBOL_GPL(rt2x00mac_add_interface); void rt2x00mac_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct rt2x00_dev *rt2x00dev = hw->priv; - struct rt2x00_intf *intf = vif_to_intf(conf->vif); + struct rt2x00_intf *intf = vif_to_intf(vif); /* * Don't allow interfaces to be remove while @@ -307,11 +307,11 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw, * no interface is present. */ if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags) || - (conf->type == NL80211_IFTYPE_AP && !rt2x00dev->intf_ap_count) || - (conf->type != NL80211_IFTYPE_AP && !rt2x00dev->intf_sta_count)) + (vif->type == NL80211_IFTYPE_AP && !rt2x00dev->intf_ap_count) || + (vif->type != NL80211_IFTYPE_AP && !rt2x00dev->intf_sta_count)) return; - if (conf->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP) rt2x00dev->intf_ap_count--; else rt2x00dev->intf_sta_count--; diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index f01f1ef9e3be..5a2b7199f5d5 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -652,7 +652,7 @@ static void rtl8180_stop(struct ieee80211_hw *dev) } static int rtl8180_add_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct rtl8180_priv *priv = dev->priv; @@ -662,27 +662,27 @@ static int rtl8180_add_interface(struct ieee80211_hw *dev, if (priv->vif) return -EBUSY; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: break; default: return -EOPNOTSUPP; } - priv->vif = conf->vif; + priv->vif = vif; rtl818x_iowrite8(priv, &priv->map->EEPROM_CMD, RTL818X_EEPROM_CMD_CONFIG); rtl818x_iowrite32(priv, (__le32 __iomem *)&priv->map->MAC[0], - le32_to_cpu(*(__le32 *)conf->mac_addr)); + le32_to_cpu(*(__le32 *)vif->addr)); rtl818x_iowrite16(priv, (__le16 __iomem *)&priv->map->MAC[4], - le16_to_cpu(*(__le16 *)(conf->mac_addr + 4))); + le16_to_cpu(*(__le16 *)(vif->addr + 4))); rtl818x_iowrite8(priv, &priv->map->EEPROM_CMD, RTL818X_EEPROM_CMD_NORMAL); return 0; } static void rtl8180_remove_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct rtl8180_priv *priv = dev->priv; priv->vif = NULL; diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index 1cb0eff46223..f336c63053c1 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -1018,7 +1018,7 @@ static void rtl8187_stop(struct ieee80211_hw *dev) } static int rtl8187_add_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct rtl8187_priv *priv = dev->priv; int i; @@ -1028,7 +1028,7 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev, if (priv->vif) goto exit; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: break; default: @@ -1036,12 +1036,12 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev, } ret = 0; - priv->vif = conf->vif; + priv->vif = vif; rtl818x_iowrite8(priv, &priv->map->EEPROM_CMD, RTL818X_EEPROM_CMD_CONFIG); for (i = 0; i < ETH_ALEN; i++) rtl818x_iowrite8(priv, &priv->map->MAC[i], - ((u8 *)conf->mac_addr)[i]); + ((u8 *)vif->addr)[i]); rtl818x_iowrite8(priv, &priv->map->EEPROM_CMD, RTL818X_EEPROM_CMD_NORMAL); exit: @@ -1050,7 +1050,7 @@ exit: } static void rtl8187_remove_interface(struct ieee80211_hw *dev, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct rtl8187_priv *priv = dev->priv; mutex_lock(&priv->conf_mutex); diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 6aeffbe9e401..4e373f3dbc43 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -511,13 +511,13 @@ static void wl1251_op_stop(struct ieee80211_hw *hw) } static int wl1251_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct wl1251 *wl = hw->priv; int ret = 0; wl1251_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %pM", - conf->type, conf->mac_addr); + vif->type, vif->addr); mutex_lock(&wl->mutex); if (wl->vif) { @@ -525,9 +525,9 @@ static int wl1251_op_add_interface(struct ieee80211_hw *hw, goto out; } - wl->vif = conf->vif; + wl->vif = vif; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: wl->bss_type = BSS_TYPE_STA_BSS; break; @@ -539,8 +539,8 @@ static int wl1251_op_add_interface(struct ieee80211_hw *hw, goto out; } - if (memcmp(wl->mac_addr, conf->mac_addr, ETH_ALEN)) { - memcpy(wl->mac_addr, conf->mac_addr, ETH_ALEN); + if (memcmp(wl->mac_addr, vif->addr, ETH_ALEN)) { + memcpy(wl->mac_addr, vif->addr, ETH_ALEN); SET_IEEE80211_PERM_ADDR(wl->hw, wl->mac_addr); ret = wl1251_acx_station_id(wl); if (ret < 0) @@ -553,7 +553,7 @@ out: } static void wl1251_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct wl1251 *wl = hw->priv; diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c index 4a997381a8d0..e4867b895c43 100644 --- a/drivers/net/wireless/wl12xx/wl1271_main.c +++ b/drivers/net/wireless/wl12xx/wl1271_main.c @@ -1039,13 +1039,13 @@ static void wl1271_op_stop(struct ieee80211_hw *hw) } static int wl1271_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct wl1271 *wl = hw->priv; int ret = 0; wl1271_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %pM", - conf->type, conf->mac_addr); + vif->type, vif->addr); mutex_lock(&wl->mutex); if (wl->vif) { @@ -1053,9 +1053,9 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw, goto out; } - wl->vif = conf->vif; + wl->vif = vif; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_STATION: wl->bss_type = BSS_TYPE_STA_BSS; break; @@ -1075,7 +1075,7 @@ out: } static void wl1271_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct wl1271 *wl = hw->priv; diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 8a243732c519..c4f41d0016c5 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -872,7 +872,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) } static int zd_op_add_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct zd_mac *mac = zd_hw_mac(hw); @@ -880,22 +880,22 @@ static int zd_op_add_interface(struct ieee80211_hw *hw, if (mac->type != NL80211_IFTYPE_UNSPECIFIED) return -EOPNOTSUPP; - switch (conf->type) { + switch (vif->type) { case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - mac->type = conf->type; + mac->type = vif->type; break; default: return -EOPNOTSUPP; } - return zd_write_mac_addr(&mac->chip, conf->mac_addr); + return zd_write_mac_addr(&mac->chip, vif->addr); } static void zd_op_remove_interface(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { struct zd_mac *mac = zd_hw_mac(hw); mac->type = NL80211_IFTYPE_UNSPECIFIED; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 77ea34b03285..08d41357dcbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -701,33 +701,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * struct ieee80211_if_init_conf - initial configuration of an interface - * - * @vif: pointer to a driver-use per-interface structure. The pointer - * itself is also used for various functions including - * ieee80211_beacon_get() and ieee80211_get_buffered_bc(). - * @type: one of &enum nl80211_iftype constants. Determines the type of - * added/removed interface. - * @mac_addr: pointer to MAC address of the interface. This pointer is valid - * until the interface is removed (i.e. it cannot be used after - * remove_interface() callback was called for this interface). - * - * This structure is used in add_interface() and remove_interface() - * callbacks of &struct ieee80211_hw. - * - * When you allow multiple interfaces to be added to your PHY, take care - * that the hardware can actually handle multiple MAC addresses. However, - * also take care that when there's no interface left with mac_addr != %NULL - * you remove the MAC address from the device to avoid acknowledging packets - * in pure monitor mode. - */ -struct ieee80211_if_init_conf { - enum nl80211_iftype type; - struct ieee80211_vif *vif; - void *mac_addr; -}; - /** * enum ieee80211_key_alg - key algorithm * @ALG_WEP: WEP40 or WEP104 @@ -1555,9 +1528,9 @@ struct ieee80211_ops { int (*start)(struct ieee80211_hw *hw); void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); void (*remove_interface)(struct ieee80211_hw *hw, - struct ieee80211_if_init_conf *conf); + struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); void (*bss_info_changed)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -1845,7 +1818,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, /** * ieee80211_beacon_get_tim - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @tim_offset: pointer to variable that will receive the TIM IE offset. * Set to 0 if invalid (in non-AP modes). * @tim_length: pointer to variable that will receive the TIM IE length, @@ -1873,7 +1846,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, /** * ieee80211_beacon_get - beacon generation function * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * * See ieee80211_beacon_get_tim(). */ @@ -1886,7 +1859,7 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @frame: pointer to the frame that is going to be protected by the RTS. * @frame_len: the frame length (in octets). * @frame_txctl: &struct ieee80211_tx_info of the frame. @@ -1905,7 +1878,7 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, /** * ieee80211_rts_duration - Get the duration field for an RTS frame * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @frame_len: the length of the frame that is going to be protected by the RTS. * @frame_txctl: &struct ieee80211_tx_info of the frame. * @@ -1920,7 +1893,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, /** * ieee80211_ctstoself_get - CTS-to-self frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @frame: pointer to the frame that is going to be protected by the CTS-to-self. * @frame_len: the frame length (in octets). * @frame_txctl: &struct ieee80211_tx_info of the frame. @@ -1940,7 +1913,7 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw, /** * ieee80211_ctstoself_duration - Get the duration field for a CTS-to-self frame * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @frame_len: the length of the frame that is going to be protected by the CTS-to-self. * @frame_txctl: &struct ieee80211_tx_info of the frame. * @@ -1956,7 +1929,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, /** * ieee80211_generic_frame_duration - Calculate the duration field for a frame * @hw: pointer obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @frame_len: the length of the frame. * @rate: the rate at which the frame is going to be transmitted. * @@ -1971,7 +1944,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, /** * ieee80211_get_buffered_bc - accessing buffered broadcast and multicast frames * @hw: pointer as obtained from ieee80211_alloc_hw(). - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * * Function for accessing buffered broadcast and multicast frames. If * hardware/firmware does not implement buffering of broadcast/multicast @@ -2139,7 +2112,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid); /** * ieee80211_start_tx_ba_cb - low level driver ready to aggregate. - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf + * @vif: &struct ieee80211_vif pointer from the add_interface callback * @ra: receiver address of the BA session recipient. * @tid: the TID to BA on. * @@ -2150,7 +2123,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); /** * ieee80211_start_tx_ba_cb_irqsafe - low level driver ready to aggregate. - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf + * @vif: &struct ieee80211_vif pointer from the add_interface callback * @ra: receiver address of the BA session recipient. * @tid: the TID to BA on. * @@ -2178,7 +2151,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *sta, u16 tid, /** * ieee80211_stop_tx_ba_cb - low level driver ready to stop aggregate. - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf + * @vif: &struct ieee80211_vif pointer from the add_interface callback * @ra: receiver address of the BA session recipient. * @tid: the desired TID to BA on. * @@ -2189,7 +2162,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); /** * ieee80211_stop_tx_ba_cb_irqsafe - low level driver ready to stop aggregate. - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf + * @vif: &struct ieee80211_vif pointer from the add_interface callback * @ra: receiver address of the BA session recipient. * @tid: the desired TID to BA on. * @@ -2268,7 +2241,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, /** * ieee80211_beacon_loss - inform hardware does not receive beacons * - * @vif: &struct ieee80211_vif pointer from &struct ieee80211_if_init_conf. + * @vif: &struct ieee80211_vif pointer from the add_interface callback. * * When beacon filtering is enabled with IEEE80211_HW_BEACON_FILTERING and * IEEE80211_CONF_PS is set, the driver needs to inform whenever the diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index cbe133bcdf34..bc7c8f55487a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -36,18 +36,18 @@ static inline void drv_stop(struct ieee80211_local *local) } static inline int drv_add_interface(struct ieee80211_local *local, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { - int ret = local->ops->add_interface(&local->hw, conf); - trace_drv_add_interface(local, vif_to_sdata(conf->vif), ret); + int ret = local->ops->add_interface(&local->hw, vif); + trace_drv_add_interface(local, vif_to_sdata(vif), ret); return ret; } static inline void drv_remove_interface(struct ieee80211_local *local, - struct ieee80211_if_init_conf *conf) + struct ieee80211_vif *vif) { - local->ops->remove_interface(&local->hw, conf); - trace_drv_remove_interface(local, vif_to_sdata(conf->vif)); + local->ops->remove_interface(&local->hw, vif); + trace_drv_remove_interface(local, vif_to_sdata(vif)); } static inline int drv_config(struct ieee80211_local *local, u32 changed) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7d410f15281a..00a1f4ccdaf1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -96,7 +96,6 @@ static int ieee80211_open(struct net_device *dev) struct ieee80211_sub_if_data *nsdata; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - struct ieee80211_if_init_conf conf; u32 changed = 0; int res; u32 hw_reconf_flags = 0; @@ -248,10 +247,7 @@ static int ieee80211_open(struct net_device *dev) ieee80211_configure_filter(local); break; default: - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; - res = drv_add_interface(local, &conf); + res = drv_add_interface(local, &sdata->vif); if (res) goto err_stop; @@ -334,7 +330,7 @@ static int ieee80211_open(struct net_device *dev) return 0; err_del_interface: - drv_remove_interface(local, &conf); + drv_remove_interface(local, &sdata->vif); err_stop: if (!local->open_count) drv_stop(local); @@ -349,7 +345,6 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_init_conf conf; struct sta_info *sta; unsigned long flags; struct sk_buff *skb, *tmp; @@ -533,12 +528,9 @@ static int ieee80211_stop(struct net_device *dev) BSS_CHANGED_BEACON_ENABLED); } - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; /* disable all keys for as long as this netdev is down */ ieee80211_disable_keys(sdata); - drv_remove_interface(local, &conf); + drv_remove_interface(local, &sdata->vif); } sdata->bss = NULL; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 913dc7e3b29e..47f818959ad7 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -10,7 +10,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_init_conf conf; struct sta_info *sta; unsigned long flags; @@ -100,10 +99,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; - drv_remove_interface(local, &conf); + drv_remove_interface(local, &sdata->vif); } /* stop hardware - this must stop RX */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1fdb80ff9241..4b930308b1fb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1075,7 +1075,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_init_conf conf; struct sta_info *sta; unsigned long flags; int res; @@ -1094,12 +1093,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && - ieee80211_sdata_running(sdata)) { - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = sdata->vif.addr; - res = drv_add_interface(local, &conf); - } + ieee80211_sdata_running(sdata)) + res = drv_add_interface(local, &sdata->vif); } /* add STAs back */ -- cgit v1.2.3 From e1781ed33a8809c58ad6c3b6d432d656446efa43 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 23 Dec 2009 13:15:47 +0100 Subject: mac80211: annotate sleeping driver ops To make it easier to notice cases of calling sleeping ops in atomic context, annotate driver-ops.h with appropiate might_sleep() calls. At the same time, also document in mac80211.h the op functions with missing contexts. mac80211 doesn't seem to use get_tx_stats anywhere currently. Just to be on the safe side, I documented it to be atomic, but hopefully the op can be removed in the future. Compile-tested only. Signed-off-by: Kalle Valo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 43 +++++++++++++++++++++++---------- net/mac80211/driver-ops.h | 60 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 08d41357dcbe..5ee666ae4c88 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1383,7 +1383,7 @@ enum ieee80211_ampdu_mlme_action { * When the device is started it should not have a MAC address * to avoid acknowledging frames before a non-monitor device * is added. - * Must be implemented. + * Must be implemented and can sleep. * * @stop: Called after last netdevice attached to the hardware * is disabled. This should turn off the hardware (at least @@ -1391,7 +1391,7 @@ enum ieee80211_ampdu_mlme_action { * May be called right after add_interface if that rejects * an interface. If you added any work onto the mac80211 workqueue * you should ensure to cancel it on this callback. - * Must be implemented. + * Must be implemented and can sleep. * * @add_interface: Called when a netdevice attached to the hardware is * enabled. Because it is not called for monitor mode devices, @start @@ -1401,7 +1401,7 @@ enum ieee80211_ampdu_mlme_action { * interface is given in the conf parameter. * The callback may refuse to add an interface by returning a * negative error code (which will be seen in userspace.) - * Must be implemented. + * Must be implemented and can sleep. * * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface @@ -1410,19 +1410,20 @@ enum ieee80211_ampdu_mlme_action { * must be cleared so the device no longer acknowledges packets, * the mac_addr member of the conf structure is, however, set to the * MAC address of the device going away. - * Hence, this callback must be implemented. + * Hence, this callback must be implemented. It can sleep. * * @config: Handler for configuration requests. IEEE 802.11 code calls this * function to change hardware configuration, e.g., channel. * This function should never fail but returns a negative error code - * if it does. + * if it does. The callback can sleep. * * @bss_info_changed: Handler for configuration requests related to BSS * parameters that may vary during BSS's lifespan, and may affect low * level driver (e.g. assoc/disassoc status, erp parameters). * This function should not be used if no BSS has been set, unless * for association indication. The @changed parameter indicates which - * of the bss parameters has changed when a call is made. + * of the bss parameters has changed when a call is made. The callback + * can sleep. * * @prepare_multicast: Prepare for multicast filter configuration. * This callback is optional, and its return value is passed @@ -1430,20 +1431,22 @@ enum ieee80211_ampdu_mlme_action { * * @configure_filter: Configure the device's RX filter. * See the section "Frame filtering" for more information. - * This callback must be implemented. + * This callback must be implemented and can sleep. * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit * must be set or cleared for a given STA. Must be atomic. * * @set_key: See the section "Hardware crypto acceleration" - * This callback can sleep, and is only called between add_interface - * and remove_interface calls, i.e. while the given virtual interface + * This callback is only called between add_interface and + * remove_interface calls, i.e. while the given virtual interface * is enabled. * Returns a negative error code if the key can't be added. + * The callback can sleep. * * @update_tkip_key: See the section "Hardware crypto acceleration" * This callback will be called in the context of Rx. Called for drivers * which set IEEE80211_KEY_FLAG_TKIP_REQ_RX_P1_KEY. + * The callback can sleep. * * @hw_scan: Ask the hardware to service the scan request, no need to start * the scan state machine in stack. The scan must honour the channel @@ -1457,21 +1460,28 @@ enum ieee80211_ampdu_mlme_action { * When the scan finishes, ieee80211_scan_completed() must be called; * note that it also must be called when the scan cannot finish due to * any error unless this callback returned a negative error code. + * The callback can sleep. * * @sw_scan_start: Notifier function that is called just before a software scan * is started. Can be NULL, if the driver doesn't need this notification. + * The callback can sleep. * - * @sw_scan_complete: Notifier function that is called just after a software scan - * finished. Can be NULL, if the driver doesn't need this notification. + * @sw_scan_complete: Notifier function that is called just after a + * software scan finished. Can be NULL, if the driver doesn't need + * this notification. + * The callback can sleep. * * @get_stats: Return low-level statistics. * Returns zero if statistics are available. + * The callback can sleep. * * @get_tkip_seq: If your device implements TKIP encryption in hardware this * callback should be provided to read the TKIP transmit IVs (both IV32 * and IV16) for the given key from hardware. + * The callback must be atomic. * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) + * The callback can sleep. * * @sta_notify: Notifies low level driver about addition, removal or power * state transition of an associated station, AP, IBSS/WDS/mesh peer etc. @@ -1480,30 +1490,36 @@ enum ieee80211_ampdu_mlme_action { * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * Returns a negative error code on failure. + * The callback can sleep. * * @get_tx_stats: Get statistics of the current TX queue status. This is used * to get number of currently queued packets (queue length), maximum queue * size (limit), and total number of packets sent using each TX queue * (count). The 'stats' pointer points to an array that has hw->queues * items. + * The callback must be atomic. * * @get_tsf: Get the current TSF timer value from firmware/hardware. Currently, * this is only used for IBSS mode BSSID merging and debugging. Is not a * required function. + * The callback can sleep. * * @set_tsf: Set the TSF timer to the specified value in the firmware/hardware. * Currently, this is only used for IBSS mode debugging. Is not a * required function. + * The callback can sleep. * * @reset_tsf: Reset the TSF timer and allow firmware/hardware to synchronize * with other STAs in the IBSS. This is only used in IBSS mode. This * function is optional if the firmware/hardware takes full care of * TSF synchronization. + * The callback can sleep. * * @tx_last_beacon: Determine whether the last IBSS beacon was sent by us. * This is needed only for IBSS mode and the result of this function is * used to determine whether to reply to Probe Requests. * Returns non-zero if this device sent the last beacon. + * The callback can sleep. * * @ampdu_action: Perform a certain A-MPDU action * The RA/TID combination determines the destination and TID we want @@ -1512,16 +1528,19 @@ enum ieee80211_ampdu_mlme_action { * is the first frame we expect to perform the action on. Notice * that TX/RX_STOP can pass NULL for this parameter. * Returns a negative error code on failure. + * The callback must be atomic. * * @rfkill_poll: Poll rfkill hardware state. If you need this, you also * need to set wiphy->rfkill_poll to %true before registration, * and need to call wiphy_rfkill_set_hw_state() in the callback. + * The callback can sleep. * * @testmode_cmd: Implement a cfg80211 test mode command. + * The callback can sleep. * * @flush: Flush all pending frames from the hardware queue, making sure * that the hardware queues are empty. If the parameter @drop is set - * to %true, pending frames may be dropped. + * to %true, pending frames may be dropped. The callback can sleep. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index bc7c8f55487a..8757ea73d544 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -14,6 +14,8 @@ static inline int drv_start(struct ieee80211_local *local) { int ret; + might_sleep(); + local->started = true; smp_mb(); ret = local->ops->start(&local->hw); @@ -23,6 +25,8 @@ static inline int drv_start(struct ieee80211_local *local) static inline void drv_stop(struct ieee80211_local *local) { + might_sleep(); + local->ops->stop(&local->hw); trace_drv_stop(local); @@ -38,7 +42,11 @@ static inline void drv_stop(struct ieee80211_local *local) static inline int drv_add_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { - int ret = local->ops->add_interface(&local->hw, vif); + int ret; + + might_sleep(); + + ret = local->ops->add_interface(&local->hw, vif); trace_drv_add_interface(local, vif_to_sdata(vif), ret); return ret; } @@ -46,13 +54,19 @@ static inline int drv_add_interface(struct ieee80211_local *local, static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { + might_sleep(); + local->ops->remove_interface(&local->hw, vif); trace_drv_remove_interface(local, vif_to_sdata(vif)); } static inline int drv_config(struct ieee80211_local *local, u32 changed) { - int ret = local->ops->config(&local->hw, changed); + int ret; + + might_sleep(); + + ret = local->ops->config(&local->hw, changed); trace_drv_config(local, changed, ret); return ret; } @@ -62,6 +76,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, struct ieee80211_bss_conf *info, u32 changed) { + might_sleep(); + if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); trace_drv_bss_info_changed(local, sdata, info, changed); @@ -111,7 +127,11 @@ static inline int drv_set_key(struct ieee80211_local *local, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) { - int ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); + int ret; + + might_sleep(); + + ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); trace_drv_set_key(local, cmd, sdata, sta, key, ret); return ret; } @@ -121,6 +141,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, const u8 *address, u32 iv32, u16 *phase1key) { + might_sleep(); + if (local->ops->update_tkip_key) local->ops->update_tkip_key(&local->hw, conf, address, iv32, phase1key); @@ -130,13 +152,19 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, static inline int drv_hw_scan(struct ieee80211_local *local, struct cfg80211_scan_request *req) { - int ret = local->ops->hw_scan(&local->hw, req); + int ret; + + might_sleep(); + + ret = local->ops->hw_scan(&local->hw, req); trace_drv_hw_scan(local, req, ret); return ret; } static inline void drv_sw_scan_start(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->sw_scan_start) local->ops->sw_scan_start(&local->hw); trace_drv_sw_scan_start(local); @@ -144,6 +172,8 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local) static inline void drv_sw_scan_complete(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->sw_scan_complete) local->ops->sw_scan_complete(&local->hw); trace_drv_sw_scan_complete(local); @@ -154,6 +184,8 @@ static inline int drv_get_stats(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->get_stats) ret = local->ops->get_stats(&local->hw, stats); trace_drv_get_stats(local, stats, ret); @@ -173,6 +205,9 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, u32 value) { int ret = 0; + + might_sleep(); + if (local->ops->set_rts_threshold) ret = local->ops->set_rts_threshold(&local->hw, value); trace_drv_set_rts_threshold(local, value, ret); @@ -193,6 +228,9 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, const struct ieee80211_tx_queue_params *params) { int ret = -EOPNOTSUPP; + + might_sleep(); + if (local->ops->conf_tx) ret = local->ops->conf_tx(&local->hw, queue, params); trace_drv_conf_tx(local, queue, params, ret); @@ -210,6 +248,9 @@ static inline int drv_get_tx_stats(struct ieee80211_local *local, static inline u64 drv_get_tsf(struct ieee80211_local *local) { u64 ret = -1ULL; + + might_sleep(); + if (local->ops->get_tsf) ret = local->ops->get_tsf(&local->hw); trace_drv_get_tsf(local, ret); @@ -218,6 +259,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local) static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { + might_sleep(); + if (local->ops->set_tsf) local->ops->set_tsf(&local->hw, tsf); trace_drv_set_tsf(local, tsf); @@ -225,6 +268,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) static inline void drv_reset_tsf(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->reset_tsf) local->ops->reset_tsf(&local->hw); trace_drv_reset_tsf(local); @@ -233,6 +278,9 @@ static inline void drv_reset_tsf(struct ieee80211_local *local) static inline int drv_tx_last_beacon(struct ieee80211_local *local) { int ret = 1; + + might_sleep(); + if (local->ops->tx_last_beacon) ret = local->ops->tx_last_beacon(&local->hw); trace_drv_tx_last_beacon(local, ret); @@ -256,12 +304,16 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, static inline void drv_rfkill_poll(struct ieee80211_local *local) { + might_sleep(); + if (local->ops->rfkill_poll) local->ops->rfkill_poll(&local->hw); } static inline void drv_flush(struct ieee80211_local *local, bool drop) { + might_sleep(); + trace_drv_flush(local, drop); if (local->ops->flush) local->ops->flush(&local->hw, drop); -- cgit v1.2.3 From 6f7edb4881bf51300060e89915926e070ace8c4d Mon Sep 17 00:00:00 2001 From: "Catalin(ux) M. BOIE" Date: Tue, 5 Jan 2010 05:50:24 +0100 Subject: IPVS: Allow boot time change of hash size I was very frustrated about the fact that I have to recompile the kernel to change the hash size. So, I created this patch. If IPVS is built-in you can append ip_vs.conn_tab_bits=?? to kernel command line, or, if you built IPVS as modules, you can add options ip_vs conn_tab_bits=??. To keep everything backward compatible, you still can select the size at compile time, and that will be used as default. It has been about a year since this patch was originally posted and subsequently dropped on the basis of insufficient test data. Mark Bergsma has provided the following test results which seem to strongly support the need for larger hash table sizes: We do however run into the same problem with the default setting (212 = 4096 entries), as most of our LVS balancers handle around a million connections/SLAB entries at any point in time (around 100-150 kpps load). With only 4096 hash table entries this implies that each entry consists of a linked list of 256 connections *on average*. To provide some statistics, I did an oprofile run on an 2.6.31 kernel, with both the default 4096 table size, and the same kernel recompiled with IP_VS_CONN_TAB_BITS set to 18 (218 = 262144 entries). I built a quick test setup with a part of Wikimedia/Wikipedia's live traffic mirrored by the switch to the test host. With the default setting, at ~ 120 kpps packet load we saw a typical %si CPU usage of around 30-35%, and oprofile reported a hot spot in ip_vs_conn_in_get: samples % image name app name symbol name 1719761 42.3741 ip_vs.ko ip_vs.ko ip_vs_conn_in_get 302577 7.4554 bnx2 bnx2 /bnx2 181984 4.4840 vmlinux vmlinux __ticket_spin_lock 128636 3.1695 vmlinux vmlinux ip_route_input 74345 1.8318 ip_vs.ko ip_vs.ko ip_vs_conn_out_get 68482 1.6874 vmlinux vmlinux mwait_idle After loading the recompiled kernel with 218 entries, %si CPU usage dropped in half to around 12-18%, and oprofile looks much healthier, with only 7% spent in ip_vs_conn_in_get: samples % image name app name symbol name 265641 14.4616 bnx2 bnx2 /bnx2 143251 7.7986 vmlinux vmlinux __ticket_spin_lock 140661 7.6576 ip_vs.ko ip_vs.ko ip_vs_conn_in_get 94364 5.1372 vmlinux vmlinux mwait_idle 86267 4.6964 vmlinux vmlinux ip_route_input [ horms@verge.net.au: trivial up-port and minor style fixes ] Signed-off-by: Catalin(ux) M. BOIE Cc: Mark Bergsma Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 16 +++++----------- net/netfilter/ipvs/Kconfig | 4 ++++ net/netfilter/ipvs/ip_vs_conn.c | 42 ++++++++++++++++++++++++++++++----------- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++---- 4 files changed, 44 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8dc3296b7bea..a816c37417bb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -26,6 +26,11 @@ #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ + +/* Connections' size value needed by ip_vs_ctl.c */ +extern int ip_vs_conn_tab_size; + + struct ip_vs_iphdr { int len; __u8 protocol; @@ -592,17 +597,6 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows); * (from ip_vs_conn.c) */ -/* - * IPVS connection entry hash table - */ -#ifndef CONFIG_IP_VS_TAB_BITS -#define CONFIG_IP_VS_TAB_BITS 12 -#endif - -#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS -#define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS) -#define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1) - enum { IP_VS_DIR_INPUT = 0, IP_VS_DIR_OUTPUT, diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 79a698052218..c71e543d2549 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -68,6 +68,10 @@ config IP_VS_TAB_BITS each hash entry uses 8 bytes, so you can estimate how much memory is needed for your box. + You can overwrite this number setting conn_tab_bits module parameter + or by appending ip_vs.conn_tab_bits=? to the kernel command line + if IP VS was compiled built-in. + comment "IPVS transport protocol load balancing support" config IP_VS_PROTO_TCP diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 27c30cf933da..60bb41a8d8d4 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -40,6 +40,21 @@ #include +#ifndef CONFIG_IP_VS_TAB_BITS +#define CONFIG_IP_VS_TAB_BITS 12 +#endif + +/* + * Connection hash size. Default is what was selected at compile time. +*/ +int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; +module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); +MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); + +/* size and mask values */ +int ip_vs_conn_tab_size; +int ip_vs_conn_tab_mask; + /* * Connection hash table: for input and output packets lookups of IPVS */ @@ -125,11 +140,11 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, if (af == AF_INET6) return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), (__force u32)port, proto, ip_vs_conn_rnd) - & IP_VS_CONN_TAB_MASK; + & ip_vs_conn_tab_mask; #endif return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, ip_vs_conn_rnd) - & IP_VS_CONN_TAB_MASK; + & ip_vs_conn_tab_mask; } @@ -760,7 +775,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; - for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { @@ -797,7 +812,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); - while (++idx < IP_VS_CONN_TAB_SIZE) { + while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { seq->private = &ip_vs_conn_tab[idx]; @@ -976,8 +991,8 @@ void ip_vs_random_dropentry(void) /* * Randomly scan 1/32 of the whole table every second */ - for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) { - unsigned hash = net_random() & IP_VS_CONN_TAB_MASK; + for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { + unsigned hash = net_random() & ip_vs_conn_tab_mask; /* * Lock is actually needed in this loop. @@ -1029,7 +1044,7 @@ static void ip_vs_conn_flush(void) struct ip_vs_conn *cp; flush_again: - for (idx=0; idx Date: Mon, 4 Jan 2010 02:02:47 +0000 Subject: Phonet: zero-copy aligned GPRS RX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer Nokia cellular modems can use aligned payload for their GPRS pipe. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 1 + net/phonet/pep-gprs.c | 4 ++-- net/phonet/pep.c | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 4c61cdce4e5f..c37162547a93 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -77,6 +77,7 @@ static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) enum { PNS_PIPE_DATA = 0x20, + PNS_PIPE_ALIGNED_DATA, PNS_PEP_CONNECT_REQ = 0x40, PNS_PEP_CONNECT_RESP, diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index d183509d3fa6..d01208968c83 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -96,11 +96,11 @@ static int gprs_recv(struct gprs_dev *gp, struct sk_buff *skb) goto drop; } - if (likely(skb_headroom(skb) & 3)) { + if (skb_headroom(skb) & 3) { struct sk_buff *rskb, *fs; int flen = 0; - /* Phonet Pipe data header is misaligned (3 bytes), + /* Phonet Pipe data header may be misaligned (3 bytes), * so wrap the IP packet as a single fragment of an head-less * socket buffer. The network stack will pull what it needs, * but at least, the whole IP payload is not memcpy'd. */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b6356f3832f6..e23e30907d34 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -354,6 +354,9 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) queue = &pn->ctrlreq_queue; goto queue; + case PNS_PIPE_ALIGNED_DATA: + __skb_pull(skb, 1); + /* fall through */ case PNS_PIPE_DATA: __skb_pull(skb, 3); /* Pipe data header */ if (!pn_flow_safe(pn->rx_fc)) { -- cgit v1.2.3 From fea93ecef619b5779ca6984568517b1685079b05 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Mon, 4 Jan 2010 02:02:48 +0000 Subject: Phonet: zero-copy GPRS TX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Send aligned pipe payload if requested to do so. Then, the socket buffer needs not be fragmented anymore. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 2 ++ net/phonet/pep.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index c37162547a93..35672b1cf44a 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -44,6 +44,7 @@ struct pep_sock { u8 rx_fc; /* RX flow control */ u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ + u8 aligned; }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -139,6 +140,7 @@ enum { PN_PIPE_SB_NEGOTIATED_FC, PN_PIPE_SB_REQUIRED_FC_TX, PN_PIPE_SB_PREFERRED_FC_RX, + PN_PIPE_SB_ALIGNED_DATA, }; /* Phonet pipe flow control models */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e23e30907d34..72db27e3fc06 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -444,6 +444,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) struct sockaddr_pn dst; u16 peer_type; u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; if (!pskb_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; @@ -482,6 +483,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) return -EINVAL; peer_type = (peer_type & 0xff00) | data[0]; break; + case PN_PIPE_SB_ALIGNED_DATA: + aligned = data[0] != 0; + break; } n_sb--; } @@ -513,6 +517,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->rx_credits = 0; newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; + newpn->aligned = aligned; BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); skb_queue_head(&newsk->sk_receive_queue, skb); @@ -832,11 +837,15 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) return -ENOBUFS; } - skb_push(skb, 3); + skb_push(skb, 3 + pn->aligned); skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; - ph->message_id = PNS_PIPE_DATA; + if (pn->aligned) { + ph->message_id = PNS_PIPE_ALIGNED_DATA; + ph->data[0] = 0; /* padding */ + } else + ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; return pn_skb_send(sk, skb, &pipe_srv); @@ -930,6 +939,9 @@ int pep_write(struct sock *sk, struct sk_buff *skb) struct sk_buff *rskb, *fs; int flen = 0; + if (pep_sk(sk)->aligned) + return pipe_skb_send(sk, skb); + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); if (!rskb) { kfree_skb(skb); -- cgit v1.2.3 From d218d11133d888f9745802146a50255a4781d37a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Jan 2010 16:28:01 -0800 Subject: tcp: Generalized TTL Security Mechanism This patch adds the kernel portions needed to implement RFC 5082 Generalized TTL Security Mechanism (GTSM). It is a lightweight security measure against forged packets causing DoS attacks (for BGP). This is already implemented the same way in BSD kernels. For the necessary Quagga patch http://www.gossamer-threads.com/lists/quagga/dev/17389 Description from Cisco http://www.cisco.com/en/US/docs/ios/12_3t/12_3t7/feature/guide/gt_btsh.html It does add one byte to each socket structure, but I did a little rearrangement to reuse a hole (on 64 bit), but it does grow the structure on 32 bit This should be documented on ip(4) man page and the Glibc in.h file also needs update. IPV6_MINHOPLIMIT should also be added (although BSD doesn't support that). Only TCP is supported, but could also be added to UDP, DCCP, SCTP if desired. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/in.h | 2 ++ include/net/inet_sock.h | 4 +++- net/ipv4/ip_sockglue.c | 14 +++++++++++++- net/ipv4/tcp_ipv4.c | 3 +++ 4 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/in.h b/include/linux/in.h index b615649db129..583c76f9c30f 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -84,6 +84,8 @@ struct in_addr { #define IP_ORIGDSTADDR 20 #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR +#define IP_MINTTL 21 + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index bd4c53f75ac0..83fd34437cf1 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -122,10 +122,12 @@ struct inet_sock { __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; - struct ip_options *opt; __be16 inet_sport; __u16 inet_id; + + struct ip_options *opt; __u8 tos; + __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; __u8 recverr:1, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cafad9baff03..644dc43a55de 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<transparent = !!val; break; + case IP_MINTTL: + if (optlen < 1) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + inet->min_ttl = val; + break; + default: err = -ENOPROTOOPT; break; @@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TRANSPARENT: val = inet->transparent; break; + case IP_MINTTL: + val = inet->min_ttl; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65b8ebfd078a..382f667238ec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1649,6 +1649,9 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + if (iph->ttl < inet_sk(sk)->min_ttl) + goto discard_and_relse; + process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; -- cgit v1.2.3 From 81077e82c3f591578625805dd6464a27a9ff56ec Mon Sep 17 00:00:00 2001 From: Lukáš Turek <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:47 +0100 Subject: nl80211: Add new WIPHY attribute COVERAGE_CLASS The new attribute NL80211_ATTR_WIPHY_COVERAGE_CLASS sets IEEE 802.11 Coverage Class, which depends on maximum distance of nodes in a wireless network. It's required for long distance links (more than a few hundred meters). The attribute is now ignored by two non-mac80211 drivers, rndis and iwmc3200wifi, together with WIPHY_PARAM_RETRY_SHORT and WIPHY_PARAM_RETRY_LONG. If it turns out to be a problem, we could split set_wiphy_params callback or add new capability bits. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/core.c | 1 + net/wireless/nl80211.c | 15 +++++++++++++++ 4 files changed, 22 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2bfbe88837ef..d4c556de7170 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -430,6 +430,8 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length * larger than or equal to this use RTS/CTS handshake); allowed range: * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 + * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11 + * section 7.3.2.9; dot11CoverageClass; u8 * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -779,6 +781,8 @@ enum nl80211_attrs { NL80211_ATTR_COOKIE, + NL80211_ATTR_WIPHY_COVERAGE_CLASS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index add79930f47d..a8d5d04314b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -837,6 +837,7 @@ enum wiphy_params_flags { WIPHY_PARAM_RETRY_LONG = 1 << 1, WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2, WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, + WIPHY_PARAM_COVERAGE_CLASS = 1 << 4, }; /** @@ -1236,6 +1237,7 @@ struct wiphy { u8 retry_long; u32 frag_threshold; u32 rts_threshold; + u8 coverage_class; char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; diff --git a/net/wireless/core.c b/net/wireless/core.c index c2a2c563d21a..0a545bb6ed05 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -402,6 +402,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.retry_long = 4; rdev->wiphy.frag_threshold = (u32) -1; rdev->wiphy.rts_threshold = (u32) -1; + rdev->wiphy.coverage_class = 0; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3bee3cecdfa..c09fbcd278fb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -69,6 +69,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -444,6 +445,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.frag_threshold); NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, dev->wiphy.rts_threshold); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class); NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); @@ -684,6 +687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 changed; u8 retry_short = 0, retry_long = 0; u32 frag_threshold = 0, rts_threshold = 0; + u8 coverage_class = 0; rtnl_lock(); @@ -806,9 +810,16 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) changed |= WIPHY_PARAM_RTS_THRESHOLD; } + if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { + coverage_class = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); + changed |= WIPHY_PARAM_COVERAGE_CLASS; + } + if (changed) { u8 old_retry_short, old_retry_long; u32 old_frag_threshold, old_rts_threshold; + u8 old_coverage_class; if (!rdev->ops->set_wiphy_params) { result = -EOPNOTSUPP; @@ -819,6 +830,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; + old_coverage_class = rdev->wiphy.coverage_class; if (changed & WIPHY_PARAM_RETRY_SHORT) rdev->wiphy.retry_short = retry_short; @@ -828,6 +840,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.frag_threshold = frag_threshold; if (changed & WIPHY_PARAM_RTS_THRESHOLD) rdev->wiphy.rts_threshold = rts_threshold; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) + rdev->wiphy.coverage_class = coverage_class; result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); if (result) { @@ -835,6 +849,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; + rdev->wiphy.coverage_class = old_coverage_class; } } -- cgit v1.2.3 From 310bc676e314e92c18257bfc916951879451ee32 Mon Sep 17 00:00:00 2001 From: Lukáš Turek <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:48 +0100 Subject: mac80211: Add new callback set_coverage_class Mac80211 callback to driver set_coverage_class() sets slot time and ACK timeout for given IEEE 802.11 coverage class. The callback is optional, but it's essential for long distance links. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 +++++ net/mac80211/cfg.c | 7 +++++++ net/mac80211/driver-ops.h | 15 +++++++++++++++ net/mac80211/driver-trace.h | 23 +++++++++++++++++++++++ 4 files changed, 50 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f073a2a50574..ad4b70034e77 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1533,6 +1533,10 @@ enum ieee80211_ampdu_mlme_action { * and need to call wiphy_rfkill_set_hw_state() in the callback. * The callback can sleep. * + * @set_coverage_class: Set slot time for given coverage class as specified + * in IEEE 802.11-2007 section 17.3.8.6 and modify ACK timeout + * accordingly. This callback is not required and may sleep. + * * @testmode_cmd: Implement a cfg80211 test mode command. * The callback can sleep. * @@ -1592,6 +1596,7 @@ struct ieee80211_ops { struct ieee80211_sta *sta, u16 tid, u16 *ssn); void (*rfkill_poll)(struct ieee80211_hw *hw); + void (*set_coverage_class)(struct ieee80211_hw *hw, u8 coverage_class); #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct ieee80211_hw *hw, void *data, int len); #endif diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2e5e841e9b7b..976014c5e742 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1230,6 +1230,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) struct ieee80211_local *local = wiphy_priv(wiphy); int err; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) { + err = drv_set_coverage_class(local, wiphy->coverage_class); + + if (err) + return err; + } + if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 8757ea73d544..de91d39e0276 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -214,6 +214,21 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, return ret; } +static inline int drv_set_coverage_class(struct ieee80211_local *local, + u8 value) +{ + int ret = 0; + might_sleep(); + + if (local->ops->set_coverage_class) + local->ops->set_coverage_class(&local->hw, value); + else + ret = -EOPNOTSUPP; + + trace_drv_set_coverage_class(local, value, ret); + return ret; +} + static inline void drv_sta_notify(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum sta_notify_cmd cmd, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 977cc7528bc6..0ea258123b8e 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -491,6 +491,29 @@ TRACE_EVENT(drv_set_rts_threshold, ) ); +TRACE_EVENT(drv_set_coverage_class, + TP_PROTO(struct ieee80211_local *local, u8 value, int ret), + + TP_ARGS(local, value, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u8, value) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " value:%d ret:%d", + LOCAL_PR_ARG, __entry->value, __entry->ret + ) +); + TRACE_EVENT(drv_sta_notify, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, -- cgit v1.2.3 From e00cfce0cb2a397859607bf515c6de9ce064b64a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:19 +0200 Subject: mac80211: Select lowest rate based on basic rate set in AP mode If the basic rate set is configured to not include the lowest rate (e.g., basic rate set = 6, 12, 24 Mbps in IEEE 802.11g mode), the AP should not send out broadcast frames at 1 Mbps. This type of configuration can be used to optimize channel usage in cases where there is no need for backwards compatibility with IEEE 802.11b-only devices. In AP mode, mac80211 was unconditionally using the lowest rate for Beacon frames and similarly, with all rate control algorithms that use rate_control_send_low(), the lowest rate ended up being used for all broadcast frames (and all unicast frames that are sent before association). Change this to take into account the basic rate configuration in AP mode, i.e., use the lowest rate in the basic rate set instead of the lowest supported rate when selecting the rate. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ net/mac80211/rate.c | 25 +++++++++++++++++++++++++ net/mac80211/tx.c | 24 +++++++++++++----------- 3 files changed, 40 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ad4b70034e77..39c516c352be 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2299,6 +2299,7 @@ enum rate_control_changed { * @max_rate_idx: user-requested maximum rate (not MCS for now) * @skb: the skb that will be transmitted, the control information in it needs * to be filled in + * @ap: whether this frame is sent out in AP mode */ struct ieee80211_tx_rate_control { struct ieee80211_hw *hw; @@ -2308,6 +2309,7 @@ struct ieee80211_tx_rate_control { struct ieee80211_tx_rate reported_rate; bool rts, short_preamble; u8 max_rate_idx; + bool ap; }; struct rate_control_ops { diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..6349e7f4dcae 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -207,6 +207,27 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); } +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) +{ + u8 i; + + if (basic_rates == 0) + return; /* assume basic rates unknown and accept rate */ + if (*idx < 0) + return; + if (basic_rates & (1 << *idx)) + return; /* selected rate is a basic rate */ + + for (i = *idx + 1; i <= max_rate_idx; i++) { + if (basic_rates & (1 << i)) { + *idx = i; + return; + } + } + + /* could not find a basic rate; use original selection */ +} + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) @@ -218,6 +239,10 @@ bool rate_control_send_low(struct ieee80211_sta *sta, info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; + if (!sta && txrc->ap) + rc_send_low_broadcast(&info->control.rates[0].idx, + txrc->bss_conf->basic_rates, + txrc->sband->n_bitrates); return true; } return false; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 140da4a7f13d..4961168f5091 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -520,6 +520,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.skb = tx->skb; txrc.reported_rate.idx = -1; txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { @@ -2060,6 +2061,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct beacon_data *beacon; struct ieee80211_supported_band *sband; enum ieee80211_band band = local->hw.conf.channel->band; + struct ieee80211_tx_rate_control txrc; sband = local->hw.wiphy->bands[band]; @@ -2167,21 +2169,21 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_NO_ACK; info->band = band; - /* - * XXX: For now, always use the lowest rate - */ - info->control.rates[0].idx = 0; - info->control.rates[0].count = 1; - info->control.rates[1].idx = -1; - info->control.rates[2].idx = -1; - info->control.rates[3].idx = -1; - info->control.rates[4].idx = -1; - BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + + memset(&txrc, 0, sizeof(txrc)); + txrc.hw = hw; + txrc.sband = sband; + txrc.bss_conf = &sdata->vif.bss_conf; + txrc.skb = skb; + txrc.reported_rate.idx = -1; + txrc.max_rate_idx = sdata->max_ratectrl_rateidx; + txrc.ap = true; + rate_control_get_rate(sdata, NULL, &txrc); info->control.vif = vif; - info->flags |= IEEE80211_TX_CTL_NO_ACK; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; out: -- cgit v1.2.3 From 37eb0b164cf9fa9f70c8500926f5cde7c652f48e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 13:09:08 +0200 Subject: cfg80211/mac80211: Use more generic bitrate mask for rate control Extend struct cfg80211_bitrate_mask to actually use a bitfield mask instead of just a single fixed or maximum rate index. This change itself does not modify the behavior (except for debugfs files), but it prepares cfg80211 and mac80211 for a new nl80211 command for setting which rates can be used in TX rate control. Since frames are now going through the rate control algorithm unconditionally, the internal IEEE80211_TX_INTFL_RCALGO flag can now be removed. The RC implementations can use the rate_idx_mask value to optimize their behavior if only a single rate is enabled. The old max_rate_idx in struct ieee80211_tx_rate_control is maintained (but commented as deprecated) for backwards compatibility with existing RC implementations. Once these implementations have been updated to use the more generic rate_idx_mask, the max_rate_idx value can be removed. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/net/cfg80211.h | 13 ++------- include/net/mac80211.h | 8 +++--- net/mac80211/cfg.c | 32 +++------------------- net/mac80211/debugfs_netdev.c | 22 ++++++++------- net/mac80211/ieee80211_i.h | 4 +-- net/mac80211/iface.c | 8 ++++-- net/mac80211/rate.c | 63 +++++++++++++++++++++++++++++++++++-------- net/mac80211/rate.h | 5 +--- net/mac80211/tx.c | 12 +++++++-- net/wireless/wext-compat.c | 34 ++++++++++++++++++++--- 10 files changed, 122 insertions(+), 79 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a8d5d04314b9..22e062afb5a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -857,20 +857,11 @@ enum tx_power_setting { * cfg80211_bitrate_mask - masks for bitrate control */ struct cfg80211_bitrate_mask { -/* - * As discussed in Berlin, this struct really - * should look like this: - struct { u32 legacy; - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]; + /* TODO: add support for masking MCS rates; e.g.: */ + /* u8 mcs[IEEE80211_HT_MCS_MASK_LEN]; */ } control[IEEE80211_NUM_BANDS]; - - * Since we can always fix in-kernel users, let's keep - * it simpler for now: - */ - u32 fixed; /* fixed bitrate, 0 == not fixed */ - u32 maxrate; /* in kbps, 0 == no limit */ }; /** * struct cfg80211_pmksa - PMK Security Association diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 39c516c352be..7e5af6d90b93 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -255,9 +255,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be * set by rate control algorithms to indicate probe rate, will * be cleared for fragmented frames (except on the last fragment) - * @IEEE80211_TX_INTFL_RCALGO: mac80211 internal flag, do not test or - * set this flag in the driver; indicates that the rate control - * algorithm was used and should be notified of TX status * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211, * used to indicate that a pending frame requires TX processing before * it can be sent out. @@ -287,7 +284,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_STAT_AMPDU = BIT(10), IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), - IEEE80211_TX_INTFL_RCALGO = BIT(13), IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14), IEEE80211_TX_INTFL_RETRIED = BIT(15), IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), @@ -2297,6 +2293,9 @@ enum rate_control_changed { * @short_preamble: whether mac80211 will request short-preamble transmission * if the selected rate supports it * @max_rate_idx: user-requested maximum rate (not MCS for now) + * (deprecated; this will be removed once drivers get updated to use + * rate_idx_mask) + * @rate_idx_mask: user-requested rate mask (not MCS for now) * @skb: the skb that will be transmitted, the control information in it needs * to be filled in * @ap: whether this frame is sent out in AP mode @@ -2309,6 +2308,7 @@ struct ieee80211_tx_rate_control { struct ieee80211_tx_rate reported_rate; bool rts, short_preamble; u8 max_rate_idx; + u32 rate_idx_mask; bool ap; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 976014c5e742..e5dda6fb8dff 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1406,8 +1406,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i; - u32 target_rate; - struct ieee80211_supported_band *sband; /* * This _could_ be supported by providing a hook for @@ -1417,35 +1415,11 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) return -EOPNOTSUPP; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - /* - * target_rate = -1, rate->fixed = 0 means auto only, so use all rates - * target_rate = X, rate->fixed = 1 means only rate X - * target_rate = X, rate->fixed = 0 means all rates <= X - */ - sdata->max_ratectrl_rateidx = -1; - sdata->force_unicast_rateidx = -1; - if (mask->fixed) - target_rate = mask->fixed / 100; - else if (mask->maxrate) - target_rate = mask->maxrate / 100; - else - return 0; + for (i = 0; i < IEEE80211_NUM_BANDS; i++) + sdata->rc_rateidx_mask[i] = mask->control[i].legacy; - for (i = 0; i< sband->n_bitrates; i++) { - if (target_rate != sband->bitrates[i].bitrate) - continue; - - /* requested bitrate found */ - sdata->max_ratectrl_rateidx = i; - if (mask->fixed) - sdata->force_unicast_rateidx = i; - return 0; - } - - return -EINVAL; + return 0; } static int ieee80211_remain_on_channel(struct wiphy *wiphy, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 59f6e3bcbd09..1481049f0f71 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -127,8 +127,10 @@ __IEEE80211_IF_FILE(name, ieee80211_if_write_##name) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); -IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); -IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); +IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], + HEX); +IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], + HEX); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -264,8 +266,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, sta); - DEBUGFS_ADD(force_unicast_rateidx, sta); - DEBUGFS_ADD(max_ratectrl_rateidx, sta); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, sta); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, sta); DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); @@ -275,8 +277,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, ap); - DEBUGFS_ADD(force_unicast_rateidx, ap); - DEBUGFS_ADD(max_ratectrl_rateidx, ap); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, ap); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, ap); DEBUGFS_ADD(num_sta_ps, ap); DEBUGFS_ADD(dtim_count, ap); @@ -286,8 +288,8 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, wds); - DEBUGFS_ADD(force_unicast_rateidx, wds); - DEBUGFS_ADD(max_ratectrl_rateidx, wds); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, wds); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, wds); DEBUGFS_ADD(peer, wds); } @@ -295,8 +297,8 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, vlan); - DEBUGFS_ADD(force_unicast_rateidx, vlan); - DEBUGFS_ADD(max_ratectrl_rateidx, vlan); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, vlan); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, vlan); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a27921ee6e63..3e4ac3f30857 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -494,8 +494,8 @@ struct ieee80211_sub_if_data { */ struct ieee80211_if_ap *bss; - int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ - int max_ratectrl_rateidx; /* max TX rateidx for rate control */ + /* bitmap of allowed (non-MCS) rate indexes for rate control */ + u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; union { struct ieee80211_if_ap ap; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 264a6c975f8b..fe140bf033f9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -856,8 +856,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - sdata->force_unicast_rateidx = -1; - sdata->max_ratectrl_rateidx = -1; + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + struct ieee80211_supported_band *sband; + sband = local->hw.wiphy->bands[i]; + sdata->rc_rateidx_mask[i] = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 6349e7f4dcae..c74b7c85403c 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -249,6 +249,38 @@ bool rate_control_send_low(struct ieee80211_sta *sta, } EXPORT_SYMBOL(rate_control_send_low); +static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, + int n_bitrates, u32 mask) +{ + int j; + + /* See whether the selected rate or anything below it is allowed. */ + for (j = rate->idx; j >= 0; j--) { + if (mask & (1 << j)) { + /* Okay, found a suitable rate. Use it. */ + rate->idx = j; + return; + } + } + + /* Try to find a higher rate that would be allowed */ + for (j = rate->idx + 1; j < n_bitrates; j++) { + if (mask & (1 << j)) { + /* Okay, found a suitable rate. Use it. */ + rate->idx = j; + return; + } + } + + /* + * Uh.. No suitable rate exists. This should not really happen with + * sane TX rate mask configurations. However, should someone manage to + * configure supported rates and TX rate mask in incompatible way, + * allow the frame to be transmitted with whatever the rate control + * selected. + */ +} + void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) @@ -258,6 +290,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; + u32 mask; if (sta) { ista = &sta->sta; @@ -270,23 +303,31 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } - if (sta && sdata->force_unicast_rateidx > -1) { - info->control.rates[0].idx = sdata->force_unicast_rateidx; - } else { - ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); - info->flags |= IEEE80211_TX_INTFL_RCALGO; - } + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); /* - * try to enforce the maximum rate the user wanted + * Try to enforce the rateidx mask the user wanted. skip this if the + * default mask (allow all rates) is used to save some processing for + * the common case. */ - if (sdata->max_ratectrl_rateidx > -1) + mask = sdata->rc_rateidx_mask[info->band]; + if (mask != (1 << txrc->sband->n_bitrates) - 1) { + if (sta) { + /* Filter out rates that the STA does not support */ + mask &= sta->sta.supp_rates[info->band]; + } + /* + * Make sure the rate index selected for each TX rate is + * included in the configured mask and change the rate indexes + * if needed. + */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* Rate masking supports only legacy rates for now */ if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) continue; - info->control.rates[i].idx = - min_t(s8, info->control.rates[i].idx, - sdata->max_ratectrl_rateidx); + rate_idx_match_mask(&info->control.rates[i], + txrc->sband->n_bitrates, mask); + } } BUG_ON(info->control.rates[0].idx < 0); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index cb9bd1f65e27..669dddd40521 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -44,10 +44,7 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - if (likely(info->flags & IEEE80211_TX_INTFL_RCALGO)) - ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4961168f5091..d3a44812f8bf 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -519,7 +519,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[tx->channel->band]; + if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + txrc.max_rate_idx = -1; + else + txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; /* set up RTS protection if desired */ @@ -2178,7 +2182,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.bss_conf = &sdata->vif.bss_conf; txrc.skb = skb; txrc.reported_rate.idx = -1; - txrc.max_rate_idx = sdata->max_ratectrl_rateidx; + txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; + if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + txrc.max_rate_idx = -1; + else + txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.ap = true; rate_control_get_rate(sdata, NULL, &txrc); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 4198243a3dff..966d2f01beac 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1204,21 +1204,47 @@ int cfg80211_wext_siwrate(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_bitrate_mask mask; + u32 fixed, maxrate; + struct ieee80211_supported_band *sband; + int band, ridx; + bool match = false; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - mask.fixed = 0; - mask.maxrate = 0; + memset(&mask, 0, sizeof(mask)); + fixed = 0; + maxrate = 0; if (rate->value < 0) { /* nothing */ } else if (rate->fixed) { - mask.fixed = rate->value / 1000; /* kbps */ + fixed = rate->value / 100000; } else { - mask.maxrate = rate->value / 1000; /* kbps */ + maxrate = rate->value / 100000; + } + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + sband = wdev->wiphy->bands[band]; + if (sband == NULL) + continue; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = &sband->bitrates[ridx]; + if (fixed == srate->bitrate) { + mask.control[band].legacy = 1 << ridx; + match = true; + break; + } + if (srate->bitrate <= maxrate) { + mask.control[band].legacy |= 1 << ridx; + match = true; + } + } } + if (!match) + return -EINVAL; + return rdev->ops->set_bitrate_mask(wdev->wiphy, dev, NULL, &mask); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwrate); -- cgit v1.2.3 From 13ae75b103e07304a34ab40c9136e9f53e06475c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:45 +0200 Subject: nl80211: New command for setting TX rate mask for rate control Add a new NL80211_CMD_SET_TX_BITRATE_MASK command and related attributes to provide support for setting TX rate mask for rate control. This uses the existing cfg80211 set_bitrate_mask operation that was previously used only with WEXT compat code (SIOCSIWRATE). The nl80211 command allows more generic configuration of allowed rates as a mask instead of fixed/max rate. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 44 +++++++++++++++++++ include/net/cfg80211.h | 4 +- net/wireless/nl80211.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d4c556de7170..7a1c8c145b22 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,6 +295,10 @@ * This command is also used as an event to notify when a requested * remain-on-channel duration has expired. * + * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX + * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface + * and @NL80211_ATTR_TX_RATES the set of allowed rates. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -381,6 +385,8 @@ enum nl80211_commands { NL80211_CMD_REMAIN_ON_CHANNEL, NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + NL80211_CMD_SET_TX_BITRATE_MASK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -640,6 +646,13 @@ enum nl80211_commands { * * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. * + * @NL80211_ATTR_TX_RATES: Nested set of attributes + * (enum nl80211_tx_rate_attributes) describing TX rates per band. The + * enum nl80211_band value is used as the index (nla_type() of the nested + * data. If a band is not included, it will be configured to allow all + * rates based on negotiated supported rates information. This attribute + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -783,6 +796,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_COVERAGE_CLASS, + NL80211_ATTR_TX_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1482,4 +1497,33 @@ enum nl80211_key_attributes { NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 }; +/** + * enum nl80211_tx_rate_attributes - TX rate set attributes + * @__NL80211_TXRATE_INVALID: invalid + * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection + * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with + * 1 = 500 kbps) but without the IE length restriction (at most + * %NL80211_MAX_SUPP_RATES in a single array). + * @__NL80211_TXRATE_AFTER_LAST: internal + * @NL80211_TXRATE_MAX: highest TX rate attribute + */ +enum nl80211_tx_rate_attributes { + __NL80211_TXRATE_INVALID, + NL80211_TXRATE_LEGACY, + + /* keep last */ + __NL80211_TXRATE_AFTER_LAST, + NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band - Frequency band + * @NL80211_BAND_2GHZ - 2.4 GHz ISM band + * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + */ +enum nl80211_band { + NL80211_BAND_2GHZ, + NL80211_BAND_5GHZ, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 22e062afb5a1..0d734413b5fb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -39,8 +39,8 @@ * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) */ enum ieee80211_band { - IEEE80211_BAND_2GHZ, - IEEE80211_BAND_5GHZ, + IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, + IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, /* keep last */ IEEE80211_NUM_BANDS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c09fbcd278fb..b804062e0179 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -144,6 +144,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, + [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, }; /* policy for the attributes */ @@ -575,6 +576,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4438,6 +4440,109 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static struct nla_policy +nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, +}; + +static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev; + struct cfg80211_bitrate_mask mask; + int err, rem, i; + struct net_device *dev; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + + if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->set_bitrate_mask) { + err = -EOPNOTSUPP; + goto unlock; + } + + memset(&mask, 0, sizeof(mask)); + /* Default to all rates enabled */ + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + mask.control[i].legacy = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } + + /* + * The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum ieee80211_band values used in cfg80211. + */ + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) + { + enum ieee80211_band band = nla_type(tx_rates); + if (band < 0 || band >= IEEE80211_NUM_BANDS) { + err = -EINVAL; + goto unlock; + } + sband = rdev->wiphy.bands[band]; + if (sband == NULL) { + err = -EINVAL; + goto unlock; + } + nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (tb[NL80211_TXRATE_LEGACY]) { + mask.control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if (mask.control[band].legacy == 0) { + err = -EINVAL; + goto unlock; + } + } + } + + err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask); + + unlock: + dev_put(dev); + cfg80211_unlock_rdev(rdev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4712,6 +4817,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, + .doit = nl80211_set_tx_bitrate_mask, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { -- cgit v1.2.3 From 7044cc565b45a898c140fb185174a66f2d68a163 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:19 +0200 Subject: mac80211: add functions to create PS Poll and Nullfunc templates Some hardware, for example wl1251 and wl1271, handle the transmission of power save related frames in hardware, but the driver is responsible for creating the templates. It's better to create the templates in mac80211, that way all drivers can benefit from this. Add two new functions, ieee80211_pspoll_get() and ieee80211_nullfunc_get() which drivers need to call to get the frame. Drivers are also responsible for updating the templates after each association. Also new struct ieee80211_hdr_3addr is added to ieee80211.h to make it easy to calculate length of the Nullfunc frame. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 ++++++ include/net/mac80211.h | 30 ++++++++++++++++++ net/mac80211/tx.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aeea282bd2fe..602c0692c3fc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -130,6 +130,15 @@ struct ieee80211_hdr { u8 addr4[6]; } __attribute__ ((packed)); +struct ieee80211_hdr_3addr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7e5af6d90b93..75f46e26ad60 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1874,6 +1874,36 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, return ieee80211_beacon_get_tim(hw, vif, NULL, NULL); } +/** + * ieee80211_pspoll_get - retrieve a PS Poll template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a PS Poll a template which can, for example, uploaded to + * hardware. The template must be updated after association so that correct + * AID, BSSID and MAC address is used. + * + * Note: Caller (or hardware) is responsible for setting the + * &IEEE80211_FCTL_PM bit. + */ +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + +/** + * ieee80211_nullfunc_get - retrieve a nullfunc template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a Nullfunc template which can, for example, uploaded to + * hardware. The template must be updated after association so that correct + * BSSID and address is used. + * + * Note: Caller (or hardware) is responsible for setting the + * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. + */ +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d3a44812f8bf..055b45b146d9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2200,6 +2200,84 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_beacon_get_tim); +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_pspoll *pspoll; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "pspoll template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); + memset(pspoll, 0, sizeof(*pspoll)); + pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_PSPOLL); + pspoll->aid = cpu_to_le16(ifmgd->aid); + + /* aid in PS-Poll has its two MSBs each set to 1 */ + pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); + + memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); + memcpy(pspoll->ta, vif->addr, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_pspoll_get); + +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_hdr_3addr *nullfunc; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb, + sizeof(*nullfunc)); + memset(nullfunc, 0, sizeof(*nullfunc)); + nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); + memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); + memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_nullfunc_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.3 From 05e54ea6cce400ac34528d705179b45244f61074 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:38 +0200 Subject: mac80211: create Probe Request template Certain type of hardware, for example wl1251 and wl1271, need a template for the Probe Request. Create a function ieee80211_probereq_get() which creates the template and drivers send it to hardware. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 17 +++++++++++++++++ net/mac80211/tx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 75f46e26ad60..e1e73c6abeff 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1904,6 +1904,23 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif); +/** + * ieee80211_probereq_get - retrieve a Probe Request template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @ssid: SSID buffer + * @ssid_len: length of SSID + * @ie: buffer containing all IEs except SSID for the template + * @ie_len: length of the IE buffer + * + * Creates a Probe Request template which can, for example, be uploaded to + * hardware. + */ +struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len); + /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 055b45b146d9..0661e696a1dd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2278,6 +2278,56 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_nullfunc_get); +struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local; + struct ieee80211_hdr_3addr *hdr; + struct sk_buff *skb; + size_t ie_ssid_len; + u8 *pos; + + sdata = vif_to_sdata(vif); + local = sdata->local; + ie_ssid_len = 2 + ssid_len; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*hdr) + + ie_ssid_len + ie_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "request template\n", sdata->name); + return NULL; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + + hdr = (struct ieee80211_hdr_3addr *) skb_put(skb, sizeof(*hdr)); + memset(hdr, 0, sizeof(*hdr)); + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_REQ); + memset(hdr->addr1, 0xff, ETH_ALEN); + memcpy(hdr->addr2, vif->addr, ETH_ALEN); + memset(hdr->addr3, 0xff, ETH_ALEN); + + pos = skb_put(skb, ie_ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ssid_len; + if (ssid) + memcpy(pos, ssid, ssid_len); + pos += ssid_len; + + if (ie) { + pos = skb_put(skb, ie_len); + memcpy(pos, ie, ie_len); + } + + return skb; +} +EXPORT_SYMBOL(ieee80211_probereq_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.3 From 34a6eddbabd704b3c7dae9362234552267573be2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 16:19:24 +0200 Subject: cfg80211: Store IEs from both Beacon and Probe Response frames Store information elements from Beacon and Probe Response frames in separate buffers to allow both sets to be made available through nl80211. This allows user space applications to get access to IEs from Beacon frames even if we have received Probe Response frames from the BSS. Previously, the IEs from Probe Response frames would have overridden the IEs from Beacon frames. This feature is of somewhat limited use since most protocols include the same (or extended) information in Probe Response frames. However, there are couple of exceptions where the IEs from Beacon frames could be of some use: TIM IE is only included in Beacon frames (and it would be needed to figure out the DTIM period used in the BSS) and at least some implementations of Wireless Provisioning Services seem to include the full IE only in Beacon frames). The new BSS attribute for scan results is added to allow both the IE sets to be delivered. This is done in a way that maintains the previously used behavior for applications that are not aware of the new NL80211_BSS_BEACON_IES attribute. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 +++- include/net/cfg80211.h | 12 ++++- net/wireless/core.h | 3 +- net/wireless/nl80211.c | 4 ++ net/wireless/scan.c | 120 ++++++++++++++++++++++++++++++++++++------------ 5 files changed, 116 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7a1c8c145b22..127a73015760 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1378,13 +1378,20 @@ enum nl80211_channel_type { * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the - * raw information elements from the probe response/beacon (bin) + * raw information elements from the probe response/beacon (bin); + * if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are + * from a Probe Response frame; otherwise they are from a Beacon frame. + * However, if the driver does not indicate the source of the IEs, these + * IEs may be from either frame subtype. * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) * @NL80211_BSS_STATUS: status, if this BSS is "used" * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms + * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information + * elements from a Beacon frame (bin); not present if no Beacon frame has + * yet been received * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1400,6 +1407,7 @@ enum nl80211_bss { NL80211_BSS_SIGNAL_UNSPEC, NL80211_BSS_STATUS, NL80211_BSS_SEEN_MS_AGO, + NL80211_BSS_BEACON_IES, /* keep last */ __NL80211_BSS_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d734413b5fb..2af52704e670 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,8 +626,14 @@ enum cfg80211_signal_type { * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order * @information_elements: the information elements (Note that there - * is no guarantee that these are well-formed!) + * is no guarantee that these are well-formed!); this is a pointer to + * either the beacon_ies or proberesp_ies depending on whether Probe + * Response frame has been received * @len_information_elements: total length of the information elements + * @beacon_ies: the information elements from the last Beacon frame + * @len_beacon_ies: total length of the beacon_ies + * @proberesp_ies: the information elements from the last Probe Response frame + * @len_proberesp_ies: total length of the proberesp_ies * @signal: signal strength value (type depends on the wiphy's signal_type) * @free_priv: function pointer to free private data * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes @@ -641,6 +647,10 @@ struct cfg80211_bss { u16 capability; u8 *information_elements; size_t len_information_elements; + u8 *beacon_ies; + size_t len_beacon_ies; + u8 *proberesp_ies; + size_t len_proberesp_ies; s32 signal; diff --git a/net/wireless/core.h b/net/wireless/core.h index 30ec95f05b52..2d6a6b9c0c43 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -111,7 +111,8 @@ struct cfg80211_internal_bss { unsigned long ts; struct kref ref; atomic_t hold; - bool ies_allocated; + bool beacon_ies_allocated; + bool proberesp_ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b804062e0179..4af7991a9ec8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3163,6 +3163,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS, res->len_information_elements, res->information_elements); + if (res->beacon_ies && res->len_beacon_ies && + res->beacon_ies != res->information_elements) + NLA_PUT(msg, NL80211_BSS_BEACON_IES, + res->len_beacon_ies, res->beacon_ies); if (res->tsf) NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf); if (res->beacon_interval) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0c2cbbebca95..06b0231ee5e3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -100,8 +100,10 @@ static void bss_release(struct kref *ref) if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); - if (bss->ies_allocated) - kfree(bss->pub.information_elements); + if (bss->beacon_ies_allocated) + kfree(bss->pub.beacon_ies); + if (bss->proberesp_ies_allocated) + kfree(bss->pub.proberesp_ies); BUG_ON(atomic_read(&bss->hold)); @@ -375,8 +377,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res, - bool overwrite) + struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; const u8 *meshid, *meshcfg; @@ -418,28 +419,64 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found->pub.capability = res->pub.capability; found->ts = res->ts; - /* overwrite IEs */ - if (overwrite) { + /* Update IEs */ + if (res->pub.proberesp_ies) { size_t used = dev->wiphy.bss_priv_size + sizeof(*res); - size_t ielen = res->pub.len_information_elements; + size_t ielen = res->pub.len_proberesp_ies; + + if (found->pub.proberesp_ies && + !found->proberesp_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.proberesp_ies, + res->pub.proberesp_ies, ielen); + found->pub.len_proberesp_ies = ielen; + } else { + u8 *ies = found->pub.proberesp_ies; + + if (found->proberesp_ies_allocated) + ies = krealloc(ies, ielen, GFP_ATOMIC); + else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.proberesp_ies, + ielen); + found->proberesp_ies_allocated = true; + found->pub.proberesp_ies = ies; + found->pub.len_proberesp_ies = ielen; + } + } - if (!found->ies_allocated && ksize(found) >= used + ielen) { - memcpy(found->pub.information_elements, - res->pub.information_elements, ielen); - found->pub.len_information_elements = ielen; + /* Override possible earlier Beacon frame IEs */ + found->pub.information_elements = + found->pub.proberesp_ies; + found->pub.len_information_elements = + found->pub.len_proberesp_ies; + } + if (res->pub.beacon_ies) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_beacon_ies; + + if (found->pub.beacon_ies && + !found->beacon_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.beacon_ies, + res->pub.beacon_ies, ielen); + found->pub.len_beacon_ies = ielen; } else { - u8 *ies = found->pub.information_elements; + u8 *ies = found->pub.beacon_ies; - if (found->ies_allocated) + if (found->beacon_ies_allocated) ies = krealloc(ies, ielen, GFP_ATOMIC); else ies = kmalloc(ielen, GFP_ATOMIC); if (ies) { - memcpy(ies, res->pub.information_elements, ielen); - found->ies_allocated = true; - found->pub.information_elements = ies; - found->pub.len_information_elements = ielen; + memcpy(ies, res->pub.beacon_ies, + ielen); + found->beacon_ies_allocated = true; + found->pub.beacon_ies = ies; + found->pub.len_beacon_ies = ielen; } } } @@ -489,14 +526,26 @@ cfg80211_inform_bss(struct wiphy *wiphy, res->pub.tsf = timestamp; res->pub.beacon_interval = beacon_interval; res->pub.capability = capability; - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, ie, ielen); - res->pub.len_information_elements = ielen; + /* + * Since we do not know here whether the IEs are from a Beacon or Probe + * Response frame, we need to pick one of the options and only use it + * with the driver that does not provide the full Beacon/Probe Response + * frame. Use Beacon frame pointer to avoid indicating that this should + * override the information_elements pointer should we have received an + * earlier indication of Probe Response data. + * + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. + */ + res->pub.beacon_ies = (u8 *)res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, ie, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; kref_init(&res->ref); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; @@ -517,7 +566,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, struct cfg80211_internal_bss *res; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - bool overwrite; size_t privsz = wiphy->bss_priv_size; if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && @@ -538,16 +586,28 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen); - res->pub.len_information_elements = ielen; + /* + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. + */ + if (ieee80211_is_probe_resp(mgmt->frame_control)) { + res->pub.proberesp_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.proberesp_ies, mgmt->u.probe_resp.variable, + ielen); + res->pub.len_proberesp_ies = ielen; + res->pub.information_elements = res->pub.proberesp_ies; + res->pub.len_information_elements = res->pub.len_proberesp_ies; + } else { + res->pub.beacon_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, mgmt->u.beacon.variable, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; + } kref_init(&res->ref); - overwrite = ieee80211_is_probe_resp(mgmt->frame_control); - - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; -- cgit v1.2.3 From ab13315af97919fae0e014748105fdc2e30afb2d Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:31 +0200 Subject: mac80211: add U-APSD client support Add Unscheduled Automatic Power-Save Delivery (U-APSD) client support. The idea is that the data frames from the client trigger AP to send the buffered frames with ACs which have U-APSD enabled. This decreases latency and makes it possible to save even more power. Driver needs to use IEEE80211_HW_UAPSD to enable the feature. The current implementation assumes that firmware takes care of the wakeup and hardware needing IEEE80211_HW_PS_NULLFUNC_STACK is not yet supported. Tested with wl1251 on a Nokia N900 and Cisco Aironet 1231G AP and running various test traffic with ping. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 7 +++++++ net/mac80211/ieee80211_i.h | 13 ++++++++++++- net/mac80211/main.c | 4 ++++ net/mac80211/mlme.c | 31 ++++++++++++++++++++++++++++--- net/mac80211/scan.c | 18 ++++++++++++++++++ net/mac80211/util.c | 2 ++ net/mac80211/work.c | 12 ++++++++++-- 9 files changed, 106 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 602c0692c3fc..a8c6069a0d9f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -120,6 +120,24 @@ #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 +/* U-APSD queue for WMM IEs sent by AP */ +#define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) + +/* U-APSD queues for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO (1<<0) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VI (1<<1) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK (1<<2) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE (1<<3) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK 0x0f + +/* U-APSD max SP length for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL 0x00 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_2 0x01 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_4 0x02 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_6 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e1e73c6abeff..f313a3cbabda 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -113,6 +113,7 @@ struct ieee80211_tx_queue_params { u16 cw_min; u16 cw_max; u8 aifs; + bool uapsd; }; /** @@ -929,6 +930,11 @@ enum ieee80211_tkip_key_type { * Hardware supports dynamic spatial multiplexing powersave, * ie. can turn off all but one chain and then wake the rest * up as required after, for example, rts/cts handshake. + * + * @IEEE80211_HW_SUPPORTS_UAPSD: + * Hardware supports Unscheduled Automatic Power Save Delivery + * (U-APSD) in managed mode. The mode is configured with + * conf_tx() operation. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -948,6 +954,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_BEACON_FILTER = 1<<14, IEEE80211_HW_SUPPORTS_STATIC_SMPS = 1<<15, IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS = 1<<16, + IEEE80211_HW_SUPPORTS_UAPSD = 1<<17, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dc12e9466ffd..8286df5822d5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1128,6 +1128,13 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; + + /* + * Setting tx queue params disables u-apsd because it's only + * called in master mode. + */ + p.uapsd = false; + if (drv_conf_tx(local, params->queue, &p)) { printk(KERN_DEBUG "%s: failed to set TX queue " "parameters for queue %d\n", diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3e4ac3f30857..3468e378509a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -58,6 +58,15 @@ struct ieee80211_local; #define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) +#define IEEE80211_DEFAULT_UAPSD_QUEUES \ + (IEEE80211_WMM_IE_STA_QOSINFO_AC_BK | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + +#define IEEE80211_DEFAULT_MAX_SP_LEN \ + IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -78,6 +87,7 @@ struct ieee80211_bss { u8 dtim_period; bool wmm_used; + bool uapsd_supported; unsigned long last_probe_resp; @@ -285,7 +295,7 @@ struct ieee80211_work { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; u8 supp_rates_len; - bool wmm_used, use_11n; + bool wmm_used, use_11n, uapsd_used; } assoc; struct { u32 duration; @@ -306,6 +316,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_DISABLE_11N = BIT(4), IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), + IEEE80211_STA_UAPSD_ENABLED = BIT(7), }; struct ieee80211_if_managed { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 468829143991..0054bba08ce1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -491,6 +491,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; + WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) + && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); + /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. It diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 86f025bc9456..39c27d83a4f2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -569,7 +569,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_tx_queue_params params; size_t left; int count; - u8 *pos; + u8 *pos, uapsd_queues = 0; if (local->hw.queues < 4) return; @@ -579,6 +579,10 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; + + if (ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED) + uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + count = wmm_param[6] & 0x0f; if (count == ifmgd->wmm_last_param_set) return; @@ -593,6 +597,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, for (; left >= 4; left -= 4, pos += 4) { int aci = (pos[0] >> 5) & 0x03; int acm = (pos[0] >> 4) & 0x01; + bool uapsd = false; int queue; switch (aci) { @@ -600,22 +605,30 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, queue = 3; if (acm) local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) + uapsd = true; break; case 2: /* AC_VI */ queue = 1; if (acm) local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) + uapsd = true; break; case 3: /* AC_VO */ queue = 0; if (acm) local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + uapsd = true; break; case 0: /* AC_BE */ default: queue = 2; if (acm) local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) + uapsd = true; break; } @@ -623,11 +636,14 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.uapsd = uapsd; + #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d\n", + "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", wiphy_name(local->hw.wiphy), queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, params.txop); + params.aifs, params.cw_min, params.cw_max, params.txop, + params.uapsd); #endif if (drv_conf_tx(local, queue, ¶ms) && local->ops->conf_tx) printk(KERN_DEBUG "%s: failed to set TX queue " @@ -1906,6 +1922,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, wk->assoc.ht_information_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION); + if (bss->wmm_used && bss->uapsd_supported && + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + wk->assoc.uapsd_used = true; + ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; + } else { + wk->assoc.uapsd_used = false; + ifmgd->flags &= ~IEEE80211_STA_UAPSD_ENABLED; + } + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); memcpy(wk->assoc.ssid, ssid + 2, ssid[1]); wk->assoc.ssid_len = ssid[1]; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 30cb62bb45b3..9afe2f9885dc 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -54,6 +54,23 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); } +static bool is_uapsd_supported(struct ieee802_11_elems *elems) +{ + u8 qos_info; + + if (elems->wmm_info && elems->wmm_info_len == 7 + && elems->wmm_info[5] == 1) + qos_info = elems->wmm_info[6]; + else if (elems->wmm_param && elems->wmm_param_len == 24 + && elems->wmm_param[5] == 1) + qos_info = elems->wmm_param[6]; + else + /* no valid wmm information or parameter element found */ + return false; + + return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD; +} + struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, @@ -117,6 +134,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } bss->wmm_used = elems->wmm_param || elems->wmm_info; + bss->uapsd_supported = is_uapsd_supported(elems); if (!beacon) bss->last_probe_resp = jiffies; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a2ba6e29bd9a..e278f97c8305 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -792,6 +792,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) break; } + qparam.uapsd = false; + drv_conf_tx(local, queue, &qparam); } } diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 7c5d95b1bc04..a74fd6ee0083 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -202,7 +202,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos; + u8 *pos, qos_info; const u8 *ies; size_t offset = 0, noffset; int i, len, count, rates_len, supp_rates_len; @@ -375,6 +375,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } if (wk->assoc.wmm_used && local->hw.queues >= 4) { + if (wk->assoc.uapsd_used) { + qos_info = IEEE80211_DEFAULT_UAPSD_QUEUES; + qos_info |= (IEEE80211_DEFAULT_MAX_SP_LEN << + IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT); + } else { + qos_info = 0; + } + pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -384,7 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 2; /* WME */ *pos++ = 0; /* WME info */ *pos++ = 1; /* WME ver */ - *pos++ = 0; + *pos++ = qos_info; } /* add any remaining custom (i.e. vendor specific here) IEs */ -- cgit v1.2.3 From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 13 Jan 2010 16:02:14 +0100 Subject: netfilter: nfnetlink: netns support Make nfnl socket per-petns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 8 ++--- include/net/net_namespace.h | 2 ++ net/netfilter/nf_conntrack_netlink.c | 13 ++++---- net/netfilter/nfnetlink.c | 65 +++++++++++++++++++++++------------- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 2 +- 6 files changed, 58 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 49d321f3ccd2..53923868c9bd 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -73,11 +73,11 @@ struct nfnetlink_subsystem { extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); -extern int nfnetlink_has_listeners(unsigned int group); -extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, +extern int nfnetlink_has_listeners(struct net *net, unsigned int group); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags); -extern void nfnetlink_set_err(u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); extern void nfnl_lock(void); extern void nfnl_unlock(void); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f307e133d14c..82b7be4db89a 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -81,6 +81,8 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif + struct sock *nfnl; + struct sock *nfnl_stash; #endif #ifdef CONFIG_XFRM struct netns_xfrm xfrm; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59d8064eb522..d4c5d06677f9 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -482,7 +482,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(group)) + if (!item->report && !nfnetlink_has_listeners(&init_net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -559,7 +559,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + err = nfnetlink_send(skb, &init_net, item->pid, group, item->report, + GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -571,7 +572,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, group, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, group, -ENOBUFS); return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1539,7 +1540,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1562,7 +1563,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1572,7 +1573,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, 0, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS); return 0; } #endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index eedc0c1ac7a4..8eb0cc23ada3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); @@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -int nfnetlink_has_listeners(unsigned int group) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(nfnl, group); + return netlink_has_listeners(net->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(u32 pid, u32 group, int error) +void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(nfnl, pid, group, error); + netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, pid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); + err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; @@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfnl_unlock(); } -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) { - printk("Removing netfilter NETLINK layer.\n"); - netlink_kernel_release(nfnl); - return; + struct sock *nfnl; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; } -static int __init nfnetlink_init(void) +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + struct net *net; - nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, NULL, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -ENOMEM; - } + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); +} - return 0; +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + +static int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9de0470d557e..285e9029a9ff 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst) NLMSG_DONE, sizeof(struct nfgenmsg)); - status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + MSG_DONTWAIT); inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7e3fa410641e..5c589b27d6eb 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; -- cgit v1.2.3 From 9d173fc5dfa8c1b4578b331ac7ff3ce8af27006e Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 14 Jan 2010 13:09:14 +0200 Subject: mac80211: fix mac80211.h documentation warnings There were some warnings about missing documentation and a missing reference. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- Documentation/DocBook/mac80211.tmpl | 2 +- include/net/mac80211.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index f3f37f141dbd..971d1c0c83e5 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl @@ -144,7 +144,7 @@ usage should require reading the full document. this though and the recommendation to allow only a single interface in STA mode at first! -!Finclude/net/mac80211.h ieee80211_if_init_conf +!Finclude/net/mac80211.h ieee80211_vif diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f313a3cbabda..bbfa4750092e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -107,6 +107,7 @@ enum ieee80211_max_queues { * 2^n-1 in the range 1..32767] * @cw_max: maximum contention window [like @cw_min] * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled + * @uapsd: is U-APSD mode enabled for the queue */ struct ieee80211_tx_queue_params { u16 txop; @@ -608,7 +609,11 @@ enum ieee80211_conf_changed { /** * enum ieee80211_smps_mode - spatial multiplexing power save mode * - * @ + * @IEEE80211_SMPS_AUTOMATIC: automatic + * @IEEE80211_SMPS_OFF: off + * @IEEE80211_SMPS_STATIC: static + * @IEEE80211_SMPS_DYNAMIC: dynamic + * @IEEE80211_SMPS_NUM_MODES: internal, don't use */ enum ieee80211_smps_mode { IEEE80211_SMPS_AUTOMATIC, -- cgit v1.2.3 From c99445b14054e0c4ed4715df1dad1fc608cbab46 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 14 Jan 2010 13:09:21 +0200 Subject: mac80211: improve powersave documentation There has been some confusion how drivers should implement powersave support. Improve the documentation a bit to make it more clear what drivers need to do. Also mention about U-APSD. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 75 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bbfa4750092e..c90047de4428 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -569,7 +569,13 @@ struct ieee80211_rx_status { * @IEEE80211_CONF_MONITOR: there's a monitor interface present -- use this * to determine for example whether to calculate timestamps for packets * or not, do not use instead of filter flags! - * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only) + * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only). + * This is the power save mode defined by IEEE 802.11-2007 section 11.2, + * meaning that the hardware still wakes up for beacons, is able to + * transmit frames and receive the possible acknowledgment frames. + * Not to be confused with hardware specific wakeup/sleep states, + * driver is responsible for that. See the section "Powersave support" + * for more. * @IEEE80211_CONF_IDLE: The device is running, but idle; if the flag is set * the driver should be prepared to handle configuration requests but * may turn the device off as much as possible. Typically, this flag will @@ -1138,18 +1144,24 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * * mac80211 has support for various powersave implementations. * - * First, it can support hardware that handles all powersaving by - * itself, such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS - * hardware flag. In that case, it will be told about the desired - * powersave mode depending on the association status, and the driver - * must take care of sending nullfunc frames when necessary, i.e. when - * entering and leaving powersave mode. The driver is required to look at - * the AID in beacons and signal to the AP that it woke up when it finds - * traffic directed to it. This mode supports dynamic PS by simply - * enabling/disabling PS. - * - * Additionally, such hardware may set the %IEEE80211_HW_SUPPORTS_DYNAMIC_PS - * flag to indicate that it can support dynamic PS mode itself (see below). + * First, it can support hardware that handles all powersaving by itself, + * such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS hardware + * flag. In that case, it will be told about the desired powersave mode + * with the %IEEE80211_CONF_PS flag depending on the association status. + * The hardware must take care of sending nullfunc frames when necessary, + * i.e. when entering and leaving powersave mode. The hardware is required + * to look at the AID in beacons and signal to the AP that it woke up when + * it finds traffic directed to it. + * + * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in + * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused + * with hardware wakeup and sleep states. Driver is responsible for waking + * up the hardware before issueing commands to the hardware and putting it + * back to sleep at approriate times. + * + * When PS is enabled, hardware needs to wakeup for beacons and receive the + * buffered multicast/broadcast frames after the beacon. Also it must be + * possible to send frames and receive the acknowledment frame. * * Other hardware designs cannot send nullfunc frames by themselves and also * need software support for parsing the TIM bitmap. This is also supported @@ -1157,14 +1169,35 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still * required to pass up beacons. The hardware is still required to handle * waking up for multicast traffic; if it cannot the driver must handle that - * as best as it can, mac80211 is too slow. - * - * Dynamic powersave mode is an extension to normal powersave mode in which - * the hardware stays awake for a user-specified period of time after sending - * a frame so that reply frames need not be buffered and therefore delayed - * to the next wakeup. This can either be supported by hardware, in which case - * the driver needs to look at the @dynamic_ps_timeout hardware configuration - * value, or by the stack if all nullfunc handling is in the stack. + * as best as it can, mac80211 is too slow to do that. + * + * Dynamic powersave is an extension to normal powersave in which the + * hardware stays awake for a user-specified period of time after sending a + * frame so that reply frames need not be buffered and therefore delayed to + * the next wakeup. It's compromise of getting good enough latency when + * there's data traffic and still saving significantly power in idle + * periods. + * + * Dynamic powersave is supported by simply mac80211 enabling and disabling + * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS + * flag and mac80211 will handle everything automatically. Additionally, + * hardware having support for the dynamic PS feature may set the + * %IEEE80211_HW_SUPPORTS_DYNAMIC_PS flag to indicate that it can support + * dynamic PS mode itself. The driver needs to look at the + * @dynamic_ps_timeout hardware configuration value and use it that value + * whenever %IEEE80211_CONF_PS is set. In this case mac80211 will disable + * dynamic PS feature in stack and will just keep %IEEE80211_CONF_PS + * enabled whenever user has enabled powersave. + * + * Driver informs U-APSD client support by enabling + * %IEEE80211_HW_SUPPORTS_UAPSD flag. The mode is configured through the + * uapsd paramater in conf_tx() operation. Hardware needs to send the QoS + * Nullfunc frames and stay awake until the service period has ended. To + * utilize U-APSD, dynamic powersave is disabled for voip AC and all frames + * from that AC are transmitted with powersave enabled. + * + * Note: U-APSD client mode is not yet supported with + * %IEEE80211_HW_PS_NULLFUNC_STACK. */ /** -- cgit v1.2.3 From d00c362f1b0ff54161e0a42b4554ac621a9ef92d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sat, 16 Jan 2010 01:04:04 -0800 Subject: ax25: netrom: rose: Fix timer oopses Wrong ax25_cb refcounting in ax25_send_frame() and by its callers can cause timer oopses (first reported with 2.6.29.6 kernel). Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14905 Reported-by: Bernard Pidoux Tested-by: Bernard Pidoux Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/netrom.h | 2 ++ net/ax25/ax25_out.c | 6 ++++++ net/netrom/nr_route.c | 11 ++++++----- net/rose/rose_link.c | 8 ++++++++ net/rose/rose_route.c | 5 +++++ 5 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netrom.h b/include/net/netrom.h index 15696b1fd30f..ab170a60e7d3 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -132,6 +132,8 @@ static __inline__ void nr_node_put(struct nr_node *nr_node) static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) { if (atomic_dec_and_test(&nr_neigh->refcount)) { + if (nr_neigh->ax25) + ax25_cb_put(nr_neigh->ax25); kfree(nr_neigh->digipeat); kfree(nr_neigh); } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index bf706f83a5c9..14912600ec57 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -92,6 +92,12 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2 #endif } + /* + * There is one ref for the state machine; a caller needs + * one more to put it back, just like with the existing one. + */ + ax25_cb_hold(ax25); + ax25_cb_add(ax25); ax25->state = AX25_STATE_1; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index aacba76070fc..e2e2d33cafdf 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -843,12 +843,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) dptr = skb_push(skb, 1); *dptr = AX25_P_NETROM; - ax25s = ax25_send_frame(skb, 256, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); - if (nr_neigh->ax25 && ax25s) { - /* We were already holding this ax25_cb */ + ax25s = nr_neigh->ax25; + nr_neigh->ax25 = ax25_send_frame(skb, 256, + (ax25_address *)dev->dev_addr, + &nr_neigh->callsign, + nr_neigh->digipeat, nr_neigh->dev); + if (ax25s) ax25_cb_put(ax25s); - } - nr_neigh->ax25 = ax25s; dev_put(dev); ret = (nr_neigh->ax25 != NULL); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index bd86a63960ce..5ef5f6988a2e 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -101,13 +101,17 @@ static void rose_t0timer_expiry(unsigned long param) static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } @@ -120,13 +124,17 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) static int rose_link_up(struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 795c4b025e31..70a0b3b4b4d2 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -235,6 +235,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -243,6 +245,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -812,6 +816,7 @@ void rose_link_failed(ax25_cb *ax25, int reason) if (rose_neigh != NULL) { rose_neigh->ax25 = NULL; + ax25_cb_put(ax25); rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); -- cgit v1.2.3 From 72659ecce68588b74f6c46862c2b4cec137d7a5a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 17 Jan 2010 19:09:39 -0800 Subject: tcp: account SYN-ACK timeouts & retransmissions Currently we don't increment SYN-ACK timeouts & retransmissions although we do increment the same stats for SYN. We seem to have lost the SYN-ACK accounting with the introduction of tcp_syn_recv_timer (commit 2248761e in the netdev-vger-cvs tree). This patch fixes this issue. In the process we also rename the v4/v6 syn/ack retransmit functions for clarity. We also add a new request_socket operations (syn_ack_timeout) so we can keep code in inet_connection_sock.c protocol agnostic. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 ++ include/net/tcp.h | 2 ++ net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/tcp_ipv4.c | 18 ++++++++++-------- net/ipv4/tcp_timer.c | 6 ++++++ net/ipv6/tcp_ipv6.c | 12 ++++++++++-- 6 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c9b50ebd9ce9..99e6e19b57c2 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -45,6 +45,8 @@ struct request_sock_ops { void (*send_reset)(struct sock *sk, struct sk_buff *skb); void (*destructor)(struct request_sock *req); + void (*syn_ack_timeout)(struct sock *sk, + struct request_sock *req); }; /* struct request_sock - mini sock to represent a connection request diff --git a/include/net/tcp.h b/include/net/tcp.h index 788c99f98597..87d164b9bd8f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -400,6 +400,8 @@ extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); extern void tcp_set_keepalive(struct sock *sk, int val); +extern void tcp_syn_ack_timeout(struct sock *sk, + struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ee16475f8fc3..8da6429269dd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -529,6 +529,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, &expire, &resend); + if (req->rsk_ops->syn_ack_timeout) + req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 382f667238ec..356f544c4c10 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -742,9 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -775,10 +775,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { - return __tcp_v4_send_synack(sk, NULL, req, rvp); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -1192,10 +1193,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_send_synack, + .rtx_syn_ack = tcp_v4_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -1373,8 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext) || + if (tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || want_cookie) goto drop_and_free; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8816a20c2597..de7d1bf9114f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -474,6 +474,12 @@ static void tcp_synack_timer(struct sock *sk) TCP_TIMEOUT_INIT, TCP_RTO_MAX); } +void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +{ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); +} +EXPORT_SYMBOL(tcp_syn_ack_timeout); + void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1c832bf198b3..82f2dea0e39e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -520,6 +520,13 @@ done: return err; } +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) +{ + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v6_send_synack(sk, req, rvp); +} + static inline void syn_flood_warning(struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES @@ -890,10 +897,11 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_send_synack, + .rtx_syn_ack = tcp_v6_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, - .send_reset = tcp_v6_send_reset + .send_reset = tcp_v6_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG -- cgit v1.2.3 From e9d3897cc2205eec8b7afcc022e4730914b4f48c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:08:37 +0100 Subject: netfilter: netns: #ifdef ->iptable_security, ->ip6table_security 'security' tables depend on SECURITY, so ifdef them. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 2 ++ include/net/netns/ipv6.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d6258..8098f6b8319d 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -37,7 +37,9 @@ struct netns_ipv4 { struct xt_table *iptable_mangle; struct xt_table *iptable_raw; struct xt_table *arptable_filter; +#ifdef CONFIG_SECURITY struct xt_table *iptable_security; +#endif struct xt_table *nat_table; struct hlist_head *nat_bysource; int nat_vmalloced; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index dfeb2d7c425b..1f11ebc22151 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,7 +36,9 @@ struct netns_ipv6 { struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; struct xt_table *ip6table_raw; +#ifdef CONFIG_SECURITY struct xt_table *ip6table_security; +#endif #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; -- cgit v1.2.3 From c6fcf6bcfc3cfc1c00cc7fd9610cfa2b1a18041f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:59 +0100 Subject: mac80211: re-enable re-transmission of filtered frames In an earlier commit, mac80211: disable software retry for now Pavel Roskin reported a problem that seems to be due to software retry of already transmitted frames. It turns out that we've never done that correctly, but due to some recent changes it now crashes in the TX code. I've added a comment in the patch that explains the problem better and also points to possible solutions -- which I can't implement right now. I disabled software retry of failed/filtered frames because it was broken. With the work of the previous patches, it now becomes fairly easy to re-enable it by adding a flag indicating that the frame shouldn't be modified, but still running it through the transmit handlers to populate the control information. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 ++++ net/mac80211/status.c | 32 +++++--------------------------- net/mac80211/tx.c | 7 ++++++- 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c90047de4428..f03f97b627fe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -271,6 +271,9 @@ struct ieee80211_bss_conf { * transmit function after the current frame, this can be used * by drivers to kick the DMA queue only if unset or when the * queue gets full. + * @IEEE80211_TX_INTFL_RETRANSMISSION: This frame is being retransmitted + * after TX status because the destination was asleep, it must not + * be modified again (no seqno assignment, crypto, etc.) */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -291,6 +294,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), IEEE80211_TX_CTL_PSPOLL_RESPONSE = BIT(17), IEEE80211_TX_CTL_MORE_FRAMES = BIT(18), + IEEE80211_TX_INTFL_RETRANSMISSION = BIT(19), }; /** diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9e171b178276..800b6777e0ed 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -44,38 +44,17 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - /* - * XXX: This is temporary! - * - * The problem here is that when we get here, the driver will - * quite likely have pretty much overwritten info->control by - * using info->driver_data or info->rate_driver_data. Thus, - * when passing out the frame to the driver again, we would be - * passing completely bogus data since the driver would then - * expect a properly filled info->control. In mac80211 itself - * the same problem occurs, since we need info->control.vif - * internally. - * - * To fix this, we should send the frame through TX processing - * again. However, it's not that simple, since the frame will - * have been software-encrypted (if applicable) already, and - * encrypting it again doesn't do much good. So to properly do - * that, we not only have to skip the actual 'raw' encryption - * (key selection etc. still has to be done!) but also the - * sequence number assignment since that impacts the crypto - * encapsulation, of course. - * - * Hence, for now, fix the bug by just dropping the frame. - */ - goto drop; - /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, * we can definitely not rely on the the control information - * being correct. Clear it so we don't get junk there. + * being correct. Clear it so we don't get junk there, and + * indicate that it needs new processing, but must not be + * modified/encrypted again. */ memset(&info->control, 0, sizeof(info->control)); + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | + IEEE80211_TX_INTFL_RETRANSMISSION; sta->tx_filtered_count++; @@ -130,7 +109,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } - drop: #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped TX filtered frame, " diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e3d8ff533ee6..da557b0d0114 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1285,6 +1285,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, static int invoke_tx_handlers(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1299,9 +1300,13 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); CALL_TXH(ieee80211_tx_h_sta); - CALL_TXH(ieee80211_tx_h_michael_mic_add); if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) + goto txh_done; + + CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); /* handlers after fragment must be aware of tx info fragmentation! */ -- cgit v1.2.3 From 7c070aa947d1a4105742378579c267f6e7fd08a1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 20 Jan 2010 10:42:41 +0100 Subject: IPv6: reassembly: replace magic number with macro definitions Use macro to define high/low thresh value, refer to IPV6_FRAG_TIMEOUT. Signed-off-by: Shan Wei Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- include/net/ipv6.h | 2 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ccab5946c830..299bbf5adfb6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -246,6 +246,8 @@ extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); int ip6_frag_nqueues(struct net *net); int ip6_frag_mem(struct net *net); +#define IPV6_FRAG_HIGH_THRESH 262144 /* == 256*1024 */ +#define IPV6_FRAG_LOW_THRESH 196608 /* == 192*1024 */ #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ extern int __ipv6_addr_type(const struct in6_addr *addr); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1030ce1e6c79..744ea49de356 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -666,8 +666,8 @@ int nf_ct_frag6_init(void) nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = 256 * 1024; - nf_init_frags.low_thresh = 192 * 1024; + nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2cddea3bd6be..15bb122e1ce4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -747,8 +747,8 @@ static inline void ip6_frags_sysctl_unregister(void) static int ipv6_frags_init_net(struct net *net) { - net->ipv6.frags.high_thresh = 256 * 1024; - net->ipv6.frags.low_thresh = 192 * 1024; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; inet_frags_init_net(&net->ipv6.frags); -- cgit v1.2.3 From b3fbdcf49f940d0703c356441e0daf045e64e076 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Jan 2010 11:40:47 +0100 Subject: mac80211: pass vif and station to update_tkip_key When a TKIP key is updated, we should pass the station pointer instead of just the address, since drivers can use that to store their own data. We also need to pass the virtual interface pointer. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/b43/main.c | 11 ++++++++--- drivers/net/wireless/iwlwifi/iwl-agn.c | 10 +++++++--- include/net/mac80211.h | 6 ++++-- net/mac80211/driver-ops.h | 14 ++++++++++---- net/mac80211/driver-trace.h | 15 +++++++++------ net/mac80211/tkip.c | 12 +++++------- 6 files changed, 43 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index c238468bca7f..c699e46534dc 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -844,8 +844,10 @@ static void rx_tkip_phase1_write(struct b43_wldev *dev, u8 index, u32 iv32, } static void b43_op_update_tkip_key(struct ieee80211_hw *hw, - struct ieee80211_key_conf *keyconf, const u8 *addr, - u32 iv32, u16 *phase1key) + struct ieee80211_vif *vif, + struct ieee80211_key_conf *keyconf, + struct ieee80211_sta *sta, + u32 iv32, u16 *phase1key) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev; @@ -863,7 +865,10 @@ static void b43_op_update_tkip_key(struct ieee80211_hw *hw, keymac_write(dev, index, NULL); /* First zero out mac to avoid race */ rx_tkip_phase1_write(dev, index, iv32, phase1key); - keymac_write(dev, index, addr); + /* only pairwise TKIP keys are supported right now */ + if (WARN_ON(!sta)) + goto out_unlock; + keymac_write(dev, index, sta->addr); out_unlock: mutex_unlock(&wl->mutex); diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 8db86239bd6a..62b6939df52e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2839,14 +2839,18 @@ void iwl_config_ap(struct iwl_priv *priv) } static void iwl_mac_update_tkip_key(struct ieee80211_hw *hw, - struct ieee80211_key_conf *keyconf, const u8 *addr, - u32 iv32, u16 *phase1key) + struct ieee80211_vif *vif, + struct ieee80211_key_conf *keyconf, + struct ieee80211_sta *sta, + u32 iv32, u16 *phase1key) { struct iwl_priv *priv = hw->priv; IWL_DEBUG_MAC80211(priv, "enter\n"); - iwl_update_tkip_key(priv, keyconf, addr, iv32, phase1key); + iwl_update_tkip_key(priv, keyconf, + sta ? sta->addr : iwl_bcast_addr, + iv32, phase1key); IWL_DEBUG_MAC80211(priv, "leave\n"); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f03f97b627fe..f56d6f479532 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1614,8 +1614,10 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key); void (*update_tkip_key)(struct ieee80211_hw *hw, - struct ieee80211_key_conf *conf, const u8 *address, - u32 iv32, u16 *phase1key); + struct ieee80211_vif *vif, + struct ieee80211_key_conf *conf, + struct ieee80211_sta *sta, + u32 iv32, u16 *phase1key); int (*hw_scan)(struct ieee80211_hw *hw, struct cfg80211_scan_request *req); void (*sw_scan_start)(struct ieee80211_hw *hw); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index de91d39e0276..40c6e9a89864 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -137,16 +137,22 @@ static inline int drv_set_key(struct ieee80211_local *local, } static inline void drv_update_tkip_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, - const u8 *address, u32 iv32, + struct sta_info *sta, u32 iv32, u16 *phase1key) { + struct ieee80211_sta *ista = NULL; + might_sleep(); + if (sta) + ista = &sta->sta; + if (local->ops->update_tkip_key) - local->ops->update_tkip_key(&local->hw, conf, address, - iv32, phase1key); - trace_drv_update_tkip_key(local, conf, address, iv32); + local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, + ista, iv32, phase1key); + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); } static inline int drv_hw_scan(struct ieee80211_local *local, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 0ea258123b8e..fefa6e6b01bc 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -331,26 +331,29 @@ TRACE_EVENT(drv_set_key, TRACE_EVENT(drv_update_tkip_key, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, - const u8 *address, u32 iv32), + struct ieee80211_sta *sta, u32 iv32), - TP_ARGS(local, conf, address, iv32), + TP_ARGS(local, sdata, conf, sta, iv32), TP_STRUCT__entry( LOCAL_ENTRY - __array(u8, addr, 6) + VIF_ENTRY + STA_ENTRY __field(u32, iv32) ), TP_fast_assign( LOCAL_ASSIGN; - memcpy(__entry->addr, address, 6); + VIF_ASSIGN; + STA_ASSIGN; __entry->iv32 = iv32; ), TP_printk( - LOCAL_PR_FMT " addr:%pM iv32:%#x", - LOCAL_PR_ARG, __entry->addr, __entry->iv32 + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x", + LOCAL_PR_ARG,VIF_PR_ARG,STA_PR_ARG, __entry->iv32 ) ); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 14fe49332c02..7ef491e9d66d 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -304,14 +304,12 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if (key->local->ops->update_tkip_key && key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && key->u.tkip.rx[queue].state != TKIP_STATE_PHASE1_HW_UPLOADED) { - static const u8 bcast[ETH_ALEN] = - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - const u8 *sta_addr = key->sta->sta.addr; + struct ieee80211_sub_if_data *sdata = key->sdata; - if (is_multicast_ether_addr(ra)) - sta_addr = bcast; - - drv_update_tkip_key(key->local, &key->conf, sta_addr, + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(key->sdata->bss, + struct ieee80211_sub_if_data, u.ap); + drv_update_tkip_key(key->local, sdata, &key->conf, key->sta, iv32, key->u.tkip.rx[queue].p1k); key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; } -- cgit v1.2.3 From ef15aac6073b27fd4f70007784d2d52ed394bf43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jan 2010 12:02:33 +0100 Subject: cfg80211: export multiple MAC addresses in sysfs If a device has multiple MAC addresses, userspace will need to know about that. Similarly, if it allows the MAC addresses to vary by a bitmask. If a driver exports multiple addresses, it is assumed that it will be able to deal with that many different addresses, which need not necessarily match the ones programmed into the device; if a mask is set then the device should deal addresses within that mask based on an arbitrary "base address". To test it all and show how it is used, add support to hwsim even though it can't actually deal with addresses different from the default. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 8 +++++++- include/net/cfg80211.h | 22 +++++++++++++++++++++- net/wireless/core.c | 12 ++++++++++++ net/wireless/sysfs.c | 20 ++++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 84df3fcf37b3..0dbda8dfbd99 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -281,6 +281,8 @@ struct mac80211_hwsim_data { struct ieee80211_channel channels_5ghz[ARRAY_SIZE(hwsim_channels_5ghz)]; struct ieee80211_rate rates[ARRAY_SIZE(hwsim_rates)]; + struct mac_address addresses[2]; + struct ieee80211_channel *channel; unsigned long beacon_int; /* in jiffies unit */ unsigned int rx_filter; @@ -1154,7 +1156,11 @@ static int __init init_mac80211_hwsim(void) SET_IEEE80211_DEV(hw, data->dev); addr[3] = i >> 8; addr[4] = i; - SET_IEEE80211_PERM_ADDR(hw, addr); + memcpy(data->addresses[0].addr, addr, ETH_ALEN); + memcpy(data->addresses[1].addr, addr, ETH_ALEN); + data->addresses[1].addr[0] |= 0x40; + hw->wiphy->n_addresses = 2; + hw->wiphy->addresses = data->addresses; hw->channel_change_time = 1; hw->queues = 4; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2af52704e670..c5d16f299d6f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1195,6 +1195,10 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), }; +struct mac_address { + u8 addr[ETH_ALEN]; +}; + /** * struct wiphy - wireless hardware description * @idx: the wiphy index assigned to this item @@ -1213,12 +1217,28 @@ enum wiphy_flags { * -1 = fragmentation disabled, only odd values >= 256 used * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled * @net: the network namespace this wiphy currently lives in + * @perm_addr: permanent MAC address of this device + * @addr_mask: If the device supports multiple MAC addresses by masking, + * set this to a mask with variable bits set to 1, e.g. if the last + * four bits are variable then set it to 00:...:00:0f. The actual + * variable bits shall be determined by the interfaces added, with + * interfaces not matching the mask being rejected to be brought up. + * @n_addresses: number of addresses in @addresses. + * @addresses: If the device has more than one address, set this pointer + * to a list of addresses (6 bytes each). The first one will be used + * by default for perm_addr. In this case, the mask should be set to + * all-zeroes. In this case it is assumed that the device can handle + * the same number of arbitrary MAC addresses. */ struct wiphy { /* assign these fields before you register the wiphy */ - /* permanent MAC address */ + /* permanent MAC address(es) */ u8 perm_addr[ETH_ALEN]; + u8 addr_mask[ETH_ALEN]; + + u16 n_addresses; + struct mac_address *addresses; /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; diff --git a/net/wireless/core.c b/net/wireless/core.c index d07f57c906db..71b6b3a9cf1f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -413,6 +413,18 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + if (WARN_ON(wiphy->addresses && !wiphy->n_addresses)) + return -EINVAL; + + if (WARN_ON(wiphy->addresses && + !is_zero_ether_addr(wiphy->perm_addr) && + memcmp(wiphy->perm_addr, wiphy->addresses[0].addr, + ETH_ALEN))) + return -EINVAL; + + if (wiphy->addresses) + memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); + /* sanity check ifmodes */ WARN_ON(!ifmodes); ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index efe3c5c92b2d..9f2cef3e0ca0 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -33,10 +33,30 @@ static ssize_t name ## _show(struct device *dev, \ SHOW_FMT(index, "%d", wiphy_idx); SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); +SHOW_FMT(address_mask, "%pM", wiphy.addr_mask); + +static ssize_t addresses_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy; + char *start = buf; + int i; + + if (!wiphy->addresses) + return sprintf(buf, "%pM\n", wiphy->perm_addr); + + for (i = 0; i < wiphy->n_addresses; i++) + buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr); + + return buf - start; +} static struct device_attribute ieee80211_dev_attrs[] = { __ATTR_RO(index), __ATTR_RO(macaddress), + __ATTR_RO(address_mask), + __ATTR_RO(addresses), {} }; -- cgit v1.2.3 From 5833929cc2ad2b3064b4fac8c44e293972d240d8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:17:26 +0000 Subject: net: constify MIB name tables Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/snmp.h | 2 +- net/ipv4/proc.c | 4 ++-- net/ipv6/proc.c | 12 ++++++------ net/sctp/proc.c | 2 +- net/xfrm/xfrm_proc.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/snmp.h b/include/net/snmp.h index f0d756f2ac99..da02ee027d69 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -32,7 +32,7 @@ * - name of entries. */ struct snmp_mib { - char *name; + const char *name; int entry; }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f25542c48b7d..1b09a6dde7c0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_SENTINEL }; -static struct { - char *name; +static const struct { + const char *name; int index; } icmpmibmap[] = { { "DestUnreachs", ICMP_DEST_UNREACH }, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 02f20016b4c9..bfe2598dd563 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -59,7 +59,7 @@ static const struct file_operations sockstat6_seq_fops = { .release = single_release_net, }; -static struct snmp_mib snmp6_ipstats_list[] = { +static const struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS), @@ -92,7 +92,7 @@ static struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_SENTINEL }; -static struct snmp_mib snmp6_icmp6_list[] = { +static const struct snmp_mib snmp6_icmp6_list[] = { /* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), @@ -120,7 +120,7 @@ static const char *const icmp6type2name[256] = { }; -static struct snmp_mib snmp6_udp6_list[] = { +static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), @@ -128,7 +128,7 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; -static struct snmp_mib snmp6_udplite6_list[] = { +static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), @@ -170,8 +170,8 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) return; } -static inline void -snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) +static void snmp6_seq_show_item(struct seq_file *seq, void **mib, + const struct snmp_mib *itemlist) { int i; for (i=0; itemlist[i].name; i++) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index d093cbfeaac4..a5ac6e0a8d9c 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -40,7 +40,7 @@ #include #include /* for snmp_fold_field */ -static struct snmp_mib sctp_snmp_list[] = { +static const struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB), SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS), SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS), diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index fef8db553e8d..c083a4e4e796 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -15,7 +15,7 @@ #include #include -static struct snmp_mib xfrm_mib_list[] = { +static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR), SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR), SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR), -- cgit v1.2.3 From e754834e65220b2b674c55c3b6dfb2fb1a2804d0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:18:25 +0000 Subject: icmp: move icmp_err_convert[] to .rodata Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/icmp.h | 2 +- net/ipv4/icmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/icmp.h b/include/net/icmp.h index dfa72d4e8907..15b3dfe9fce8 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -28,7 +28,7 @@ struct icmp_err { unsigned fatal:1; }; -extern struct icmp_err icmp_err_convert[]; +extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmpmsg_statistics, field+256) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe11f60ce41b..4b4c2bcd15db 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -114,7 +114,7 @@ struct icmp_bxm { /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ -struct icmp_err icmp_err_convert[] = { +const struct icmp_err icmp_err_convert[] = { { .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ .fatal = 0, -- cgit v1.2.3 From e071041be037eca208b62b84469a06bdfc692bea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Jan 2010 13:37:10 +0000 Subject: netns xfrm: fix "ip xfrm state|policy count" misreport "ip xfrm state|policy count" report SA/SP count from init_net, not from netns of caller process. Signed-off-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/xfrm/xfrm_policy.c | 16 ++++++++-------- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 14 ++++++++------ 4 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6d85861ab990..60c27706e7b9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1367,8 +1367,8 @@ struct xfrmk_spdinfo { extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); -extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); -extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si); +extern void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); +extern void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4725a549ad4d..d2c8cb57ee4c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -469,16 +469,16 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; - si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; - si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = init_net.xfrm.policy_idx_hmask; + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d847f1a52b44..b36cc344474b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -641,11 +641,11 @@ out: } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) +void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = init_net.xfrm.state_num; - si->sadhcnt = init_net.xfrm.state_hmask; + si->sadcnt = net->xfrm.state_num; + si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1ada6186933c..d5a712976004 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -781,7 +781,8 @@ static inline size_t xfrm_spdinfo_msgsize(void) + nla_total_size(sizeof(struct xfrmu_spdhinfo)); } -static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_spdinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -795,7 +796,7 @@ static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_spd_getinfo(&si); + xfrm_spd_getinfo(net, &si); spc.incnt = si.incnt; spc.outcnt = si.outcnt; spc.fwdcnt = si.fwdcnt; @@ -828,7 +829,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); @@ -841,7 +842,8 @@ static inline size_t xfrm_sadinfo_msgsize(void) + nla_total_size(4); /* XFRMA_SAD_CNT */ } -static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_sadinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -854,7 +856,7 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_sad_getinfo(&si); + xfrm_sad_getinfo(net, &si); sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; @@ -882,7 +884,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); -- cgit v1.2.3 From d7c7544c3d5f59033d1bf3236bc7b289f5f26b75 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 24 Jan 2010 22:47:53 -0800 Subject: netns xfrm: deal with dst entries in netns GC is non-existent in netns, so after you hit GC threshold, no new dst entries will be created until someone triggers cleanup in init_net. Make xfrm4_dst_ops and xfrm6_dst_ops per-netns. This is not done in a generic way, because it woule waste (AF_MAX - 2) * sizeof(struct dst_ops) bytes per-netns. Reorder GC threshold initialization so it'd be done before registering XFRM policies. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 6 +++++ net/ipv4/xfrm4_policy.c | 14 +++++++----- net/ipv6/xfrm6_policy.c | 25 +++++++++++--------- net/xfrm/xfrm_policy.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 56f8e5585df7..74f119a2829a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,7 @@ #include #include #include +#include struct ctl_table_header; @@ -42,6 +43,11 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif + struct sock *nlsk; struct sock *nlsk_stash; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8c08a28d8f83..67107d63c1cd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,6 @@ #include #include -static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, @@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); + + xfrm4_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", - .data = &xfrm4_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void) void __init xfrm4_init(int rt_max_size) { - xfrm4_state_init(); - xfrm4_policy_init(); /* * Select a default value for the gc_thresh based on the main route * table hash size. It seems to me the worst case scenario is when @@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + + xfrm4_state_init(); + xfrm4_policy_init(); #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7254e3f899a7..dbdc696f5fc5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -24,7 +24,6 @@ #include #endif -static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, @@ -224,8 +223,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); + + xfrm6_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -310,7 +311,7 @@ static void xfrm6_policy_fini(void) static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", - .data = &xfrm6_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -326,13 +327,6 @@ int __init xfrm6_init(void) int ret; unsigned int gc_thresh; - ret = xfrm6_policy_init(); - if (ret) - goto out; - - ret = xfrm6_state_init(); - if (ret) - goto out_policy; /* * We need a good default value for the xfrm6 gc threshold. * In ipv4 we set it to the route hash table size * 8, which @@ -346,6 +340,15 @@ int __init xfrm6_init(void) */ gc_thresh = FIB6_TABLE_HASHSZ * 8; xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + + ret = xfrm6_policy_init(); + if (ret) + goto out; + + ret = xfrm6_state_init(); + if (ret) + goto out_policy; + #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d2c8cb57ee4c..0ecb16a9a883 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1309,15 +1309,28 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); @@ -1366,6 +1379,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct dst_entry *dst_prev = NULL; @@ -1389,7 +1403,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -2279,6 +2293,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok); int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2302,6 +2317,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2332,6 +2368,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +static void __net_init xfrm_dst_ops_init(struct net *net) +{ + struct xfrm_policy_afinfo *afinfo; + + read_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[AF_INET]; + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + afinfo = xfrm_policy_afinfo[AF_INET6]; + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + read_unlock_bh(&xfrm_policy_afinfo_lock); +} + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { struct xfrm_policy_afinfo *afinfo; @@ -2494,6 +2546,7 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; -- cgit v1.2.3 From eb807fb23878bc319e029ed8ce3d835d239723a5 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 24 Jan 2010 14:55:12 +0200 Subject: mac80211: fix update_tkip_key() documentation about the context Johannes noticed that I had incorrectly documented the context of update_tkip_key() driver operation. It must be atomic because all RX code is run inside rcu critical section. Reported-by: Johannes Berg Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- net/mac80211/driver-ops.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f56d6f479532..f64402f6312b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1489,7 +1489,7 @@ enum ieee80211_ampdu_mlme_action { * @update_tkip_key: See the section "Hardware crypto acceleration" * This callback will be called in the context of Rx. Called for drivers * which set IEEE80211_KEY_FLAG_TKIP_REQ_RX_P1_KEY. - * The callback can sleep. + * The callback must be atomic. * * @hw_scan: Ask the hardware to service the scan request, no need to start * the scan state machine in stack. The scan must honour the channel diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 40c6e9a89864..6c31f38ac7f5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -144,8 +144,6 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, { struct ieee80211_sta *ista = NULL; - might_sleep(); - if (sta) ista = &sta->sta; -- cgit v1.2.3 From c21dbf9214bce129f92e1af05552553ff0e318ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2010 14:15:46 +0100 Subject: cfg80211: export cfg80211_find_ie This new function (previously a static function called just "find_ie" can be used to find a specific IE in a buffer of IEs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 16 ++++++++++++++++ net/wireless/scan.c | 38 ++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c5d16f299d6f..a3f0a7ed31ac 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1658,6 +1658,22 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, */ unsigned int cfg80211_classify8021d(struct sk_buff *skb); +/** + * cfg80211_find_ie - find information element in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * + * This function will return %NULL if the element ID could + * not be found or if the element is invalid (claims to be + * longer than the given data), or a pointer to the first byte + * of the requested element, that is the byte containing the + * element ID. There are no checks on the element length + * other than having to fit into the given data. + */ +const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); + /* * Regulatory helper functions for wiphys */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 06b0231ee5e3..978cac3414b5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -143,9 +143,9 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) dev->bss_generation++; } -static u8 *find_ie(u8 num, u8 *ies, int len) +const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) { - while (len > 2 && ies[0] != num) { + while (len > 2 && ies[0] != eid) { len -= ies[1] + 2; ies += ies[1] + 2; } @@ -155,11 +155,12 @@ static u8 *find_ie(u8 num, u8 *ies, int len) return NULL; return ies; } +EXPORT_SYMBOL(cfg80211_find_ie); static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) { - const u8 *ie1 = find_ie(num, ies1, len1); - const u8 *ie2 = find_ie(num, ies2, len2); + const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); + const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); int r; if (!ie1 && !ie2) @@ -185,9 +186,9 @@ static bool is_bss(struct cfg80211_bss *a, if (!ssid) return true; - ssidie = find_ie(WLAN_EID_SSID, - a->information_elements, - a->len_information_elements); + ssidie = cfg80211_find_ie(WLAN_EID_SSID, + a->information_elements, + a->len_information_elements); if (!ssidie) return false; if (ssidie[1] != ssid_len) @@ -204,9 +205,9 @@ static bool is_mesh(struct cfg80211_bss *a, if (!is_zero_ether_addr(a->bssid)) return false; - ie = find_ie(WLAN_EID_MESH_ID, - a->information_elements, - a->len_information_elements); + ie = cfg80211_find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); if (!ie) return false; if (ie[1] != meshidlen) @@ -214,9 +215,9 @@ static bool is_mesh(struct cfg80211_bss *a, if (memcmp(ie + 2, meshid, meshidlen)) return false; - ie = find_ie(WLAN_EID_MESH_CONFIG, - a->information_elements, - a->len_information_elements); + ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); if (!ie) return false; if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) @@ -395,11 +396,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (is_zero_ether_addr(res->pub.bssid)) { /* must be mesh, verify */ - meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements, - res->pub.len_information_elements); - meshcfg = find_ie(WLAN_EID_MESH_CONFIG, - res->pub.information_elements, - res->pub.len_information_elements); + meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, + res->pub.information_elements, + res->pub.len_information_elements); + meshcfg = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + res->pub.information_elements, + res->pub.len_information_elements); if (!meshid || !meshcfg || meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { /* bogus mesh */ -- cgit v1.2.3 From 56007a028c51cbf800a6c969d6f6431d23443b99 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2010 14:19:52 +0100 Subject: mac80211: wait for beacon before enabling powersave Because DTIM information is required for powersave but is only conveyed in beacons, wait for a beacon before enabling powersave, and change the way the information is conveyed to the driver accordingly. mwl8k doesn't currently seem to implement PS but requires the DTIM period in a different way; after talking to Lennert we agreed to just have mwl8k do the parsing itself in the finalize_join work. Signed-off-by: Johannes Berg Acked-by: Lennert Buytenhek Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-power.c | 2 +- drivers/net/wireless/mwl8k.c | 14 +++++++++----- drivers/net/wireless/wl12xx/wl1251.h | 3 --- drivers/net/wireless/wl12xx/wl1251_main.c | 25 +++++++------------------ include/net/mac80211.h | 7 ++++++- net/mac80211/mlme.c | 27 ++++++++++++++++++++++++--- net/mac80211/scan.c | 4 ---- 7 files changed, 47 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlwifi/iwl-power.c b/drivers/net/wireless/iwlwifi/iwl-power.c index 8599444bef01..9e3ca0641451 100644 --- a/drivers/net/wireless/iwlwifi/iwl-power.c +++ b/drivers/net/wireless/iwlwifi/iwl-power.c @@ -319,7 +319,7 @@ int iwl_power_update_mode(struct iwl_priv *priv, bool force) priv->chain_noise_data.state == IWL_CHAIN_NOISE_ALIVE; if (priv->vif) - dtimper = priv->vif->bss_conf.dtim_period; + dtimper = priv->hw->conf.ps_dtim_period; else dtimper = 1; diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 68546ca0ba37..f0f08f3919cc 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3881,12 +3881,16 @@ static void mwl8k_finalize_join_worker(struct work_struct *work) struct mwl8k_priv *priv = container_of(work, struct mwl8k_priv, finalize_join_worker); struct sk_buff *skb = priv->beacon_skb; - struct mwl8k_vif *mwl8k_vif; + struct ieee80211_mgmt *mgmt = (void *)skb->data; + int len = skb->len - offsetof(struct ieee80211_mgmt, u.beacon.variable); + const u8 *tim = cfg80211_find_ie(WLAN_EID_TIM, + mgmt->u.beacon.variable, len); + int dtim_period = 1; + + if (tim && tim[1] >= 2) + dtim_period = tim[3]; - mwl8k_vif = mwl8k_first_vif(priv); - if (mwl8k_vif != NULL) - mwl8k_cmd_finalize_join(priv->hw, skb->data, skb->len, - mwl8k_vif->vif->bss_conf.dtim_period); + mwl8k_cmd_finalize_join(priv->hw, skb->data, skb->len, dtim_period); dev_kfree_skb(skb); priv->beacon_skb = NULL; diff --git a/drivers/net/wireless/wl12xx/wl1251.h b/drivers/net/wireless/wl12xx/wl1251.h index 6301578d1565..37c61c19cae5 100644 --- a/drivers/net/wireless/wl12xx/wl1251.h +++ b/drivers/net/wireless/wl12xx/wl1251.h @@ -341,9 +341,6 @@ struct wl1251 { /* Are we currently scanning */ bool scanning; - /* Our association ID */ - u16 aid; - /* Default key (for WEP) */ u32 default_key; diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 595f0f94d16e..a717dde4822e 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -617,10 +617,13 @@ static int wl1251_op_config(struct ieee80211_hw *hw, u32 changed) wl->psm_requested = true; + wl->dtim_period = conf->ps_dtim_period; + + ret = wl1251_acx_wr_tbtt_and_dtim(wl, wl->beacon_int, + wl->dtim_period); + /* - * We enter PSM only if we're already associated. - * If we're not, we'll enter it when joining an SSID, - * through the bss_info_changed() hook. + * mac80211 enables PSM only if we're already associated. */ ret = wl1251_ps_set_mode(wl, STATION_POWER_SAVE_MODE); if (ret < 0) @@ -943,7 +946,6 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_bss_conf *bss_conf, u32 changed) { - enum wl1251_cmd_ps_mode mode; struct wl1251 *wl = hw->priv; struct sk_buff *beacon, *skb; int ret; @@ -984,11 +986,6 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_ASSOC) { if (bss_conf->assoc) { wl->beacon_int = bss_conf->beacon_int; - wl->dtim_period = bss_conf->dtim_period; - - ret = wl1251_acx_wr_tbtt_and_dtim(wl, wl->beacon_int, - wl->dtim_period); - wl->aid = bss_conf->aid; skb = ieee80211_pspoll_get(wl->hw, wl->vif); if (!skb) @@ -1001,17 +998,9 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, if (ret < 0) goto out_sleep; - ret = wl1251_acx_aid(wl, wl->aid); + ret = wl1251_acx_aid(wl, bss_conf->aid); if (ret < 0) goto out_sleep; - - /* If we want to go in PSM but we're not there yet */ - if (wl->psm_requested && !wl->psm) { - mode = STATION_POWER_SAVE_MODE; - ret = wl1251_ps_set_mode(wl, mode); - if (ret < 0) - goto out_sleep; - } } else { /* use defaults when not associated */ wl->beacon_int = WL1251_DEFAULT_BEACON_INT; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f64402f6312b..1e9c93024cf2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -186,7 +186,8 @@ enum ieee80211_bss_change { * @use_short_slot: use short slot time (only relevant for ERP); * if the hardware cannot handle this it must set the * IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag - * @dtim_period: num of beacons before the next DTIM, for PSM + * @dtim_period: num of beacons before the next DTIM, for beaconing, + * not valid in station mode (cf. hw conf ps_dtim_period) * @timestamp: beacon timestamp * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp @@ -648,6 +649,9 @@ enum ieee80211_smps_mode { * value will be only achievable between DTIM frames, the hardware * needs to check for the multicast traffic bit in DTIM beacons. * This variable is valid only when the CONF_PS flag is set. + * @ps_dtim_period: The DTIM period of the AP we're connected to, for use + * in power saving. Power saving will not be enabled until a beacon + * has been received and the DTIM period is known. * @dynamic_ps_timeout: The dynamic powersave timeout (in ms), see the * powersave documentation below. This variable is valid only when * the CONF_PS flag is set. @@ -674,6 +678,7 @@ struct ieee80211_conf { int max_sleep_period; u16 listen_interval; + u8 ps_dtim_period; u8 long_frame_max_tx_count, short_frame_max_tx_count; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1e1d16c55ee5..86c6ad1b058d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -484,6 +484,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (count == 1 && found->u.mgd.powersave && found->u.mgd.associated && + found->u.mgd.associated->beacon_ies && !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL))) { s32 beaconint_us; @@ -497,14 +498,22 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (beaconint_us > latency) { local->ps_sdata = NULL; } else { - u8 dtimper = found->vif.bss_conf.dtim_period; + struct ieee80211_bss *bss; int maxslp = 1; + u8 dtimper; - if (dtimper > 1) + bss = (void *)found->u.mgd.associated->priv; + dtimper = bss->dtim_period; + + /* If the TIM IE is invalid, pretend the value is 1 */ + if (!dtimper) + dtimper = 1; + else if (dtimper > 1) maxslp = min_t(int, dtimper, latency / beaconint_us); local->hw.conf.max_sleep_period = maxslp; + local->hw.conf.ps_dtim_period = dtimper; local->ps_sdata = found; } } else { @@ -702,7 +711,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* set timing information */ sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; sdata->vif.bss_conf.timestamp = cbss->tsf; - sdata->vif.bss_conf.dtim_period = bss->dtim_period; bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, @@ -1168,6 +1176,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + bool need_ps = false; + + if (sdata->u.mgd.associated) { + bss = (void *)sdata->u.mgd.associated->priv; + /* not previously set so we may need to recalc */ + need_ps = !bss->dtim_period; + } if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0]); @@ -1187,6 +1202,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!sdata->u.mgd.associated) return; + if (need_ps) { + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + } + if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 9afe2f9885dc..bc061f629674 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -111,10 +111,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } - /* set default value for buggy AP/no TIM element */ - if (bss->dtim_period == 0) - bss->dtim_period = 1; - bss->supp_rates_len = 0; if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; -- cgit v1.2.3 From a1664773907a2b69e2a3019598dcbeffa6bc724b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 25 Jan 2010 10:37:54 +0000 Subject: netns xfrm: xfrm6_tunnel in netns I'm not sure about rcu stuff near kmem cache destruction: * checks for non-empty hashes look bogus, they're done _before_ rcu_berrier() * unregistering netns ops is done before kmem_cache destoy (as it should), and unregistering involves rcu barriers by itself So it looks nothing should be done. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 +-- net/ipv6/ipcomp6.c | 4 +- net/ipv6/xfrm6_tunnel.c | 140 ++++++++++++++++++++++++++++-------------------- 3 files changed, 88 insertions(+), 62 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 60c27706e7b9..fcee547ca7e3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1408,9 +1408,9 @@ extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); -extern __be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); -extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr); -extern __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr); +extern __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); +extern void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr); +extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_output(struct sk_buff *skb); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 2f2a5ca2c878..1d1faf757c9a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -81,7 +81,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) goto out; t->id.proto = IPPROTO_IPV6; - t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr); + t->id.spi = xfrm6_tunnel_alloc_spi(&init_net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; @@ -112,7 +112,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) struct xfrm_state *t = NULL; __be32 spi; - spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); + spi = xfrm6_tunnel_spi_lookup(&init_net, (xfrm_address_t *)&x->props.saddr); if (spi) t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 23fb1002124c..d6f9aeec69f7 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -30,6 +30,25 @@ #include #include #include +#include + +#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 +#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 + +#define XFRM6_TUNNEL_SPI_MIN 1 +#define XFRM6_TUNNEL_SPI_MAX 0xffffffff + +struct xfrm6_tunnel_net { + struct hlist_head spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; + struct hlist_head spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; + u32 spi; +}; + +static int xfrm6_tunnel_net_id __read_mostly; +static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) +{ + return net_generic(net, xfrm6_tunnel_net_id); +} /* * xfrm_tunnel_spi things are for allocating unique id ("spi") @@ -46,19 +65,8 @@ struct xfrm6_tunnel_spi { static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); -static u32 xfrm6_tunnel_spi; - -#define XFRM6_TUNNEL_SPI_MIN 1 -#define XFRM6_TUNNEL_SPI_MAX 0xffffffff - static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; -#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 -#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 - -static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; -static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; - static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr) { unsigned h; @@ -77,49 +85,30 @@ static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi) } -static int xfrm6_tunnel_spi_init(void) +static int __init xfrm6_tunnel_spi_init(void) { - int i; - - xfrm6_tunnel_spi = 0; xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", sizeof(struct xfrm6_tunnel_spi), 0, SLAB_HWCACHE_ALIGN, NULL); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; - - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) - INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]); - for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) - INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]); return 0; } static void xfrm6_tunnel_spi_fini(void) { - int i; - - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) { - if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i])) - return; - } - for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) { - if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i])) - return; - } - rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); - xfrm6_tunnel_spi_kmem = NULL; } -static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos; hlist_for_each_entry_rcu(x6spi, pos, - &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], + &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) return x6spi; @@ -128,13 +117,13 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) return NULL; } -__be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; rcu_read_lock_bh(); - x6spi = __xfrm6_tunnel_spi_lookup(saddr); + x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); spi = x6spi ? x6spi->spi : 0; rcu_read_unlock_bh(); return htonl(spi); @@ -142,14 +131,15 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); -static int __xfrm6_tunnel_spi_check(u32 spi) +static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); struct hlist_node *pos; hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], + &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) return -1; @@ -157,32 +147,33 @@ static int __xfrm6_tunnel_spi_check(u32 spi) return index; } -static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); u32 spi; struct xfrm6_tunnel_spi *x6spi; int index; - if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN || - xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX) - xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN; + if (xfrm6_tn->spi < XFRM6_TUNNEL_SPI_MIN || + xfrm6_tn->spi >= XFRM6_TUNNEL_SPI_MAX) + xfrm6_tn->spi = XFRM6_TUNNEL_SPI_MIN; else - xfrm6_tunnel_spi++; + xfrm6_tn->spi++; - for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { - index = __xfrm6_tunnel_spi_check(spi); + for (spi = xfrm6_tn->spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { + index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } - for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) { - index = __xfrm6_tunnel_spi_check(spi); + for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { + index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } spi = 0; goto out; alloc_spi: - xfrm6_tunnel_spi = spi; + xfrm6_tn->spi = spi; x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; @@ -192,26 +183,26 @@ alloc_spi: x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); - hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); + hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]); index = xfrm6_tunnel_spi_hash_byaddr(saddr); - hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); + hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tn->spi_byaddr[index]); out: return spi; } -__be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; spin_lock_bh(&xfrm6_tunnel_spi_lock); - x6spi = __xfrm6_tunnel_spi_lookup(saddr); + x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); if (x6spi) { atomic_inc(&x6spi->refcnt); spi = x6spi->spi; } else - spi = __xfrm6_tunnel_alloc_spi(saddr); + spi = __xfrm6_tunnel_alloc_spi(net, saddr); spin_unlock_bh(&xfrm6_tunnel_spi_lock); return htonl(spi); @@ -225,15 +216,16 @@ static void x6spi_destroy_rcu(struct rcu_head *head) container_of(head, struct xfrm6_tunnel_spi, rcu_head)); } -void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) +void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos, *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, pos, n, - &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], + &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { @@ -263,10 +255,11 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = ipv6_hdr(skb); __be32 spi; - spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); + spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&iph->saddr); return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; } @@ -326,7 +319,9 @@ static int xfrm6_tunnel_init_state(struct xfrm_state *x) static void xfrm6_tunnel_destroy(struct xfrm_state *x) { - xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); + struct net *net = xs_net(x); + + xfrm6_tunnel_free_spi(net, (xfrm_address_t *)&x->props.saddr); } static const struct xfrm_type xfrm6_tunnel_type = { @@ -351,6 +346,31 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { .priority = 2, }; +static int __net_init xfrm6_tunnel_net_init(struct net *net) +{ + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); + unsigned int i; + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) + INIT_HLIST_HEAD(&xfrm6_tn->spi_byaddr[i]); + for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) + INIT_HLIST_HEAD(&xfrm6_tn->spi_byspi[i]); + xfrm6_tn->spi = 0; + + return 0; +} + +static void __net_exit xfrm6_tunnel_net_exit(struct net *net) +{ +} + +static struct pernet_operations xfrm6_tunnel_net_ops = { + .init = xfrm6_tunnel_net_init, + .exit = xfrm6_tunnel_net_exit, + .id = &xfrm6_tunnel_net_id, + .size = sizeof(struct xfrm6_tunnel_net), +}; + static int __init xfrm6_tunnel_init(void) { int rv; @@ -367,8 +387,13 @@ static int __init xfrm6_tunnel_init(void) rv = xfrm6_tunnel_spi_init(); if (rv < 0) goto dereg46; + rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); + if (rv < 0) + goto deregspi; return 0; +deregspi: + xfrm6_tunnel_spi_fini(); dereg46: xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); dereg6: @@ -381,6 +406,7 @@ err: static void __exit xfrm6_tunnel_fini(void) { + unregister_pernet_subsys(&xfrm6_tunnel_net_ops); xfrm6_tunnel_spi_fini(); xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); -- cgit v1.2.3 From 57dbb2d83d100ea601c54fe129bfde0678db5dee Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Sun, 24 Jan 2010 12:30:59 +0000 Subject: sched: add head drop fifo queue This adds an additional queuing strategy, called pfifo_head_drop, to remove the oldest skb in the case of an overflow within the queue - the head element - instead of the last skb (tail). To remove the oldest skb in congested situations is useful for sensor network environments where newer packets reflect the superior information. Reviewed-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 19 +++++++++++++++++++ net/sched/sch_api.c | 1 + net/sched/sch_fifo.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2d567265363e..b6cdc33b39c1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -71,6 +71,7 @@ extern void qdisc_watchdog_cancel(struct qdisc_watchdog *wd); extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; +extern struct Qdisc_ops pfifo_head_drop_qdisc_ops; extern int fifo_set_limit(struct Qdisc *q, unsigned int limit); extern struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index dad558bc06fa..67dc08eaaa45 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -427,6 +427,25 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return __qdisc_dequeue_head(sch, &sch->q); } +static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __qdisc_dequeue_head(sch, list); + + if (likely(skb != NULL)) { + unsigned int len = qdisc_pkt_len(skb); + kfree_skb(skb); + return len; + } + + return 0; +} + +static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) +{ + return __qdisc_queue_drop_head(sch, &sch->q); +} + static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, struct sk_buff_head *list) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 75fd1c672c61..6cd491013b50 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1707,6 +1707,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); proc_net_fops_create(&init_net, "psched", 0, &psched_fops); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 69188e8358b4..4b0a6cc44c77 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -43,6 +43,26 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); } +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct sk_buff *skb_head; + struct fifo_sched_data *q = qdisc_priv(sch); + + if (likely(skb_queue_len(&sch->q) < q->limit)) + return qdisc_enqueue_tail(skb, sch); + + /* queue full, remove one skb to fulfill the limit */ + skb_head = qdisc_dequeue_head(sch); + sch->bstats.bytes -= qdisc_pkt_len(skb_head); + sch->bstats.packets--; + sch->qstats.drops++; + kfree_skb(skb_head); + + qdisc_enqueue_tail(skb, sch); + + return NET_XMIT_CN; +} + static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { struct fifo_sched_data *q = qdisc_priv(sch); @@ -108,6 +128,20 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { }; EXPORT_SYMBOL(bfifo_qdisc_ops); +struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { + .id = "pfifo_head_drop", + .priv_size = sizeof(struct fifo_sched_data), + .enqueue = pfifo_tail_enqueue, + .dequeue = qdisc_dequeue_head, + .peek = qdisc_peek_head, + .drop = qdisc_queue_drop_head, + .init = fifo_init, + .reset = qdisc_reset_queue, + .change = fifo_init, + .dump = fifo_dump, + .owner = THIS_MODULE, +}; + /* Pass size change message down to embedded FIFO */ int fifo_set_limit(struct Qdisc *q, unsigned int limit) { -- cgit v1.2.3 From 09d989d179d0c679043556dda77c51b41a2dae7e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Jan 2010 19:58:57 -0500 Subject: cfg80211: add regulatory hint disconnect support This adds a new regulatory hint to be used when we know all devices have been disconnected and idle. This can happen when we suspend, for instance. When we disconnect we can no longer assume the same regulatory rules learned from a country IE or beacon hints are applicable so restore regulatory settings to an initial state. Since driver hints are cached on the wiphy that called the hint, those hints are not reproduced onto cfg80211 as the wiphy will respect its own wiphy->regd regardless. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/regulatory.h | 1 + net/wireless/reg.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/reg.h | 18 ++++++ net/wireless/sme.c | 40 ++++++++++++ 4 files changed, 213 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47995b81c5d7..f873ee37f7e4 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -39,6 +39,7 @@ enum environment_cap { * 00 - World regulatory domain * 99 - built by driver but a specific alpha2 cannot be determined * 98 - result of an intersection between two regulatory domains + * 97 - regulatory domain has not yet been configured * @intersect: indicates whether the wireless core should intersect * the requested regulatory domain with the presently set regulatory * domain. diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5dcda28b6f04..ed89c59bb431 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -134,6 +134,7 @@ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; static char *ieee80211_regdom = "00"; +static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); @@ -252,6 +253,27 @@ static bool regdom_changes(const char *alpha2) return true; } +/* + * The NL80211_REGDOM_SET_BY_USER regdom alpha2 is cached, this lets + * you know if a valid regulatory hint with NL80211_REGDOM_SET_BY_USER + * has ever been issued. + */ +static bool is_user_regdom_saved(void) +{ + if (user_alpha2[0] == '9' && user_alpha2[1] == '7') + return false; + + /* This would indicate a mistake on the design */ + if (WARN((!is_world_regdom(user_alpha2) && + !is_an_alpha2(user_alpha2)), + "Unexpected user alpha2: %c%c\n", + user_alpha2[0], + user_alpha2[1])) + return false; + + return true; +} + /** * country_ie_integrity_changes - tells us if the country IE has changed * @checksum: checksum of country IE of fields we are interested in @@ -1646,7 +1668,7 @@ static int ignore_request(struct wiphy *wiphy, switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return -EINVAL; + return 0; case NL80211_REGDOM_SET_BY_COUNTRY_IE: last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); @@ -1785,6 +1807,11 @@ new_request: pending_request = NULL; + if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = last_request->alpha2[0]; + user_alpha2[1] = last_request->alpha2[1]; + } + /* When r == REG_INTERSECT we do need to call CRDA */ if (r < 0) { /* @@ -1904,12 +1931,16 @@ static void queue_regulatory_request(struct regulatory_request *request) schedule_work(®_work); } -/* Core regulatory hint -- happens once during cfg80211_init() */ +/* + * Core regulatory hint -- happens during cfg80211_init() + * and when we restore regulatory settings. + */ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - BUG_ON(last_request); + kfree(last_request); + last_request = NULL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -2107,6 +2138,123 @@ out: mutex_unlock(®_mutex); } +static void restore_alpha2(char *alpha2, bool reset_user) +{ + /* indicates there is no alpha2 to consider for restoration */ + alpha2[0] = '9'; + alpha2[1] = '7'; + + /* The user setting has precedence over the module parameter */ + if (is_user_regdom_saved()) { + /* Unless we're asked to ignore it and reset it */ + if (reset_user) { + REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + "including user preference\n"); + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + + /* + * If we're ignoring user settings, we still need to + * check the module parameter to ensure we put things + * back as they were for a full restore. + */ + if (!is_world_regdom(ieee80211_regdom)) { + REG_DBG_PRINT("cfg80211: Keeping preference on " + "module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], + ieee80211_regdom[1]); + alpha2[0] = ieee80211_regdom[0]; + alpha2[1] = ieee80211_regdom[1]; + } + } else { + REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + "while preserving user preference for: %c%c\n", + user_alpha2[0], + user_alpha2[1]); + alpha2[0] = user_alpha2[0]; + alpha2[1] = user_alpha2[1]; + } + } else if (!is_world_regdom(ieee80211_regdom)) { + REG_DBG_PRINT("cfg80211: Keeping preference on " + "module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], + ieee80211_regdom[1]); + alpha2[0] = ieee80211_regdom[0]; + alpha2[1] = ieee80211_regdom[1]; + } else + REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n"); +} + +/* + * Restoring regulatory settings involves ingoring any + * possibly stale country IE information and user regulatory + * settings if so desired, this includes any beacon hints + * learned as we could have traveled outside to another country + * after disconnection. To restore regulatory settings we do + * exactly what we did at bootup: + * + * - send a core regulatory hint + * - send a user regulatory hint if applicable + * + * Device drivers that send a regulatory hint for a specific country + * keep their own regulatory domain on wiphy->regd so that does does + * not need to be remembered. + */ +static void restore_regulatory_settings(bool reset_user) +{ + char alpha2[2]; + struct reg_beacon *reg_beacon, *btmp; + + mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); + + reset_regdomains(); + restore_alpha2(alpha2, reset_user); + + /* Clear beacon hints */ + spin_lock_bh(®_pending_beacons_lock); + if (!list_empty(®_pending_beacons)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + spin_unlock_bh(®_pending_beacons_lock); + + if (!list_empty(®_beacon_list)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + + /* First restore to the basic regulatory settings */ + cfg80211_regdomain = cfg80211_world_regdom; + + mutex_unlock(®_mutex); + mutex_unlock(&cfg80211_mutex); + + regulatory_hint_core(cfg80211_regdomain->alpha2); + + /* + * This restores the ieee80211_regdom module parameter + * preference or the last user requested regulatory + * settings, user regulatory settings takes precedence. + */ + if (is_an_alpha2(alpha2)) + regulatory_hint_user(user_alpha2); +} + + +void regulatory_hint_disconnect(void) +{ + REG_DBG_PRINT("cfg80211: All devices are disconnected, going to " + "restore regulatory settings\n"); + restore_regulatory_settings(false); +} + static bool freq_is_chan_12_13_14(u16 freq) { if (freq == ieee80211_channel_to_frequency(12) || @@ -2496,6 +2644,9 @@ int regulatory_init(void) cfg80211_regdomain = cfg80211_world_regdom; + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_regdomain->alpha2); if (err) { diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 3018508226ab..b26224a9f3bc 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -63,4 +63,22 @@ void regulatory_hint_11d(struct wiphy *wiphy, u8 *country_ie, u8 country_ie_len); +/** + * regulatory_hint_disconnect - informs all devices have been disconneted + * + * Regulotory rules can be enhanced further upon scanning and upon + * connection to an AP. These rules become stale if we disconnect + * and go to another country, whether or not we suspend and resume. + * If we suspend, go to another country and resume we'll automatically + * get disconnected shortly after resuming and things will be reset as well. + * This routine is a helper to restore regulatory settings to how they were + * prior to our first connect attempt. This includes ignoring country IE and + * beacon regulatory hints. The ieee80211_regdom module parameter will always + * be respected but if a user had set the regulatory domain that will take + * precedence. + * + * Must be called from process context. + */ +void regulatory_hint_disconnect(void); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 745c37e7992e..17fde0da1b08 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,6 +34,44 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; +bool cfg80211_is_all_idle(void) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + bool is_all_idle = true; + + mutex_lock(&cfg80211_mutex); + + /* + * All devices must be idle as otherwise if you are actively + * scanning some new beacon hints could be learned and would + * count as new regulatory hints. + */ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + cfg80211_lock_rdev(rdev); + list_for_each_entry(wdev, &rdev->netdev_list, list) { + wdev_lock(wdev); + if (wdev->sme_state != CFG80211_SME_IDLE) + is_all_idle = false; + wdev_unlock(wdev); + } + cfg80211_unlock_rdev(rdev); + } + + mutex_unlock(&cfg80211_mutex); + + return is_all_idle; +} + +static void disconnect_work(struct work_struct *work) +{ + if (!cfg80211_is_all_idle()) + return; + + regulatory_hint_disconnect(); +} + +static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); static int cfg80211_conn_scan(struct wireless_dev *wdev) { @@ -658,6 +696,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); wdev->wext.connect.ssid_len = 0; #endif + + schedule_work(&cfg80211_disconnect_work); } void cfg80211_disconnected(struct net_device *dev, u16 reason, -- cgit v1.2.3 From 17ad353b8d9843731258b5d23556667b764939e9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Jan 2010 21:56:25 +0100 Subject: mac80211: fix monitor mode tx radiotap header handling When an injected frame gets buffered for a powersave STA or filtered and retransmitted, mac80211 attempts to parse the radiotap header again, which doesn't work because it's gone at that point. This patch adds a new flag for checking the availability of a radiotap header, so that it only attempts to parse it once, reusing the tx info on the next call to ieee80211_tx(). This fixes severe issues with rekeying in AP mode. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 +++ net/mac80211/tx.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1e9c93024cf2..74ccf30fdf8e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -275,6 +275,8 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_INTFL_RETRANSMISSION: This frame is being retransmitted * after TX status because the destination was asleep, it must not * be modified again (no seqno assignment, crypto, etc.) + * @IEEE80211_TX_INTFL_HAS_RADIOTAP: This frame was injected and still + * has a radiotap header at skb->data. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -296,6 +298,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_PSPOLL_RESPONSE = BIT(17), IEEE80211_TX_CTL_MORE_FRAMES = BIT(18), IEEE80211_TX_INTFL_RETRANSMISSION = BIT(19), + IEEE80211_TX_INTFL_HAS_RADIOTAP = BIT(20), }; /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 14c70452c245..e7b1cdc7651b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1108,7 +1108,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ - if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { + if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { if (!__ieee80211_parse_tx_radiotap(tx, skb)) return TX_DROP; @@ -1117,6 +1117,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, * the radiotap header that was present and pre-filled * 'tx' with tx control information. */ + info->flags &= ~IEEE80211_TX_INTFL_HAS_RADIOTAP; } /* @@ -1499,7 +1500,8 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, int hdrlen; u16 len_rthdr; - info->flags |= IEEE80211_TX_CTL_INJECTED; + info->flags |= IEEE80211_TX_CTL_INJECTED | + IEEE80211_TX_INTFL_HAS_RADIOTAP; len_rthdr = ieee80211_get_radiotap_len(skb->data); hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); -- cgit v1.2.3 From 794e68716bab578ae8f8912dc934496d7c7abc90 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:41:29 +0100 Subject: netfilter: ctnetlink: only assign helpers for matching protocols Make sure not to assign a helper for a different network or transport layer protocol to a connection. Additionally change expectation deletion by helper to compare the name directly - there might be multiple helper registrations using the same name, currently one of them is chosen in an unpredictable manner and only those expectations are removed. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_helper.h | 2 +- net/netfilter/nf_conntrack_helper.c | 8 +++++--- net/netfilter/nf_conntrack_netlink.c | 23 +++++++++++------------ 3 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index d015de92e03f..86be7c4816d6 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -40,7 +40,7 @@ struct nf_conntrack_helper { }; extern struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name); +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..c0e461f466ae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -65,7 +65,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name) +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct hlist_node *n; @@ -73,13 +73,15 @@ __nf_conntrack_helper_find_byname(const char *name) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name)) + if (!strcmp(h->name, name) && + h->tuple.src.l3num == l3num && + h->tuple.dst.protonum == protonum) return h; } } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 79478dfba27e..16f86d61e5d1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1001,7 +1001,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { #ifdef CONFIG_MODULES spin_unlock_bh(&nf_conntrack_lock); @@ -1012,7 +1013,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } spin_lock_bh(&nf_conntrack_lock); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) return -EAGAIN; #endif @@ -1211,7 +1213,8 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); #ifdef CONFIG_MODULES @@ -1221,7 +1224,9 @@ ctnetlink_create_conntrack(struct net *net, } rcu_read_lock(); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; goto err2; @@ -1714,7 +1719,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; - struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; @@ -1751,18 +1755,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_helper_find_byname(name); - if (!h) { - spin_unlock_bh(&nf_conntrack_lock); - return -EOPNOTSUPP; - } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { + if (!strcmp(m_help->helper->name, name) && + del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } -- cgit v1.2.3 From 858b31330054a9ad259feceea0ad1ce5385c47f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:48:53 +0100 Subject: netfilter: nf_conntrack: split up IPCT_STATUS event Split up the IPCT_STATUS event into an IPCT_REPLY event, which is generated when the IPS_SEEN_REPLY bit is set, and an IPCT_ASSURED event, which is generated when the IPS_ASSURED bit is set. In combination with a following patch to support selective event delivery, this can be used for "sparse" conntrack replication: start replicating the conntrack entry after it reached the ASSURED state and that way it's SYN-flood resistant. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 21 +++++++++++---------- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 6 ++++-- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 8 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 475facc3051a..5e05fb883ab1 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,19 +14,20 @@ /* Connection tracking event types */ enum ip_conntrack_events { - IPCT_NEW = 0, /* new conntrack */ - IPCT_RELATED = 1, /* related conntrack */ - IPCT_DESTROY = 2, /* destroyed conntrack */ - IPCT_STATUS = 3, /* status has changed */ - IPCT_PROTOINFO = 4, /* protocol information has changed */ - IPCT_HELPER = 5, /* new helper has been set */ - IPCT_MARK = 6, /* new mark has been set */ - IPCT_NATSEQADJ = 7, /* NAT is doing sequence adjustment */ - IPCT_SECMARK = 8, /* new security mark has been set */ + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ }; enum ip_conntrack_expect_events { - IPEXP_NEW = 0, /* new expectation */ + IPEXP_NEW, /* new expectation */ }; struct nf_conntrack_ecache { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..091ff770eb7b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -825,7 +825,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_REPLY, ct); return ret; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 16f86d61e5d1..ff594eb138c1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1371,7 +1371,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1396,7 +1397,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index c99cfba64ddc..d899b1a69940 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -241,7 +241,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index f9d930f80276..b68ff15ed979 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3c96437b45ad..ad118053971a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1045,7 +1045,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 5c5518bedb4b..8d38f9a4bed8 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 458655bb2106..0b1bc9ba6678 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); -- cgit v1.2.3 From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:51:51 +0100 Subject: netfilter: ctnetlink: support selective event delivery Add two masks for conntrack end expectation events to struct nf_conntrack_ecache and use them to filter events. Their default value is "all events" when the event sysctl is on and "no events" when it is off. A following patch will add specific initializations. Expectation events depend on the ecache struct of their master conntrack. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 18 ++++++++ include/net/netfilter/nf_conntrack_ecache.h | 59 +++++++++++++-------------- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- 4 files changed, 48 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a374787ed9b0..ebfed90733f7 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -74,6 +74,24 @@ enum ip_conntrack_status { IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; +/* Connection tracking event types */ +enum ip_conntrack_events { + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW, /* new expectation */ +}; + #ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 5e05fb883ab1..96ba5f7dcab6 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,28 +12,12 @@ #include #include -/* Connection tracking event types */ -enum ip_conntrack_events { - IPCT_NEW, /* new conntrack */ - IPCT_RELATED, /* related conntrack */ - IPCT_DESTROY, /* destroyed conntrack */ - IPCT_REPLY, /* connection has seen two-way traffic */ - IPCT_ASSURED, /* connection status has changed to assured */ - IPCT_PROTOINFO, /* protocol information has changed */ - IPCT_HELPER, /* new helper has been set */ - IPCT_MARK, /* new mark has been set */ - IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ - IPCT_SECMARK, /* new security mark has been set */ -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW, /* new expectation */ -}; - struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u32 pid; /* netlink pid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 pid; /* netlink pid of destroyer */ }; static inline struct nf_conntrack_ecache * @@ -43,14 +27,24 @@ nf_ct_ecache_find(const struct nf_conn *ct) } static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp) +nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; - if (!net->ct.sysctl_events) + if (!ctmask && !expmask && net->ct.sysctl_events) { + ctmask = ~0; + expmask = ~0; + } + if (!ctmask && !expmask) return NULL; - return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + if (e) { + e->ctmask = ctmask; + e->expmask = expmask; + } + return e; }; #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -83,6 +77,9 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) if (e == NULL) return; + if (!(e->ctmask & (1 << event))) + return; + set_bit(event, &e->cache); } @@ -93,7 +90,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, int report) { int ret = 0; - struct net *net = nf_ct_net(ct); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; @@ -102,9 +98,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) - goto out_unlock; - e = nf_ct_ecache_find(ct); if (e == NULL) goto out_unlock; @@ -118,6 +111,9 @@ nf_conntrack_eventmask_report(unsigned int eventmask, /* This is a resent of a destroy event? If so, skip missed */ unsigned long missed = e->pid ? 0 : e->missed; + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(eventmask | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); @@ -173,18 +169,19 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct net *net = nf_ct_exp_net(exp); struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; rcu_read_lock(); notify = rcu_dereference(nf_expect_event_cb); if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) + e = nf_ct_ecache_find(exp->master); + if (e == NULL) goto out_unlock; - { + if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, .pid = pid, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 091ff770eb7b..53b8da6ad6b7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -648,7 +648,7 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ff594eb138c1..f5c0b09e12f1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1281,7 +1281,7 @@ ctnetlink_create_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.3 From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 14:13:03 +0100 Subject: netfilter: nf_conntrack: support conntrack templates Support initializing selected parameters of new conntrack entries from a "conntrack template", which is a specially marked conntrack entry attached to the skb. Currently the helper and the event delivery masks can be initialized this way. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 4 +++ include/net/netfilter/nf_conntrack.h | 5 +++ include/net/netfilter/nf_conntrack_helper.h | 3 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 50 +++++++++++++++++--------- net/netfilter/nf_conntrack_helper.c | 17 ++++++--- net/netfilter/nf_conntrack_netlink.c | 2 +- 8 files changed, 61 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index ebfed90733f7..c608677dda60 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -72,6 +72,10 @@ enum ip_conntrack_status { /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), + + /* Conntrack is a template */ + IPS_TEMPLATE_BIT = 11, + IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), }; /* Connection tracking event types */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a0904adfb8f7..5043d61c99a7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -272,6 +272,11 @@ nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp); +static inline int nf_ct_is_template(const struct nf_conn *ct) +{ + return test_bit(IPS_TEMPLATE_BIT, &ct->status); +} + /* It's confirmed if it is, or has been in the hash table. */ static inline int nf_ct_is_confirmed(struct nf_conn *ct) { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 86be7c4816d6..e17aaa3e19fd 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -47,7 +47,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); -extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags); extern void nf_ct_helper_destroy(struct nf_conn *ct); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3ebd1b..77627fa80561 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -59,7 +59,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; #endif #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 0956ebabbff2..55ce22e5de49 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -212,7 +212,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 53b8da6ad6b7..471e2a79d26f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(struct net *net, +init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, @@ -628,6 +628,7 @@ init_conntrack(struct net *net, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -648,7 +649,11 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + + ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; + nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); @@ -673,7 +678,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -694,7 +699,7 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct net *net, +resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, @@ -718,7 +723,8 @@ resolve_normal_ct(struct net *net, /* look for tuple match */ h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, + skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -755,7 +761,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct; + struct nf_conn *ct, *tmpl = NULL; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -764,10 +770,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(net, ignore); - return NF_ACCEPT; + /* Previously seen (loopback or untracked)? Ignore. */ + tmpl = (struct nf_conn *)skb->nfct; + if (!nf_ct_is_template(tmpl)) { + NF_CT_STAT_INC_ATOMIC(net, ignore); + return NF_ACCEPT; + } + skb->nfct = NULL; } /* rcu_read_lock()ed by nf_hook_slow */ @@ -778,7 +788,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } l4proto = __nf_ct_l4proto_find(pf, protonum); @@ -791,22 +802,25 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } } - ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); - return NF_ACCEPT; + ret = NF_ACCEPT; + goto out; } if (IS_ERR(ct)) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; + ret = NF_DROP; + goto out; } NF_CT_ASSERT(skb->nfct); @@ -821,11 +835,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); - return -ret; + ret = -ret; + goto out; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); +out: + if (tmpl) + nf_ct_put(tmpl); return ret; } @@ -864,7 +882,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c0e461f466ae..8144b0da5515 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -96,13 +96,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn_help *help; int ret = 0; - struct nf_conntrack_helper *helper; - struct nf_conn_help *help = nfct_help(ct); - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (tmpl != NULL) { + help = nfct_help(tmpl); + if (help != NULL) + helper = help->helper; + } + + help = nfct_help(ct); + if (helper == NULL) + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) rcu_assign_pointer(help->helper, NULL); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f5c0b09e12f1..09044f9f4b2e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1249,7 +1249,7 @@ ctnetlink_create_conntrack(struct net *net, } } else { /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); if (err < 0) goto err2; } -- cgit v1.2.3 From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 17:17:06 +0100 Subject: netfilter: xtables: add CT target Add a new target for the raw table, which can be used to specify conntrack parameters for specific connections, f.i. the conntrack helper. The target attaches a "template" connection tracking entry to the skb, which is used by the conntrack core when initializing a new conntrack. Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_CT.h | 17 +++ include/net/netfilter/nf_conntrack_helper.h | 3 + net/netfilter/Kconfig | 12 +++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_helper.c | 19 ++++ net/netfilter/xt_CT.c | 158 ++++++++++++++++++++++++++++ 7 files changed, 211 insertions(+) create mode 100644 include/linux/netfilter/xt_CT.h create mode 100644 net/netfilter/xt_CT.c (limited to 'include/net') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 2aea50399c0b..a5a63e41b8af 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nfnetlink_queue.h header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h header-y += xt_CONNSECMARK.h +header-y += xt_CT.h header-y += xt_DSCP.h header-y += xt_LED.h header-y += xt_MARK.h diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h new file mode 100644 index 000000000000..7fd0effe1316 --- /dev/null +++ b/include/linux/netfilter/xt_CT.h @@ -0,0 +1,17 @@ +#ifndef _XT_CT_H +#define _XT_CT_H + +#define XT_CT_NOTRACK 0x1 + +struct xt_ct_target_info { + u_int16_t flags; + u_int16_t __unused; + u_int32_t ct_events; + u_int32_t exp_events; + char helper[16]; + + /* Used internally by the kernel */ + struct nf_conn *ct __attribute__((aligned(8))); +}; + +#endif /* _XT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e17aaa3e19fd..32c305dbdab6 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -42,6 +42,9 @@ struct nf_conntrack_helper { extern struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); +extern struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); + extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 634d14affc8d..4469d45261f4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -341,6 +341,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_CT + tristate '"CT" target support' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + help + This options adds a `CT' target, which allows to specify initial + connection tracking parameters like events to be delivered and + the helper to be used. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 49f62ee4e9ff..f873644f02f6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8144b0da5515..a74a5769877b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -83,6 +83,25 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); +struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + + h = __nf_conntrack_helper_find(name, l3num, protonum); +#ifdef CONFIG_MODULES + if (h == NULL) { + if (request_module("nfct-helper-%s", name) == 0) + h = __nf_conntrack_helper_find(name, l3num, protonum); + } +#endif + if (h != NULL && !try_module_get(h->me)) + h = NULL; + + return h; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c new file mode 100644 index 000000000000..8183a054256f --- /dev/null +++ b/net/netfilter/xt_CT.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int xt_ct_target(struct sk_buff *skb, + const struct xt_target_param *par) +{ + const struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + + /* Previously seen (loopback)? Ignore. */ + if (skb->nfct != NULL) + return XT_CONTINUE; + + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; + + return XT_CONTINUE; +} + +static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) +{ + if (par->family == AF_INET) { + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.invflags & IPT_INV_PROTO) + return 0; + return e->ip.proto; + } else if (par->family == AF_INET6) { + const struct ip6t_entry *e = par->entryinfo; + + if (e->ipv6.invflags & IP6T_INV_PROTO) + return 0; + return e->ipv6.proto; + } else + return 0; +} + +static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conntrack_tuple t; + struct nf_conn_help *help; + struct nf_conn *ct; + u8 proto; + + if (info->flags & ~XT_CT_NOTRACK) + return false; + + if (info->flags & XT_CT_NOTRACK) { + ct = &nf_conntrack_untracked; + atomic_inc(&ct->ct_general.use); + goto out; + } + + if (nf_ct_l3proto_try_module_get(par->family) < 0) + goto err1; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) + goto err2; + + if ((info->ct_events || info->exp_events) && + !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, + GFP_KERNEL)) + goto err3; + + if (info->helper[0]) { + proto = xt_ct_find_proto(par); + if (!proto) + goto err3; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (help->helper == NULL) + goto err3; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +out: + info->ct = ct; + return true; + +err3: + nf_conntrack_free(ct); +err2: + nf_ct_l3proto_module_put(par->family); +err1: + return false; +} + +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + + if (ct != &nf_conntrack_untracked) { + help = nfct_help(ct); + if (help) + module_put(help->helper->me); + + nf_ct_l3proto_module_put(par->family); + } + nf_ct_put(info->ct); +} + +static struct xt_target xt_ct_tg __read_mostly = { + .name = "CT", + .family = NFPROTO_UNSPEC, + .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .checkentry = xt_ct_tg_check, + .destroy = xt_ct_tg_destroy, + .target = xt_ct_target, + .table = "raw", + .me = THIS_MODULE, +}; + +static int __init xt_ct_tg_init(void) +{ + return xt_register_target(&xt_ct_tg); +} + +static void __exit xt_ct_tg_exit(void) +{ + xt_unregister_target(&xt_ct_tg); +} + +module_init(xt_ct_tg_init); +module_exit(xt_ct_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: connection tracking target"); +MODULE_ALIAS("ipt_CT"); +MODULE_ALIAS("ip6t_CT"); -- cgit v1.2.3 From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly flight between one hash table (netns ONE) to another one (netns TWO), and concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We dont have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index ba1ba0c5efd1..aed23b6c8478 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; @@ -28,5 +29,6 @@ struct netns_ct { #endif int hash_vmalloc; int expect_vmalloc; + char *slabname; }; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. */ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if (!net->ct.nf_conntrack_cachep) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; -- cgit v1.2.3 From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instanciating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netns/conntrack.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++----- net/netfilter/nf_conntrack_core.c | 53 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 10 files changed, 49 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index aed23b6c8478..63d449807d9b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d6258..9a4b8b714079 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,7 @@ struct netns_ipv4 { struct xt_table *iptable_security; struct xt_table *nat_table; struct hlist_head *nat_bysource; + unsigned int nat_htable_size; int nat_vmalloced; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. */ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. */ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. */ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; -- cgit v1.2.3 From 33e5a2f776e331dc8a4379b6efb660d38f182d96 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 10:24:30 +0100 Subject: wireless: update radiotap parser Upstream radiotap has adopted the namespace proposal David Young made and I then took care of, for which I had adapted the radiotap parser as a library outside the kernel. This brings the in-kernel parser up to speed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 83 ++++++++--- include/net/ieee80211_radiotap.h | 4 + net/mac80211/tx.c | 9 +- net/wireless/radiotap.c | 305 +++++++++++++++++++++++++-------------- 4 files changed, 271 insertions(+), 130 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a3f0a7ed31ac..5b3569b2a74c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1561,37 +1561,82 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, * Documentation in Documentation/networking/radiotap-headers.txt */ +struct radiotap_align_size { + uint8_t align:4, size:4; +}; + +struct ieee80211_radiotap_namespace { + const struct radiotap_align_size *align_size; + int n_bits; + uint32_t oui; + uint8_t subns; +}; + +struct ieee80211_radiotap_vendor_namespaces { + const struct ieee80211_radiotap_namespace *ns; + int n_ns; +}; + /** * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args - * @rtheader: pointer to the radiotap header we are walking through - * @max_length: length of radiotap header in cpu byte ordering - * @this_arg_index: IEEE80211_RADIOTAP_... index of current arg - * @this_arg: pointer to current radiotap arg - * @arg_index: internal next argument index - * @arg: internal next argument pointer - * @next_bitmap: internal pointer to next present u32 - * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present + * @this_arg_index: index of current arg, valid after each successful call + * to ieee80211_radiotap_iterator_next() + * @this_arg: pointer to current radiotap arg; it is valid after each + * call to ieee80211_radiotap_iterator_next() but also after + * ieee80211_radiotap_iterator_init() where it will point to + * the beginning of the actual data portion + * @this_arg_size: length of the current arg, for convenience + * @current_namespace: pointer to the current namespace definition + * (or internally %NULL if the current namespace is unknown) + * @is_radiotap_ns: indicates whether the current namespace is the default + * radiotap namespace or not + * + * @overrides: override standard radiotap fields + * @n_overrides: number of overrides + * + * @_rtheader: pointer to the radiotap header we are walking through + * @_max_length: length of radiotap header in cpu byte ordering + * @_arg_index: next argument index + * @_arg: next argument pointer + * @_next_bitmap: internal pointer to next present u32 + * @_bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present + * @_vns: vendor namespace definitions + * @_next_ns_data: beginning of the next namespace's data + * @_reset_on_ext: internal; reset the arg index to 0 when going to the + * next bitmap word + * + * Describes the radiotap parser state. Fields prefixed with an underscore + * must not be used by users of the parser, only by the parser internally. */ struct ieee80211_radiotap_iterator { - struct ieee80211_radiotap_header *rtheader; - int max_length; + struct ieee80211_radiotap_header *_rtheader; + const struct ieee80211_radiotap_vendor_namespaces *_vns; + const struct ieee80211_radiotap_namespace *current_namespace; + + unsigned char *_arg, *_next_ns_data; + uint32_t *_next_bitmap; + + unsigned char *this_arg; int this_arg_index; - u8 *this_arg; + int this_arg_size; - int arg_index; - u8 *arg; - __le32 *next_bitmap; - u32 bitmap_shifter; + int is_radiotap_ns; + + int _max_length; + int _arg_index; + uint32_t _bitmap_shifter; + int _reset_on_ext; }; extern int ieee80211_radiotap_iterator_init( - struct ieee80211_radiotap_iterator *iterator, - struct ieee80211_radiotap_header *radiotap_header, - int max_length); + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns); extern int ieee80211_radiotap_iterator_next( - struct ieee80211_radiotap_iterator *iterator); + struct ieee80211_radiotap_iterator *iterator); + extern const unsigned char rfc1042_header[6]; extern const unsigned char bridge_tunnel_header[6]; diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 9d3d86aaccbb..af49f8ab7f81 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -198,6 +198,10 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TX_FLAGS = 15, IEEE80211_RADIOTAP_RTS_RETRIES = 16, IEEE80211_RADIOTAP_DATA_RETRIES = 17, + + /* valid in every it_present bitmap, even vendor namespaces */ + IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, + IEEE80211_RADIOTAP_VENDOR_NAMESPACE = 30, IEEE80211_RADIOTAP_EXT = 31 }; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 85e382aa894e..e392820a4c33 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1010,7 +1010,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, + NULL); sband = tx->local->hw.wiphy->bands[tx->channel->band]; @@ -1046,7 +1047,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * because it will be recomputed and added * on transmission */ - if (skb->len < (iterator.max_length + FCS_LEN)) + if (skb->len < (iterator._max_length + FCS_LEN)) return false; skb_trim(skb, skb->len - FCS_LEN); @@ -1073,10 +1074,10 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, /* * remove the radiotap header - * iterator->max_length was sanity-checked against + * iterator->_max_length was sanity-checked against * skb->len by iterator init */ - skb_pull(skb, iterator.max_length); + skb_pull(skb, iterator._max_length); return true; } diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index f591871a7b4f..1332c445d1c7 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -2,6 +2,16 @@ * Radiotap parser * * Copyright 2007 Andy Green + * Copyright 2009 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Alternatively, this software may be distributed under the terms of BSD + * license. + * + * See COPYING for more details. */ #include @@ -10,6 +20,35 @@ /* function prototypes and related defs are in include/net/cfg80211.h */ +static const struct radiotap_align_size rtap_namespace_sizes[] = { + [IEEE80211_RADIOTAP_TSFT] = { .align = 8, .size = 8, }, + [IEEE80211_RADIOTAP_FLAGS] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_RATE] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_CHANNEL] = { .align = 2, .size = 4, }, + [IEEE80211_RADIOTAP_FHSS] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DBM_ANTNOISE] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_LOCK_QUALITY] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_TX_ATTENUATION] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_DBM_TX_POWER] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_ANTENNA] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DB_ANTNOISE] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_RX_FLAGS] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, }, + [IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, }, + [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, + /* + * add more here as they are defined in radiotap.h + */ +}; + +static const struct ieee80211_radiotap_namespace radiotap_ns = { + .n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]), + .align_size = rtap_namespace_sizes, +}; + /** * ieee80211_radiotap_iterator_init - radiotap parser iterator initialization * @iterator: radiotap_iterator to initialize @@ -50,9 +89,9 @@ */ int ieee80211_radiotap_iterator_init( - struct ieee80211_radiotap_iterator *iterator, - struct ieee80211_radiotap_header *radiotap_header, - int max_length) + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns) { /* Linux only supports version 0 radiotap format */ if (radiotap_header->it_version) @@ -62,19 +101,24 @@ int ieee80211_radiotap_iterator_init( if (max_length < get_unaligned_le16(&radiotap_header->it_len)) return -EINVAL; - iterator->rtheader = radiotap_header; - iterator->max_length = get_unaligned_le16(&radiotap_header->it_len); - iterator->arg_index = 0; - iterator->bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); - iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header); - iterator->this_arg = NULL; + iterator->_rtheader = radiotap_header; + iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len); + iterator->_arg_index = 0; + iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); + iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header); + iterator->_reset_on_ext = 0; + iterator->_next_bitmap = &radiotap_header->it_present; + iterator->_next_bitmap++; + iterator->_vns = vns; + iterator->current_namespace = &radiotap_ns; + iterator->is_radiotap_ns = 1; /* find payload start allowing for extended bitmap(s) */ - if (unlikely(iterator->bitmap_shifter & (1<arg) & - (1 << IEEE80211_RADIOTAP_EXT)) { - iterator->arg += sizeof(u32); + if (iterator->_bitmap_shifter & (1<_arg) & + (1 << IEEE80211_RADIOTAP_EXT)) { + iterator->_arg += sizeof(uint32_t); /* * check for insanity where the present bitmaps @@ -82,12 +126,13 @@ int ieee80211_radiotap_iterator_init( * stated radiotap header length */ - if (((ulong)iterator->arg - - (ulong)iterator->rtheader) > iterator->max_length) + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_max_length) return -EINVAL; } - iterator->arg += sizeof(u32); + iterator->_arg += sizeof(uint32_t); /* * no need to check again for blowing past stated radiotap @@ -96,12 +141,36 @@ int ieee80211_radiotap_iterator_init( */ } + iterator->this_arg = iterator->_arg; + /* we are all initialized happily */ return 0; } EXPORT_SYMBOL(ieee80211_radiotap_iterator_init); +static void find_ns(struct ieee80211_radiotap_iterator *iterator, + uint32_t oui, uint8_t subns) +{ + int i; + + iterator->current_namespace = NULL; + + if (!iterator->_vns) + return; + + for (i = 0; i < iterator->_vns->n_ns; i++) { + if (iterator->_vns->ns[i].oui != oui) + continue; + if (iterator->_vns->ns[i].subns != subns) + continue; + + iterator->current_namespace = &iterator->_vns->ns[i]; + break; + } +} + + /** * ieee80211_radiotap_iterator_next - return next radiotap parser iterator arg @@ -127,99 +196,80 @@ EXPORT_SYMBOL(ieee80211_radiotap_iterator_init); */ int ieee80211_radiotap_iterator_next( - struct ieee80211_radiotap_iterator *iterator) + struct ieee80211_radiotap_iterator *iterator) { - - /* - * small length lookup table for all radiotap types we heard of - * starting from b0 in the bitmap, so we can walk the payload - * area of the radiotap header - * - * There is a requirement to pad args, so that args - * of a given length must begin at a boundary of that length - * -- but note that compound args are allowed (eg, 2 x u16 - * for IEEE80211_RADIOTAP_CHANNEL) so total arg length is not - * a reliable indicator of alignment requirement. - * - * upper nybble: content alignment for arg - * lower nybble: content length for arg - */ - - static const u8 rt_sizes[] = { - [IEEE80211_RADIOTAP_TSFT] = 0x88, - [IEEE80211_RADIOTAP_FLAGS] = 0x11, - [IEEE80211_RADIOTAP_RATE] = 0x11, - [IEEE80211_RADIOTAP_CHANNEL] = 0x24, - [IEEE80211_RADIOTAP_FHSS] = 0x22, - [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DBM_ANTNOISE] = 0x11, - [IEEE80211_RADIOTAP_LOCK_QUALITY] = 0x22, - [IEEE80211_RADIOTAP_TX_ATTENUATION] = 0x22, - [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = 0x22, - [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, - [IEEE80211_RADIOTAP_ANTENNA] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11, - [IEEE80211_RADIOTAP_RX_FLAGS] = 0x22, - [IEEE80211_RADIOTAP_TX_FLAGS] = 0x22, - [IEEE80211_RADIOTAP_RTS_RETRIES] = 0x11, - [IEEE80211_RADIOTAP_DATA_RETRIES] = 0x11, - /* - * add more here as they are defined in - * include/net/ieee80211_radiotap.h - */ - }; - - /* - * for every radiotap entry we can at - * least skip (by knowing the length)... - */ - - while (iterator->arg_index < sizeof(rt_sizes)) { + while (1) { int hit = 0; - int pad; + int pad, align, size, subns, vnslen; + uint32_t oui; - if (!(iterator->bitmap_shifter & 1)) + /* if no more EXT bits, that's it */ + if ((iterator->_arg_index % 32) == IEEE80211_RADIOTAP_EXT && + !(iterator->_bitmap_shifter & 1)) + return -ENOENT; + + if (!(iterator->_bitmap_shifter & 1)) goto next_entry; /* arg not present */ + /* get alignment/size of data */ + switch (iterator->_arg_index % 32) { + case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: + case IEEE80211_RADIOTAP_EXT: + align = 1; + size = 0; + break; + case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: + align = 2; + size = 6; + break; + default: + if (!iterator->current_namespace || + iterator->_arg_index >= iterator->current_namespace->n_bits) { + if (iterator->current_namespace == &radiotap_ns) + return -ENOENT; + align = 0; + } else { + align = iterator->current_namespace->align_size[iterator->_arg_index].align; + size = iterator->current_namespace->align_size[iterator->_arg_index].size; + } + if (!align) { + /* skip all subsequent data */ + iterator->_arg = iterator->_next_ns_data; + /* give up on this namespace */ + iterator->current_namespace = NULL; + goto next_entry; + } + break; + } + /* * arg is present, account for alignment padding - * 8-bit args can be at any alignment - * 16-bit args must start on 16-bit boundary - * 32-bit args must start on 32-bit boundary - * 64-bit args must start on 64-bit boundary * - * note that total arg size can differ from alignment of - * elements inside arg, so we use upper nybble of length - * table to base alignment on - * - * also note: these alignments are ** relative to the - * start of the radiotap header **. There is no guarantee + * Note that these alignments are relative to the start + * of the radiotap header. There is no guarantee * that the radiotap header itself is aligned on any * kind of boundary. * - * the above is why get_unaligned() is used to dereference - * multibyte elements from the radiotap area + * The above is why get_unaligned() is used to dereference + * multibyte elements from the radiotap area. */ - pad = (((ulong)iterator->arg) - - ((ulong)iterator->rtheader)) & - ((rt_sizes[iterator->arg_index] >> 4) - 1); + pad = ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader) & (align - 1); if (pad) - iterator->arg += - (rt_sizes[iterator->arg_index] >> 4) - pad; + iterator->_arg += align - pad; /* * this is what we will return to user, but we need to * move on first so next call has something fresh to test */ - iterator->this_arg_index = iterator->arg_index; - iterator->this_arg = iterator->arg; - hit = 1; + iterator->this_arg_index = iterator->_arg_index; + iterator->this_arg = iterator->_arg; + iterator->this_arg_size = size; /* internally move on the size of this arg */ - iterator->arg += rt_sizes[iterator->arg_index] & 0x0f; + iterator->_arg += size; /* * check for insanity where we are given a bitmap that @@ -228,32 +278,73 @@ int ieee80211_radiotap_iterator_next( * max_length on the last arg, never exceeding it. */ - if (((ulong)iterator->arg - (ulong)iterator->rtheader) > - iterator->max_length) + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_max_length) return -EINVAL; - next_entry: - iterator->arg_index++; - if (unlikely((iterator->arg_index & 31) == 0)) { - /* completed current u32 bitmap */ - if (iterator->bitmap_shifter & 1) { - /* b31 was set, there is more */ - /* move to next u32 bitmap */ - iterator->bitmap_shifter = - get_unaligned_le32(iterator->next_bitmap); - iterator->next_bitmap++; - } else - /* no more bitmaps: end */ - iterator->arg_index = sizeof(rt_sizes); - } else /* just try the next bit */ - iterator->bitmap_shifter >>= 1; + /* these special ones are valid in each bitmap word */ + switch (iterator->_arg_index % 32) { + case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: + iterator->_bitmap_shifter >>= 1; + iterator->_arg_index++; + + iterator->_reset_on_ext = 1; + + vnslen = get_unaligned_le16(iterator->this_arg + 4); + iterator->_next_ns_data = iterator->_arg + vnslen; + oui = (*iterator->this_arg << 16) | + (*(iterator->this_arg + 1) << 8) | + *(iterator->this_arg + 2); + subns = *(iterator->this_arg + 3); + + find_ns(iterator, oui, subns); + + iterator->is_radiotap_ns = 0; + /* allow parsers to show this information */ + iterator->this_arg_index = + IEEE80211_RADIOTAP_VENDOR_NAMESPACE; + iterator->this_arg_size += vnslen; + if ((unsigned long)iterator->this_arg + + iterator->this_arg_size - + (unsigned long)iterator->_rtheader > + (unsigned long)(unsigned long)iterator->_max_length) + return -EINVAL; + hit = 1; + break; + case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: + iterator->_bitmap_shifter >>= 1; + iterator->_arg_index++; + + iterator->_reset_on_ext = 1; + iterator->current_namespace = &radiotap_ns; + iterator->is_radiotap_ns = 1; + break; + case IEEE80211_RADIOTAP_EXT: + /* + * bit 31 was set, there is more + * -- move to next u32 bitmap + */ + iterator->_bitmap_shifter = + get_unaligned_le32(iterator->_next_bitmap); + iterator->_next_bitmap++; + if (iterator->_reset_on_ext) + iterator->_arg_index = 0; + else + iterator->_arg_index++; + iterator->_reset_on_ext = 0; + break; + default: + /* we've got a hit! */ + hit = 1; + next_entry: + iterator->_bitmap_shifter >>= 1; + iterator->_arg_index++; + } /* if we found a valid arg earlier, return it now */ if (hit) return 0; } - - /* we don't know how to handle any more args, we're done */ - return -ENOENT; } EXPORT_SYMBOL(ieee80211_radiotap_iterator_next); -- cgit v1.2.3 From 34e895075e21be3e21e71d6317440d1ee7969ad0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Feb 2010 13:59:58 +0100 Subject: mac80211: allow station add/remove to sleep Many drivers would like to sleep during station addition and removal, and currently have a high complexity there from not being able to. This introduces two new callbacks sta_add() and sta_remove() that drivers can implement instead of using sta_notify() and that can sleep, and the new sta_add() callback is also allowed to fail. The reason we didn't do this previously is that the IBSS code wants to insert stations from the RX path, which is a tasklet, so cannot sleep. This patch will keep the station allocation in that path, but moves adding the station to the driver out of line. Since the addition can now fail, we can have IBSS peer structs the driver rejected -- in that case we still talk to the station but never tell the driver about it in the control.sta pointer. If there will ever be a driver that has a low limit on the number of stations and that cannot talk to any stations that are not known to it, we need to do come up with a new strategy of handling larger IBSSs, maybe quicker expiry or rejecting peers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 21 +- net/mac80211/cfg.c | 23 +- net/mac80211/driver-ops.h | 34 +++ net/mac80211/driver-trace.h | 52 ++++ net/mac80211/ibss.c | 22 +- net/mac80211/ieee80211_i.h | 18 +- net/mac80211/mesh_plink.c | 17 +- net/mac80211/mlme.c | 14 +- net/mac80211/pm.c | 10 +- net/mac80211/rx.c | 4 +- net/mac80211/sta_info.c | 731 ++++++++++++++++++++++---------------------- net/mac80211/sta_info.h | 32 +- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 16 +- 14 files changed, 526 insertions(+), 470 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 74ccf30fdf8e..a19fac35259a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -814,7 +814,7 @@ enum set_key_cmd { * mac80211, any ieee80211_sta pointer you get access to must * either be protected by rcu_read_lock() explicitly or implicitly, * or you must take good care to not use such a pointer after a - * call to your sta_notify callback that removed it. + * call to your sta_remove callback that removed it. * * @addr: MAC address * @aid: AID we assigned to the station if we're an AP @@ -840,8 +840,8 @@ struct ieee80211_sta { * indicates addition and removal of a station to station table, * or if a associated station made a power state transition. * - * @STA_NOTIFY_ADD: a station was added to the station table - * @STA_NOTIFY_REMOVE: a station being removed from the station table + * @STA_NOTIFY_ADD: (DEPRECATED) a station was added to the station table + * @STA_NOTIFY_REMOVE: (DEPRECATED) a station being removed from the station table * @STA_NOTIFY_SLEEP: a station is now sleeping * @STA_NOTIFY_AWAKE: a sleeping station woke up */ @@ -1534,9 +1534,14 @@ enum ieee80211_ampdu_mlme_action { * @set_rts_threshold: Configuration of RTS threshold (if device needs it) * The callback can sleep. * - * @sta_notify: Notifies low level driver about addition, removal or power - * state transition of an associated station, AP, IBSS/WDS/mesh peer etc. - * Must be atomic. + * @sta_add: Notifies low level driver about addition of an associated station, + * AP, IBSS/WDS/mesh peer etc. This callback can sleep. + * + * @sta_remove: Notifies low level driver about removal of an associated + * station, AP, IBSS/WDS/mesh peer etc. This callback can sleep. + * + * @sta_notify: Notifies low level driver about power state transition of an + * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. @@ -1635,6 +1640,10 @@ struct ieee80211_ops { void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, u16 *iv16); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); + int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta); + int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index facf233843e0..a362523d8eb7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -747,9 +747,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sdata->vif.type == NL80211_IFTYPE_AP; - rcu_read_lock(); - - err = sta_info_insert(sta); + err = sta_info_insert_rcu(sta); if (err) { rcu_read_unlock(); return err; @@ -768,26 +766,13 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (mac) { - rcu_read_lock(); - - sta = sta_info_get_bss(sdata, mac); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - sta_info_unlink(&sta); - rcu_read_unlock(); - - sta_info_destroy(sta); - } else - sta_info_flush(local, sdata); + if (mac) + return sta_info_destroy_addr_bss(sdata, mac); + sta_info_flush(local, sdata); return 0; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 6c31f38ac7f5..855e85b55061 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -243,6 +243,40 @@ static inline void drv_sta_notify(struct ieee80211_local *local, trace_drv_sta_notify(local, sdata, cmd, sta); } +static inline int drv_sta_add(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta) +{ + int ret = 0; + + might_sleep(); + + if (local->ops->sta_add) + ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); + else if (local->ops->sta_notify) + local->ops->sta_notify(&local->hw, &sdata->vif, + STA_NOTIFY_ADD, sta); + + trace_drv_sta_add(local, sdata, sta, ret); + + return ret; +} + +static inline void drv_sta_remove(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta) +{ + might_sleep(); + + if (local->ops->sta_remove) + local->ops->sta_remove(&local->hw, &sdata->vif, sta); + else if (local->ops->sta_notify) + local->ops->sta_notify(&local->hw, &sdata->vif, + STA_NOTIFY_REMOVE, sta); + + trace_drv_sta_remove(local, sdata, sta); +} + static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, const struct ieee80211_tx_queue_params *params) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 502424b2538a..c984910bf275 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -545,6 +545,58 @@ TRACE_EVENT(drv_sta_notify, ) ); +TRACE_EVENT(drv_sta_add, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, int ret), + + TP_ARGS(local, sdata, sta, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + ) +); + +TRACE_EVENT(drv_sta_remove, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + + TP_ARGS(local, sdata, sta), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG + ) +); + TRACE_EVENT(drv_conf_tx, TP_PROTO(struct ieee80211_local *local, u16 queue, const struct ieee80211_tx_queue_params *params, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 85c4ba14c77d..f3e942486749 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -275,10 +275,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, (unsigned long long) supp_rates, (unsigned long long) sta->sta.supp_rates[band]); #endif - } else - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); - - rcu_read_unlock(); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, + supp_rates, GFP_KERNEL); + } } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, @@ -368,7 +370,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->bssid); #endif ieee80211_sta_join_ibss(sdata, bss); - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, + supp_rates, GFP_KERNEL); } put_bss: @@ -381,7 +384,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * must be callable in atomic context. */ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid,u8 *addr, u32 supp_rates) + u8 *bssid,u8 *addr, u32 supp_rates, + gfp_t gfp) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; @@ -410,7 +414,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, wiphy_name(local->hw.wiphy), addr, sdata->name); #endif - sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); + sta = sta_info_alloc(sdata, addr, gfp); if (!sta) return NULL; @@ -422,9 +426,9 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); + /* If it fails, maybe we raced another insertion? */ if (sta_info_insert(sta)) - return NULL; - + return sta_info_get(sdata, addr); return sta; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3067fbd69d63..a5911191f224 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -688,15 +688,18 @@ struct ieee80211_local { /* Station data */ /* - * The lock only protects the list, hash, timer and counter - * against manipulation, reads are done in RCU. Additionally, - * the lock protects each BSS's TIM bitmap. + * The mutex only protects the list and counter, + * reads are done in RCU. + * Additionally, the lock protects the hash table, + * the pending list and each BSS's TIM bitmap. */ + struct mutex sta_mtx; spinlock_t sta_lock; unsigned long num_sta; - struct list_head sta_list; + struct list_head sta_list, sta_pending_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; + struct work_struct sta_finish_work; int sta_generation; struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; @@ -770,10 +773,6 @@ struct ieee80211_local { assoc_led_name[32], radio_led_name[32]; #endif -#ifdef CONFIG_MAC80211_DEBUGFS - struct work_struct sta_debugfs_add; -#endif - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; @@ -985,7 +984,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 *addr, u32 supp_rates); + u8 *bssid, u8 *addr, u32 supp_rates, + gfp_t gfp); int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params); int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 7985e5150898..bc4e20e57ff5 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -102,7 +102,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, if (local->num_sta >= MESH_MAX_PLINKS) return NULL; - sta = sta_info_alloc(sdata, hw_addr, GFP_ATOMIC); + sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL); if (!sta) return NULL; @@ -236,12 +236,12 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data sta = sta_info_get(sdata, hw_addr); if (!sta) { + rcu_read_unlock(); + sta = mesh_plink_alloc(sdata, hw_addr, rates); - if (!sta) { - rcu_read_unlock(); + if (!sta) return; - } - if (sta_info_insert(sta)) { + if (sta_info_insert_rcu(sta)) { rcu_read_unlock(); return; } @@ -485,9 +485,11 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } else if (!sta) { /* ftype == PLINK_OPEN */ u32 rates; + + rcu_read_unlock(); + if (!mesh_plink_free_count(sdata)) { mpl_dbg("Mesh plink error: no more free plinks\n"); - rcu_read_unlock(); return; } @@ -495,10 +497,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta = mesh_plink_alloc(sdata, mgmt->sa, rates); if (!sta) { mpl_dbg("Mesh plink error: plink table full\n"); - rcu_read_unlock(); return; } - if (sta_info_insert(sta)) { + if (sta_info_insert_rcu(sta)) { rcu_read_unlock(); return; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f437284830ef..ac9429e8d72b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -822,19 +822,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata) changed |= BSS_CHANGED_BSSID; ieee80211_bss_info_change_notify(sdata, changed); - rcu_read_lock(); - - sta = sta_info_get(sdata, bssid); - if (!sta) { - rcu_read_unlock(); - return; - } - - sta_info_unlink(&sta); - - rcu_read_unlock(); - - sta_info_destroy(sta); + sta_info_destroy_addr(sdata, bssid); } void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 47f818959ad7..0e64484e861c 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -11,7 +11,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - unsigned long flags; ieee80211_scan_cancel(local); @@ -55,22 +54,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) rcu_read_unlock(); /* remove STAs */ - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - if (local->ops->sta_notify) { + if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, sdata, STA_NOTIFY_REMOVE, - &sta->sta); + drv_sta_remove(local, sdata, &sta->sta); } mesh_plink_quiesce(sta); } - spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5709307fcb9b..01dba7618397 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2244,8 +2244,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rate_idx = 0; /* TODO: HT rates */ else rate_idx = status->rate_idx; - rx->sta = ieee80211_ibss_add_sta(sdata, bssid, hdr->addr2, - BIT(rate_idx)); + rx->sta = ieee80211_ibss_add_sta(sdata, bssid, + hdr->addr2, BIT(rate_idx), GFP_ATOMIC); } break; case NL80211_IFTYPE_MESH_POINT: diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f735826f055c..211c475f73c6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -32,49 +32,33 @@ * for faster lookup and a list for iteration. They are managed using * RCU, i.e. access to the list and hash table is protected by RCU. * - * Upon allocating a STA info structure with sta_info_alloc(), the caller owns - * that structure. It must then either destroy it using sta_info_destroy() - * (which is pretty useless) or insert it into the hash table using - * sta_info_insert() which demotes the reference from ownership to a regular - * RCU-protected reference; if the function is called without protection by an - * RCU critical section the reference is instantly invalidated. Note that the - * caller may not do much with the STA info before inserting it, in particular, - * it may not start any mesh peer link management or add encryption keys. + * Upon allocating a STA info structure with sta_info_alloc(), the caller + * owns that structure. It must then insert it into the hash table using + * either sta_info_insert() or sta_info_insert_rcu(); only in the latter + * case (which acquires an rcu read section but must not be called from + * within one) will the pointer still be valid after the call. Note that + * the caller may not do much with the STA info before inserting it, in + * particular, it may not start any mesh peer link management or add + * encryption keys. * * When the insertion fails (sta_info_insert()) returns non-zero), the * structure will have been freed by sta_info_insert()! * - * sta entries are added by mac80211 when you establish a link with a + * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different type of interfaces * we support. For a regular station this mean we add the AP sta when we * receive an assocation response from the AP. For IBSS this occurs when - * we receive a probe response or a beacon from target IBSS network. For - * WDS we add the sta for the peer imediately upon device open. When using - * AP mode we add stations for each respective station upon request from - * userspace through nl80211. + * get to know about a peer on the same IBSS. For WDS we add the sta for + * the peer imediately upon device open. When using AP mode we add stations + * for each respective station upon request from userspace through nl80211. * - * Because there are debugfs entries for each station, and adding those - * must be able to sleep, it is also possible to "pin" a station entry, - * that means it can be removed from the hash table but not be freed. - * See the comment in __sta_info_unlink() for more information, this is - * an internal capability only. + * In order to remove a STA info structure, various sta_info_destroy_*() + * calls are available. * - * In order to remove a STA info structure, the caller needs to first - * unlink it (sta_info_unlink()) from the list and hash tables and - * then destroy it; sta_info_destroy() will wait for an RCU grace period - * to elapse before actually freeing it. Due to the pinning and the - * possibility of multiple callers trying to remove the same STA info at - * the same time, sta_info_unlink() can clear the STA info pointer it is - * passed to indicate that the STA info is owned by somebody else now. - * - * If sta_info_unlink() did not clear the pointer then the caller owns - * the STA info structure now and is responsible of destroying it with - * a call to sta_info_destroy(). - * - * In all other cases, there is no concept of ownership on a STA entry, - * each structure is owned by the global hash table/list until it is - * removed. All users of the structure need to be RCU protected so that - * the structure won't be freed before they are done using it. + * There is no concept of ownership on a STA entry, each structure is + * owned by the global hash table/list until it is removed. All users of + * the structure need to be RCU protected so that the structure won't be + * freed before they are done using it. */ /* Caller must hold local->sta_lock */ @@ -185,101 +169,6 @@ static void __sta_info_free(struct ieee80211_local *local, kfree(sta); } -void sta_info_destroy(struct sta_info *sta) -{ - struct ieee80211_local *local; - struct sk_buff *skb; - int i; - - might_sleep(); - - if (!sta) - return; - - local = sta->local; - - cancel_work_sync(&sta->drv_unblock_wk); - - rate_control_remove_sta_debugfs(sta); - ieee80211_sta_debugfs_remove(sta); - -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - mesh_plink_deactivate(sta); -#endif - - /* - * We have only unlinked the key, and actually destroying it - * may mean it is removed from hardware which requires that - * the key->sta pointer is still valid, so flush the key todo - * list here. - * - * ieee80211_key_todo() will synchronize_rcu() so after this - * nothing can reference this sta struct any more. - */ - ieee80211_key_todo(); - -#ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - del_timer_sync(&sta->plink_timer); -#endif - - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - local->total_ps_buffered--; - dev_kfree_skb_any(skb); - } - - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) - dev_kfree_skb_any(skb); - - for (i = 0; i < STA_TID_NUM; i++) { - struct tid_ampdu_rx *tid_rx; - struct tid_ampdu_tx *tid_tx; - - spin_lock_bh(&sta->lock); - tid_rx = sta->ampdu_mlme.tid_rx[i]; - /* Make sure timer won't free the tid_rx struct, see below */ - if (tid_rx) - tid_rx->shutdown = true; - - spin_unlock_bh(&sta->lock); - - /* - * Outside spinlock - shutdown is true now so that the timer - * won't free tid_rx, we have to do that now. Can't let the - * timer do it because we have to sync the timer outside the - * lock that it takes itself. - */ - if (tid_rx) { - del_timer_sync(&tid_rx->session_timer); - kfree(tid_rx); - } - - /* - * No need to do such complications for TX agg sessions, the - * path leading to freeing the tid_tx struct goes via a call - * from the driver, and thus needs to look up the sta struct - * again, which cannot be found when we get here. Hence, we - * just need to delete the timer and free the aggregation - * info; we won't be telling the peer about it then but that - * doesn't matter if we're not talking to it again anyway. - */ - tid_tx = sta->ampdu_mlme.tid_tx[i]; - if (tid_tx) { - del_timer_sync(&tid_tx->addba_resp_timer); - /* - * STA removed while aggregation session being - * started? Bit odd, but purge frames anyway. - */ - skb_queue_purge(&tid_tx->pending); - kfree(tid_tx); - } - } - - __sta_info_free(local, sta); -} - - /* Caller must hold local->sta_lock */ static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) @@ -376,7 +265,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return sta; } -int sta_info_insert(struct sta_info *sta) +static int sta_info_finish_insert(struct sta_info *sta, bool async) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -384,6 +273,91 @@ int sta_info_insert(struct sta_info *sta) unsigned long flags; int err = 0; + WARN_ON(!mutex_is_locked(&local->sta_mtx)); + + /* notify driver */ + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + err = drv_sta_add(local, sdata, &sta->sta); + if (err) { + if (!async) + return err; + printk(KERN_DEBUG "%s: failed to add IBSS STA %pM to driver (%d)" + " - keeping it anyway.\n", + sdata->name, sta->sta.addr, err); + } else { + sta->uploaded = true; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (async) + printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); +#endif + } + + sdata = sta->sdata; + + if (!async) { + local->num_sta++; + local->sta_generation++; + smp_mb(); + + /* make the station visible */ + spin_lock_irqsave(&local->sta_lock, flags); + sta_info_hash_add(local, sta); + spin_unlock_irqrestore(&local->sta_lock, flags); + } + + list_add(&sta->list, &local->sta_list); + + ieee80211_sta_debugfs_add(sta); + rate_control_add_sta_debugfs(sta); + + sinfo.filled = 0; + sinfo.generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + + + return 0; +} + +static void sta_info_finish_pending(struct ieee80211_local *local) +{ + struct sta_info *sta; + unsigned long flags; + + spin_lock_irqsave(&local->sta_lock, flags); + while (!list_empty(&local->sta_pending_list)) { + sta = list_first_entry(&local->sta_pending_list, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_irqrestore(&local->sta_lock, flags); + + sta_info_finish_insert(sta, true); + + spin_lock_irqsave(&local->sta_lock, flags); + } + spin_unlock_irqrestore(&local->sta_lock, flags); +} + +static void sta_info_finish_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, sta_finish_work); + + mutex_lock(&local->sta_mtx); + sta_info_finish_pending(local); + mutex_unlock(&local->sta_mtx); +} + +int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + unsigned long flags; + int err = 0; + /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL @@ -391,36 +365,87 @@ int sta_info_insert(struct sta_info *sta) */ if (unlikely(!ieee80211_sdata_running(sdata))) { err = -ENETDOWN; + rcu_read_lock(); goto out_free; } if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->vif.addr) == 0 || is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; + rcu_read_lock(); goto out_free; } + /* + * In ad-hoc mode, we sometimes need to insert stations + * from tasklet context from the RX path. To avoid races, + * always do so in that case -- see the comment below. + */ + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + spin_lock_irqsave(&local->sta_lock, flags); + /* check if STA exists already */ + if (sta_info_get_bss(sdata, sta->sta.addr)) { + spin_unlock_irqrestore(&local->sta_lock, flags); + rcu_read_lock(); + err = -EEXIST; + goto out_free; + } + + local->num_sta++; + local->sta_generation++; + smp_mb(); + sta_info_hash_add(local, sta); + + list_add_tail(&sta->list, &local->sta_pending_list); + + rcu_read_lock(); + spin_unlock_irqrestore(&local->sta_lock, flags); + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Added IBSS STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + + ieee80211_queue_work(&local->hw, &local->sta_finish_work); + + return 0; + } + + /* + * On first glance, this will look racy, because the code + * below this point, which inserts a station with sleeping, + * unlocks the sta_lock between checking existence in the + * hash table and inserting into it. + * + * However, it is not racy against itself because it keeps + * the mutex locked. It still seems to race against the + * above code that atomically inserts the station... That, + * however, is not true because the above code can only + * be invoked for IBSS interfaces, and the below code will + * not be -- and the two do not race against each other as + * the hash table also keys off the interface. + */ + + might_sleep(); + + mutex_lock(&local->sta_mtx); + spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (sta_info_get(sdata, sta->sta.addr)) { + if (sta_info_get_bss(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); + rcu_read_lock(); err = -EEXIST; goto out_free; } - list_add(&sta->list, &local->sta_list); - local->sta_generation++; - local->num_sta++; - sta_info_hash_add(local, sta); - /* notify driver */ - if (local->ops->sta_notify) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); + spin_unlock_irqrestore(&local->sta_lock, flags); - drv_sta_notify(local, sdata, STA_NOTIFY_ADD, &sta->sta); - sdata = sta->sdata; + err = sta_info_finish_insert(sta, false); + if (err) { + mutex_unlock(&local->sta_mtx); + rcu_read_lock(); + goto out_free; } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -428,22 +453,9 @@ int sta_info_insert(struct sta_info *sta) wiphy_name(local->hw.wiphy), sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - spin_unlock_irqrestore(&local->sta_lock, flags); - - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_ATOMIC); - -#ifdef CONFIG_MAC80211_DEBUGFS - /* - * Debugfs entry adding might sleep, so schedule process - * context task for adding entry for STAs that do not yet - * have one. - * NOTE: due to auto-freeing semantics this may only be done - * if the insertion is successful! - */ - schedule_work(&local->sta_debugfs_add); -#endif + /* move reference to rcu-protected */ + rcu_read_lock(); + mutex_unlock(&local->sta_mtx); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); @@ -455,6 +467,15 @@ int sta_info_insert(struct sta_info *sta) return err; } +int sta_info_insert(struct sta_info *sta) +{ + int err = sta_info_insert_rcu(sta); + + rcu_read_unlock(); + + return err; +} + static inline void __bss_tim_set(struct ieee80211_if_ap *bss, u16 aid) { /* @@ -523,108 +544,6 @@ void sta_info_clear_tim_bit(struct sta_info *sta) spin_unlock_irqrestore(&sta->local->sta_lock, flags); } -static void __sta_info_unlink(struct sta_info **sta) -{ - struct ieee80211_local *local = (*sta)->local; - struct ieee80211_sub_if_data *sdata = (*sta)->sdata; - /* - * pull caller's reference if we're already gone. - */ - if (sta_info_hash_del(local, *sta)) { - *sta = NULL; - return; - } - - if ((*sta)->key) { - ieee80211_key_free((*sta)->key); - WARN_ON((*sta)->key); - } - - list_del(&(*sta)->list); - (*sta)->dead = true; - - if (test_and_clear_sta_flags(*sta, - WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { - BUG_ON(!sdata->bss); - - atomic_dec(&sdata->bss->num_sta_ps); - __sta_info_clear_tim_bit(sdata->bss, *sta); - } - - local->num_sta--; - local->sta_generation++; - - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); - - if (local->ops->sta_notify) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); - - drv_sta_notify(local, sdata, STA_NOTIFY_REMOVE, - &(*sta)->sta); - sdata = (*sta)->sdata; - } - - if (ieee80211_vif_is_mesh(&sdata->vif)) { - mesh_accept_plinks_update(sdata); -#ifdef CONFIG_MAC80211_MESH - del_timer(&(*sta)->plink_timer); -#endif - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Removed STA %pM\n", - wiphy_name(local->hw.wiphy), (*sta)->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - /* - * Finally, pull caller's reference if the STA is pinned by the - * task that is adding the debugfs entries. In that case, we - * leave the STA "to be freed". - * - * The rules are not trivial, but not too complex either: - * (1) pin_status is only modified under the sta_lock - * (2) STAs may only be pinned under the RTNL so that - * sta_info_flush() is guaranteed to actually destroy - * all STAs that are active for a given interface, this - * is required for correctness because otherwise we - * could notify a driver that an interface is going - * away and only after that (!) notify it about a STA - * on that interface going away. - * (3) sta_info_debugfs_add_work() will set the status - * to PINNED when it found an item that needs a new - * debugfs directory created. In that case, that item - * must not be freed although all *RCU* users are done - * with it. Hence, we tell the caller of _unlink() - * that the item is already gone (as can happen when - * two tasks try to unlink/destroy at the same time) - * (4) We set the pin_status to DESTROY here when we - * find such an item. - * (5) sta_info_debugfs_add_work() will reset the pin_status - * from PINNED to NORMAL when it is done with the item, - * but will check for DESTROY before resetting it in - * which case it will free the item. - */ - if ((*sta)->pin_status == STA_INFO_PIN_STAT_PINNED) { - (*sta)->pin_status = STA_INFO_PIN_STAT_DESTROY; - *sta = NULL; - return; - } -} - -void sta_info_unlink(struct sta_info **sta) -{ - struct ieee80211_local *local = (*sta)->local; - unsigned long flags; - - spin_lock_irqsave(&local->sta_lock, flags); - __sta_info_unlink(sta); - spin_unlock_irqrestore(&local->sta_lock, flags); -} - static int sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { @@ -681,109 +600,209 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, } } - -static void sta_info_cleanup(unsigned long data) +static int __must_check __sta_info_destroy(struct sta_info *sta) { - struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta; + struct ieee80211_local *local; + struct ieee80211_sub_if_data *sdata; + struct sk_buff *skb; + unsigned long flags; + int ret, i; - rcu_read_lock(); - list_for_each_entry_rcu(sta, &local->sta_list, list) - sta_info_cleanup_expire_buffered(local, sta); - rcu_read_unlock(); + might_sleep(); - if (local->quiescing) - return; + if (!sta) + return -ENOENT; - local->sta_cleanup.expires = - round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); - add_timer(&local->sta_cleanup); -} + local = sta->local; + sdata = sta->sdata; -#ifdef CONFIG_MAC80211_DEBUGFS -/* - * See comment in __sta_info_unlink, - * caller must hold local->sta_lock. - */ -static void __sta_info_pin(struct sta_info *sta) -{ - WARN_ON(sta->pin_status != STA_INFO_PIN_STAT_NORMAL); - sta->pin_status = STA_INFO_PIN_STAT_PINNED; + spin_lock_irqsave(&local->sta_lock, flags); + ret = sta_info_hash_del(local, sta); + /* this might still be the pending list ... which is fine */ + if (!ret) + list_del(&sta->list); + spin_unlock_irqrestore(&local->sta_lock, flags); + if (ret) + return ret; + + if (sta->key) { + ieee80211_key_free(sta->key); + /* + * We have only unlinked the key, and actually destroying it + * may mean it is removed from hardware which requires that + * the key->sta pointer is still valid, so flush the key todo + * list here. + * + * ieee80211_key_todo() will synchronize_rcu() so after this + * nothing can reference this sta struct any more. + */ + ieee80211_key_todo(); + + WARN_ON(sta->key); + } + + sta->dead = true; + + if (test_and_clear_sta_flags(sta, + WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { + BUG_ON(!sdata->bss); + + atomic_dec(&sdata->bss->num_sta_ps); + __sta_info_clear_tim_bit(sdata->bss, sta); + } + + local->num_sta--; + local->sta_generation++; + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + rcu_assign_pointer(sdata->u.vlan.sta, NULL); + + if (sta->uploaded) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + drv_sta_remove(local, sdata, &sta->sta); + sdata = sta->sdata; + } + +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif)) { + mesh_accept_plinks_update(sdata); + del_timer(&sta->plink_timer); + } +#endif + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Removed STA %pM\n", + wiphy_name(local->hw.wiphy), sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + cancel_work_sync(&sta->drv_unblock_wk); + + rate_control_remove_sta_debugfs(sta); + ieee80211_sta_debugfs_remove(sta); + +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { + mesh_plink_deactivate(sta); + del_timer_sync(&sta->plink_timer); + } +#endif + + while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { + local->total_ps_buffered--; + dev_kfree_skb_any(skb); + } + + while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) + dev_kfree_skb_any(skb); + + for (i = 0; i < STA_TID_NUM; i++) { + struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_tx *tid_tx; + + spin_lock_bh(&sta->lock); + tid_rx = sta->ampdu_mlme.tid_rx[i]; + /* Make sure timer won't free the tid_rx struct, see below */ + if (tid_rx) + tid_rx->shutdown = true; + + spin_unlock_bh(&sta->lock); + + /* + * Outside spinlock - shutdown is true now so that the timer + * won't free tid_rx, we have to do that now. Can't let the + * timer do it because we have to sync the timer outside the + * lock that it takes itself. + */ + if (tid_rx) { + del_timer_sync(&tid_rx->session_timer); + kfree(tid_rx); + } + + /* + * No need to do such complications for TX agg sessions, the + * path leading to freeing the tid_tx struct goes via a call + * from the driver, and thus needs to look up the sta struct + * again, which cannot be found when we get here. Hence, we + * just need to delete the timer and free the aggregation + * info; we won't be telling the peer about it then but that + * doesn't matter if we're not talking to it again anyway. + */ + tid_tx = sta->ampdu_mlme.tid_tx[i]; + if (tid_tx) { + del_timer_sync(&tid_tx->addba_resp_timer); + /* + * STA removed while aggregation session being + * started? Bit odd, but purge frames anyway. + */ + skb_queue_purge(&tid_tx->pending); + kfree(tid_tx); + } + } + + __sta_info_free(local, sta); + + return 0; } -/* - * See comment in __sta_info_unlink, returns sta if it - * needs to be destroyed. - */ -static struct sta_info *__sta_info_unpin(struct sta_info *sta) +int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { - struct sta_info *ret = NULL; - unsigned long flags; + struct sta_info *sta; + int ret; - spin_lock_irqsave(&sta->local->sta_lock, flags); - WARN_ON(sta->pin_status != STA_INFO_PIN_STAT_DESTROY && - sta->pin_status != STA_INFO_PIN_STAT_PINNED); - if (sta->pin_status == STA_INFO_PIN_STAT_DESTROY) - ret = sta; - sta->pin_status = STA_INFO_PIN_STAT_NORMAL; - spin_unlock_irqrestore(&sta->local->sta_lock, flags); + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get(sdata, addr); + ret = __sta_info_destroy(sta); + mutex_unlock(&sdata->local->sta_mtx); return ret; } -static void sta_info_debugfs_add_work(struct work_struct *work) +int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, + const u8 *addr) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, sta_debugfs_add); - struct sta_info *sta, *tmp; - unsigned long flags; + struct sta_info *sta; + int ret; - /* We need to keep the RTNL across the whole pinned status. */ - rtnl_lock(); - while (1) { - sta = NULL; + mutex_lock(&sdata->local->sta_mtx); + sta = sta_info_get_bss(sdata, addr); + ret = __sta_info_destroy(sta); + mutex_unlock(&sdata->local->sta_mtx); - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry(tmp, &local->sta_list, list) { - /* - * debugfs.add_has_run will be set by - * ieee80211_sta_debugfs_add regardless - * of what else it does. - */ - if (!tmp->debugfs.add_has_run) { - sta = tmp; - __sta_info_pin(sta); - break; - } - } - spin_unlock_irqrestore(&local->sta_lock, flags); + return ret; +} - if (!sta) - break; +static void sta_info_cleanup(unsigned long data) +{ + struct ieee80211_local *local = (struct ieee80211_local *) data; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) + sta_info_cleanup_expire_buffered(local, sta); + rcu_read_unlock(); - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); + if (local->quiescing) + return; - sta = __sta_info_unpin(sta); - sta_info_destroy(sta); - } - rtnl_unlock(); + local->sta_cleanup.expires = + round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); + add_timer(&local->sta_cleanup); } -#endif void sta_info_init(struct ieee80211_local *local) { spin_lock_init(&local->sta_lock); + mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); + INIT_LIST_HEAD(&local->sta_pending_list); + INIT_WORK(&local->sta_finish_work, sta_info_finish_work); setup_timer(&local->sta_cleanup, sta_info_cleanup, (unsigned long)local); local->sta_cleanup.expires = round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); - -#ifdef CONFIG_MAC80211_DEBUGFS - INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_work); -#endif } int sta_info_start(struct ieee80211_local *local) @@ -795,16 +814,6 @@ int sta_info_start(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer(&local->sta_cleanup); -#ifdef CONFIG_MAC80211_DEBUGFS - /* - * Make sure the debugfs adding work isn't pending after this - * because we're about to be destroyed. It doesn't matter - * whether it ran or not since we're going to flush all STAs - * anyway. - */ - cancel_work_sync(&local->sta_debugfs_add); -#endif - sta_info_flush(local, NULL); } @@ -820,26 +829,19 @@ int sta_info_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { struct sta_info *sta, *tmp; - LIST_HEAD(tmp_list); int ret = 0; - unsigned long flags; might_sleep(); - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); + + sta_info_finish_pending(local); + list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (!sdata || sdata == sta->sdata) { - __sta_info_unlink(&sta); - if (sta) { - list_add_tail(&sta->list, &tmp_list); - ret++; - } - } + if (!sdata || sdata == sta->sdata) + WARN_ON(__sta_info_destroy(sta)); } - spin_unlock_irqrestore(&local->sta_lock, flags); - - list_for_each_entry_safe(sta, tmp, &tmp_list, list) - sta_info_destroy(sta); + mutex_unlock(&local->sta_mtx); return ret; } @@ -849,24 +851,17 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; - LIST_HEAD(tmp_list); - unsigned long flags; - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + exp_time)) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: expiring inactive STA %pM\n", sdata->name, sta->sta.addr); #endif - __sta_info_unlink(&sta); - if (sta) - list_add(&sta->list, &tmp_list); + WARN_ON(__sta_info_destroy(sta)); } - spin_unlock_irqrestore(&local->sta_lock, flags); - - list_for_each_entry_safe(sta, tmp, &tmp_list, list) - sta_info_destroy(sta); + mutex_unlock(&local->sta_mtx); } struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 6f79bba5706e..5ff611a35979 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -162,11 +162,6 @@ struct sta_ampdu_mlme { }; -/* see __sta_info_unlink */ -#define STA_INFO_PIN_STAT_NORMAL 0 -#define STA_INFO_PIN_STAT_PINNED 1 -#define STA_INFO_PIN_STAT_DESTROY 2 - /** * struct sta_info - STA information * @@ -187,7 +182,6 @@ struct sta_ampdu_mlme { * @flaglock: spinlock for flags accesses * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP - * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags * @ps_tx_buf: buffer of frames to transmit to this station * when it leaves power saving state @@ -226,6 +220,7 @@ struct sta_ampdu_mlme { * @debugfs: debug filesystem info * @sta: station information we share with the driver * @dead: set to true when sta is unlinked + * @uploaded: set to true when sta is uploaded to the driver */ struct sta_info { /* General information, mostly static */ @@ -245,11 +240,7 @@ struct sta_info { bool dead; - /* - * for use by the internal lifetime management, - * see __sta_info_unlink - */ - u8 pin_status; + bool uploaded; /* * frequently updated, locked with own spinlock (flaglock), @@ -449,18 +440,19 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, * Insert STA info into hash table/list, returns zero or a * -EEXIST if (if the same MAC address is already present). * - * Calling this without RCU protection makes the caller - * relinquish its reference to @sta. + * Calling the non-rcu version makes the caller relinquish, + * the _rcu version calls read_lock_rcu() and must be called + * without it held. */ int sta_info_insert(struct sta_info *sta); -/* - * Unlink a STA info from the hash table/list. - * This can NULL the STA pointer if somebody else - * has already unlinked it. - */ -void sta_info_unlink(struct sta_info **sta); +int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU); +int sta_info_insert_atomic(struct sta_info *sta); + +int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, + const u8 *addr); +int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, + const u8 *addr); -void sta_info_destroy(struct sta_info *sta); void sta_info_set_tim_bit(struct sta_info *sta); void sta_info_clear_tim_bit(struct sta_info *sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e392820a4c33..cbe53ed4fb0b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -571,7 +571,7 @@ ieee80211_tx_h_sta(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - if (tx->sta) + if (tx->sta && tx->sta->uploaded) info->control.sta = &tx->sta->sta; return TX_CONTINUE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ca170b417da6..3af439a85b33 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1082,7 +1082,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_hw *hw = &local->hw; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - unsigned long flags; int res; if (local->suspended) @@ -1116,20 +1115,19 @@ int ieee80211_reconfig(struct ieee80211_local *local) } /* add STAs back */ - if (local->ops->sta_notify) { - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry(sta, &local->sta_list, list) { + mutex_lock(&local->sta_mtx); + list_for_each_entry(sta, &local->sta_list, list) { + if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - drv_sta_notify(local, sdata, STA_NOTIFY_ADD, - &sta->sta); + WARN_ON(drv_sta_add(local, sdata, &sta->sta)); } - spin_unlock_irqrestore(&local->sta_lock, flags); } + mutex_unlock(&local->sta_mtx); /* Clear Suspend state so that ADDBA requests can be processed */ @@ -1219,10 +1217,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) add_timer(&local->sta_cleanup); - spin_lock_irqsave(&local->sta_lock, flags); + mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) mesh_plink_restart(sta); - spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); #else WARN_ON(1); #endif -- cgit v1.2.3 From 349e6b7289f8a3d3d5d3b859e00b41f27d1211df Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 7 Feb 2010 10:22:01 +0200 Subject: mac80211: remove get_tx_stats() driver op get_tx_stats() driver operation is not currently used anywhere in mac80211 and there are no plans to use it in the not-so-near future. So it can go without anyone missing it. Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/mac80211.tmpl | 1 - include/net/mac80211.h | 22 ---------------------- net/mac80211/driver-ops.h | 8 -------- net/mac80211/driver-trace.h | 23 ----------------------- 4 files changed, 54 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index 971d1c0c83e5..affb15a344a1 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl @@ -234,7 +234,6 @@ usage should require reading the full document. Multiple queues and QoS support TBD !Finclude/net/mac80211.h ieee80211_tx_queue_params -!Finclude/net/mac80211.h ieee80211_tx_queue_stats diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a19fac35259a..414d774028ba 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -117,19 +117,6 @@ struct ieee80211_tx_queue_params { bool uapsd; }; -/** - * struct ieee80211_tx_queue_stats - transmit queue statistics - * - * @len: number of packets in queue - * @limit: queue length limit - * @count: number of frames sent - */ -struct ieee80211_tx_queue_stats { - unsigned int len; - unsigned int limit; - unsigned int count; -}; - struct ieee80211_low_level_stats { unsigned int dot11ACKFailureCount; unsigned int dot11RTSFailureCount; @@ -1548,13 +1535,6 @@ enum ieee80211_ampdu_mlme_action { * Returns a negative error code on failure. * The callback can sleep. * - * @get_tx_stats: Get statistics of the current TX queue status. This is used - * to get number of currently queued packets (queue length), maximum queue - * size (limit), and total number of packets sent using each TX queue - * (count). The 'stats' pointer points to an array that has hw->queues - * items. - * The callback must be atomic. - * * @get_tsf: Get the current TSF timer value from firmware/hardware. Currently, * this is only used for IBSS mode BSSID merging and debugging. Is not a * required function. @@ -1648,8 +1628,6 @@ struct ieee80211_ops { enum sta_notify_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); - int (*get_tx_stats)(struct ieee80211_hw *hw, - struct ieee80211_tx_queue_stats *stats); u64 (*get_tsf)(struct ieee80211_hw *hw); void (*set_tsf)(struct ieee80211_hw *hw, u64 tsf); void (*reset_tsf)(struct ieee80211_hw *hw); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 855e85b55061..c3d844093a2f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -290,14 +290,6 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, return ret; } -static inline int drv_get_tx_stats(struct ieee80211_local *local, - struct ieee80211_tx_queue_stats *stats) -{ - int ret = local->ops->get_tx_stats(&local->hw, stats); - trace_drv_get_tx_stats(local, stats, ret); - return ret; -} - static inline u64 drv_get_tsf(struct ieee80211_local *local) { u64 ret = -1ULL; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index c984910bf275..41baf730a5c7 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -630,29 +630,6 @@ TRACE_EVENT(drv_conf_tx, ) ); -TRACE_EVENT(drv_get_tx_stats, - TP_PROTO(struct ieee80211_local *local, - struct ieee80211_tx_queue_stats *stats, - int ret), - - TP_ARGS(local, stats, ret), - - TP_STRUCT__entry( - LOCAL_ENTRY - __field(int, ret) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - __entry->ret = ret; - ), - - TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret - ) -); - TRACE_EVENT(drv_get_tsf, TP_PROTO(struct ieee80211_local *local, u64 ret), -- cgit v1.2.3 From 375177bf35efc08e1bd37bbda4cc0c8cc4db8500 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 9 Feb 2010 14:50:28 +0530 Subject: mac80211: Retry null data frame for power save. Even if the null data frame is not acked by the AP, mac80211 goes into power save. This might lead to loss of frames from the AP. Prevent this by restarting dynamic_ps_timer when ack is not received for null data frames. Cc: Johannes Berg Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 20 +++++++++++++++----- net/mac80211/status.c | 17 +++++++++++++++-- 4 files changed, 37 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 414d774028ba..314e98173166 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -945,6 +945,11 @@ enum ieee80211_tkip_key_type { * Hardware supports Unscheduled Automatic Power Save Delivery * (U-APSD) in managed mode. The mode is configured with * conf_tx() operation. + * + * @IEEE80211_HW_REPORTS_TX_ACK_STATUS: + * Hardware can provide ack status reports of Tx frames to + * the stack. + * */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -965,6 +970,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_STATIC_SMPS = 1<<15, IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS = 1<<16, IEEE80211_HW_SUPPORTS_UAPSD = 1<<17, + IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a5911191f224..9dd98b674cbc 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -316,6 +316,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), IEEE80211_STA_UAPSD_ENABLED = BIT(7), + IEEE80211_STA_NULLFUNC_ACKED = BIT(8), }; struct ieee80211_if_managed { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7a7921476582..ee9443dc20ff 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -434,8 +434,11 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, } else { if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } } } @@ -541,6 +544,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) container_of(work, struct ieee80211_local, dynamic_ps_enable_work); struct ieee80211_sub_if_data *sdata = local->ps_sdata; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; /* can only happen when PS was just disabled anyway */ if (!sdata) @@ -549,11 +553,16 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) ieee80211_send_nullfunc(local, sdata, 1); - local->hw.conf.flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) || + (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { + ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; + local->hw.conf.flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } } void ieee80211_dynamic_ps_timer(unsigned long data) @@ -1892,6 +1901,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, return -ENOMEM; ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; + ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 || diff --git a/net/mac80211/status.c b/net/mac80211/status.c index e57ad6b1d7ea..ded98730c111 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -188,6 +188,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_lock(); sband = local->hw.wiphy->bands[info->band]; + fc = hdr->frame_control; for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ @@ -205,8 +206,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } - fc = hdr->frame_control; - if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { u16 tid, ssn; @@ -275,6 +274,20 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) local->dot11FailedCount++; } + if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + !(info->flags & IEEE80211_TX_CTL_INJECTED) && + local->ps_sdata && !(local->scanning)) { + if (info->flags & IEEE80211_TX_STAT_ACK) { + local->ps_sdata->u.mgd.flags |= + IEEE80211_STA_NULLFUNC_ACKED; + ieee80211_queue_work(&local->hw, + &local->dynamic_ps_enable_work); + } else + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(10)); + } + /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); -- cgit v1.2.3 From c4146644a56b1f213c4c5567c75771883bec33c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:18:45 +0000 Subject: net: add a wrapper sk_entry() Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 3f1a4804bb3f..c8d400063c16 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -317,6 +317,11 @@ struct sock { /* * Hashed lists helper routines */ +static inline struct sock *sk_entry(const struct hlist_node *node) +{ + return hlist_entry(node, struct sock, sk_node); +} + static inline struct sock *__sk_head(const struct hlist_head *head) { return hlist_entry(head->first, struct sock, sk_node); -- cgit v1.2.3 From b87921bdf25485afd8f5a5f25e86b5acef32a9cf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:22:48 +0100 Subject: netfilter: nf_conntrack: show helper and class in /proc/net/nf_conntrack_expect Make the output a bit more informative by showing the helper an expectation belongs to and the expectation class. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 1 + net/netfilter/nf_conntrack_expect.c | 9 +++++++++ net/netfilter/nf_conntrack_sip.c | 3 +++ 3 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 9a2b9cb52271..917e170fa752 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -66,6 +66,7 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; + const char *name; }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2f25ff610982..33b85f834c06 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -500,6 +500,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v) static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; + struct nf_conntrack_helper *helper; struct hlist_node *n = v; char *delim = ""; @@ -525,6 +526,14 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_INACTIVE) seq_printf(s, "%sINACTIVE", delim); + helper = rcu_dereference(nfct_help(expect->master)->helper); + if (helper) { + seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); + if (helper->expect_policy[expect->class].name) + seq_printf(s, "/%s", + helper->expect_policy[expect->class].name); + } + return seq_putc(s, '\n'); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 023966b569bf..419c5cabb332 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1305,14 +1305,17 @@ static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { + .name = "signalling", .max_expected = 1, .timeout = 3 * 60, }, [SIP_EXPECT_AUDIO] = { + .name = "audio", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, [SIP_EXPECT_VIDEO] = { + .name = "video", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, -- cgit v1.2.3 From 010c0b9f34a4c567b431f8b49a58b7332ed42e47 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:27:09 +0100 Subject: netfilter: nf_nat: support mangling a single TCP packet multiple times nf_nat_mangle_tcp_packet() can currently only handle a single mangling per window because it only maintains two sequence adjustment positions: the one before the last adjustment and the one after. This patch makes sequence number adjustment tracking in nf_nat_mangle_tcp_packet() optional and allows a helper to manually update the offsets after the packet has been fully handled. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_helper.h | 32 +++++++++++++++++++++------- net/ipv4/netfilter/nf_nat_helper.c | 39 +++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 4222220920a5..02bb6c29dc3d 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,13 +7,27 @@ struct sk_buff; /* These return true or false. */ -extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len); +extern int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust); + +static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + match_offset, match_len, + rep_buffer, rep_len, true); +} + extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -21,6 +35,10 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); + +extern void nf_nat_set_seq_adjust(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + __be32 seq, s16 off); extern int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 7f10a6be0191..4b6af4bb1f50 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -141,6 +141,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) return 1; } +void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s16 off) +{ + if (!off) + return; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); +} +EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); + /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). @@ -149,14 +160,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * skb enlargement, ... * * */ -int -nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) { struct rtable *rt = skb_rtable(skb); struct iphdr *iph; @@ -202,16 +212,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); - if (rep_len != match_len) { - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); - } + if (adjust && rep_len != match_len) + nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); + return 1; } -EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); +EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX -- cgit v1.2.3 From 9d288dffe3a276e1f06ba556845c456d696c5a4f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:30:21 +0100 Subject: netfilter: nf_conntrack_sip: add T.38 FAX support Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_sip.h | 1 + include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_sip.c | 28 +++++++++++++++++++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index cd84d6f44d11..ff8cfbcf3b81 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -14,6 +14,7 @@ enum sip_expectation_classes { SIP_EXPECT_SIGNALLING, SIP_EXPECT_AUDIO, SIP_EXPECT_VIDEO, + SIP_EXPECT_IMAGE, __SIP_EXPECT_MAX }; #define SIP_EXPECT_MAX (__SIP_EXPECT_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5043d61c99a7..5b7d8835523f 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -70,7 +70,7 @@ union nf_conntrack_help { struct nf_conntrack_helper; /* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 3 +#define NF_CT_MAX_EXPECT_CLASSES 4 /* nf_conn feature for connections that have a helper */ struct nf_conn_help { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3bb3aaff76e9..fbe8ff5a420a 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -907,6 +907,7 @@ err1: static const struct sdp_media_type sdp_media_types[] = { SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO), SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO), + SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE), }; static const struct sdp_media_type *sdp_media_type(const char *dptr, @@ -932,7 +933,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -1024,9 +1024,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, &rtp_addr); - if (ret == NF_ACCEPT && i > 0) - help->help.ct_sip_info.invite_cseq = cseq; - return ret; } static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, @@ -1077,6 +1074,22 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; } +static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int cseq) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + unsigned int ret; + + flush_expectations(ct, true); + ret = process_sdp(skb, dataoff, dptr, datalen, cseq); + if (ret == NF_ACCEPT) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; +} + static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) @@ -1257,7 +1270,7 @@ flush: } static const struct sip_handler sip_handlers[] = { - SIP_HANDLER("INVITE", process_sdp, process_invite_response), + SIP_HANDLER("INVITE", process_invite_request, process_invite_response), SIP_HANDLER("UPDATE", process_sdp, process_update_response), SIP_HANDLER("ACK", process_sdp, NULL), SIP_HANDLER("PRACK", process_sdp, process_prack_response), @@ -1473,6 +1486,11 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, + [SIP_EXPECT_IMAGE] = { + .name = "image", + .max_expected = IP_CT_DIR_MAX, + .timeout = 3 * 60, + }, }; static void nf_conntrack_sip_fini(void) -- cgit v1.2.3 From 857b409a48bdc33e824dff2d730e271b964e78bd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 12 Feb 2010 06:24:46 +0100 Subject: netfilter: nf_conntrack: elegantly simplify nf_ct_exp_net() Remove #ifdef at nf_ct_exp_net() by using nf_ct_net(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 917e170fa752..4b47ec19ef39 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -56,11 +56,7 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { -#ifdef CONFIG_NET_NS - return exp->master->ct_net; /* by definition */ -#else - return &init_net; -#endif + return nf_ct_net(exp->master); } struct nf_conntrack_expect_policy { -- cgit v1.2.3 From 2bec5a369ee79576a3eea2c23863325089785a2c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 05:19:03 +0000 Subject: ipv6: fib: fix crash when changing large fib while dumping it When the fib size exceeds what can be dumped in a single skb, the dump is suspended and resumed once the last skb has been received by userspace. When the fib is changed while the dump is suspended, the walker might contain stale pointers, causing a crash when the dump is resumed. BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] fib6_walk_continue+0xbb/0x124 [ipv6] PGD 5347a067 PUD 65c7067 PMD 0 Oops: 0000 [#1] PREEMPT SMP ... RIP: 0010:[] [] fib6_walk_continue+0xbb/0x124 [ipv6] ... Call Trace: [] ? mutex_spin_on_owner+0x59/0x71 [] inet6_dump_fib+0x11b/0x1b9 [ipv6] [] netlink_dump+0x5b/0x19e [] ? consume_skb+0x28/0x2a [] netlink_recvmsg+0x1ab/0x2c6 [] ? netlink_unicast+0xfa/0x151 [] __sock_recvmsg+0x6d/0x79 [] sock_recvmsg+0xca/0xe3 [] ? autoremove_wake_function+0x0/0x38 [] ? radix_tree_lookup_slot+0xe/0x10 [] ? find_get_page+0x90/0xa5 [] ? filemap_fault+0x201/0x34f [] ? fget_light+0x2f/0xac [] ? verify_iovec+0x4f/0x94 [] sys_recvmsg+0x14d/0x223 Store the serial number when beginning to walk the fib and reload pointers when continuing to walk after a change occured. Similar to other dumping functions, this might cause unrelated entries to be missed when entries are deleted. Tested-by: Ben Greear Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_fib.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 257808188add..8f279ddb3593 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -129,6 +129,8 @@ struct fib6_walker_t { struct rt6_info *leaf; unsigned char state; unsigned char prune; + unsigned int skip; + unsigned int count; int (*func)(struct fib6_walker_t *); void *args; }; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f626ea2b304f..77e122f53ea6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -319,12 +319,26 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, w->root = &table->tb6_root; if (cb->args[4] == 0) { + w->count = 0; + w->skip = 0; + read_lock_bh(&table->tb6_lock); res = fib6_walk(w); read_unlock_bh(&table->tb6_lock); - if (res > 0) + if (res > 0) { cb->args[4] = 1; + cb->args[5] = w->root->fn_sernum; + } } else { + if (cb->args[5] != w->root->fn_sernum) { + /* Begin at the root if the tree changed */ + cb->args[5] = w->root->fn_sernum; + w->state = FWS_INIT; + w->node = w->root; + w->skip = w->count; + } else + w->skip = 0; + read_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); read_unlock_bh(&table->tb6_lock); @@ -1250,9 +1264,18 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->leaf = fn->leaf; case FWS_C: if (w->leaf && fn->fn_flags&RTN_RTINFO) { - int err = w->func(w); + int err; + + if (w->count < w->skip) { + w->count++; + continue; + } + + err = w->func(w); if (err) return err; + + w->count++; continue; } w->state = FWS_U; @@ -1346,6 +1369,8 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, c.w.root = root; c.w.func = fib6_clean_node; c.w.prune = prune; + c.w.count = 0; + c.w.skip = 0; c.func = func; c.arg = arg; c.net = net; -- cgit v1.2.3 From a63374631e7192a64648ecc6672619a7abf9ebcd Mon Sep 17 00:00:00 2001 From: jamal Date: Tue, 9 Feb 2010 13:21:17 +0000 Subject: xfrm: use proper kernel types kernel side should use uxx instead of __uxx types Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fcee547ca7e3..0beb413c01c4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,8 +317,8 @@ extern void xfrm_state_delete_tunnel(struct xfrm_state *x); struct xfrm_type { char *description; struct module *owner; - __u8 proto; - __u8 flags; + u8 proto; + u8 flags; #define XFRM_TYPE_NON_FRAGMENT 1 #define XFRM_TYPE_REPLAY_PROT 2 #define XFRM_TYPE_LOCAL_COADDR 4 @@ -434,24 +434,24 @@ struct xfrm_tmpl { unsigned short encap_family; - __u32 reqid; + u32 reqid; /* Mode: transport, tunnel etc. */ - __u8 mode; + u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ - __u8 share; + u8 share; /* May skip this transfomration if no SA is found */ - __u8 optional; + u8 optional; /* Skip aalgos/ealgos/calgos checks. */ - __u8 allalgs; + u8 allalgs; /* Bit mask of algos allowed for acquisition */ - __u32 aalgos; - __u32 ealgos; - __u32 calgos; + u32 aalgos; + u32 ealgos; + u32 calgos; }; #define XFRM_MAX_DEPTH 6 @@ -770,7 +770,7 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen) int pdw; int pbi; - pdw = prefixlen >> 5; /* num of whole __u32 in prefix */ + pdw = prefixlen >> 5; /* num of whole u32 in prefix */ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ if (pdw) @@ -1259,7 +1259,7 @@ struct xfrm_algo_desc { /* XFRM tunnel handlers. */ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); - int (*err_handler)(struct sk_buff *skb, __u32 info); + int (*err_handler)(struct sk_buff *skb, u32 info); struct xfrm_tunnel *next; int priority; @@ -1500,7 +1500,7 @@ static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b, switch (family) { default: case AF_INET: - return (__force __u32)a->a4 - (__force __u32)b->a4; + return (__force u32)a->a4 - (__force u32)b->a4; case AF_INET6: return ipv6_addr_cmp((struct in6_addr *)a, (struct in6_addr *)b); -- cgit v1.2.3 From 1a5778aa000ebfec7f07eed0ffa2852ffb5d16bb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 14 Feb 2010 22:35:47 -0800 Subject: net: Fix first line of kernel-doc for a few functions The function name must be followed by a space, hypen, space, and a short description. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- include/net/sock.h | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ae836fded530..ba0f8e3a9cda 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -738,7 +738,7 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, } /** - * skb_peek + * skb_peek - peek at the head of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ @@ -759,7 +759,7 @@ static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) } /** - * skb_peek_tail + * skb_peek_tail - peek at the tail of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ diff --git a/include/net/sock.h b/include/net/sock.h index c8d400063c16..580d51fa28e9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1049,7 +1049,7 @@ extern void sk_common_release(struct sock *sk); extern void sock_init_data(struct socket *sock, struct sock *sk); /** - * sk_filter_release: Release a socket filter + * sk_filter_release - release a socket filter * @fp: filter to remove * * Remove a filter from a socket and release its resources. diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 49278f830367..9ea45383480e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -78,7 +78,7 @@ rpc_timeout_upcall_queue(struct work_struct *work) } /** - * rpc_queue_upcall + * rpc_queue_upcall - queue an upcall message to userspace * @inode: inode of upcall pipe on which to queue given message * @msg: message to queue * -- cgit v1.2.3 From dfff0615d28bdb3e8d213e5537dd069265912667 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 12 Feb 2010 21:58:11 +0100 Subject: tree-wide: fix typos "ass?o[sc]iac?te" -> "associate" in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- drivers/usb/musb/davinci.c | 2 +- drivers/zorro/zorro.ids | 2 +- include/net/irda/irttp.h | 2 +- kernel/irq/chip.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index 66913811af5e..a883f9dd3f8a 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -274,7 +274,7 @@ static irqreturn_t davinci_interrupt(int irq, void *__hci) /* NOTE: DaVinci shadows the Mentor IRQs. Don't manage them through * the Mentor registers (except for setup), use the TI ones and EOI. * - * Docs describe irq "vector" registers asociated with the CPPI and + * Docs describe irq "vector" registers associated with the CPPI and * USB EOI registers. These hold a bitmask corresponding to the * current IRQ, not an irq handler address. Would using those bits * resolve some of the races observed in this dispatch code?? diff --git a/drivers/zorro/zorro.ids b/drivers/zorro/zorro.ids index 0c0f99e2dd62..de24e3decedd 100644 --- a/drivers/zorro/zorro.ids +++ b/drivers/zorro/zorro.ids @@ -108,7 +108,7 @@ 0c00 500XP/SupraDrive WordSync [SCSI Host Adapter] 0d00 SupraDrive WordSync II [SCSI Host Adapter] 1000 2400zi+ [Modem] -0422 Computer Systems Assosiates +0422 Computer Systems Associates 1100 Magnum 40 [Accelerator and SCSI Host Adapter] 1500 12 Gauge [SCSI Host Adapter] 0439 Marc Michael Groth diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 0788c23d2828..11aee7a2972a 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -97,7 +97,7 @@ #define TTP_MAX_SDU_SIZE 0x01 /* - * This structure contains all data assosiated with one instance of a TTP + * This structure contains all data associated with one instance of a TTP * connection. */ struct tsap_cb { diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..ec8a96382461 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -520,7 +520,7 @@ out: * signal. The occurence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one - * is handled by the assosiacted event handler. If this happens it + * is handled by the associated event handler. If this happens it * might be necessary to disable (mask) the interrupt depending on the * controller hardware. This requires to reenable the interrupt inside * of the loop which handles the interrupts which have arrived while -- cgit v1.2.3 From 8fea97ec1772bbf553d89187340ef624d548e115 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 17:45:08 +0100 Subject: netfilter: nf_conntrack: pass template to l4proto ->error() handler The error handlers might need the template to get the conntrack zone introduced in the next patches to perform a conntrack lookup. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 3 ++- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 3 ++- net/netfilter/nf_conntrack_core.c | 3 ++- net/netfilter/nf_conntrack_proto_dccp.c | 5 +++-- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 8 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ca6dcf3445ab..e3d3ee3c06a2 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -49,8 +49,8 @@ struct nf_conntrack_l4proto { /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, + int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7afd39b5b781..327826a968a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -163,7 +163,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c7b8bd1d7984..d772dc21857f 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -179,7 +179,8 @@ icmpv6_error_message(struct net *net, } static int -icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmpv6_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bd831410a396..65351ed5d815 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -799,7 +799,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, + pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index dd375500dccc..9a2815549375 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,8 +561,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct net *net, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, +static int dccp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ad118053971a..9dd8cd4fb6e6 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct net *net, +static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8d38f9a4bed8..8289088b8218 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, +static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0b1bc9ba6678..263b5a72588d 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct net *net, +static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, -- cgit v1.2.3 From 5d0aa2ccd4699a01cfdf14886191c249d7b45a01 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 18:13:33 +0100 Subject: netfilter: nf_conntrack: add support for "conntrack zones" Normally, each connection needs a unique identity. Conntrack zones allow to specify a numerical zone using the CT target, connections in different zones can use the same identity. Example: iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1 iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1 Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_CT.h | 2 +- include/net/ip.h | 3 + include/net/ipv6.h | 3 + include/net/netfilter/nf_conntrack.h | 5 +- include/net/netfilter/nf_conntrack_core.h | 3 +- include/net/netfilter/nf_conntrack_expect.h | 9 +- include/net/netfilter/nf_conntrack_extend.h | 2 + include/net/netfilter/nf_conntrack_zones.h | 23 ++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 12 ++- net/ipv4/netfilter/nf_nat_core.c | 24 +++--- net/ipv4/netfilter/nf_nat_pptp.c | 3 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 12 ++- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 +- net/netfilter/Kconfig | 13 +++ net/netfilter/nf_conntrack_core.c | 109 +++++++++++++++++++------ net/netfilter/nf_conntrack_expect.c | 21 +++-- net/netfilter/nf_conntrack_h323_main.c | 3 +- net/netfilter/nf_conntrack_netlink.c | 20 ++--- net/netfilter/nf_conntrack_pptp.c | 14 ++-- net/netfilter/nf_conntrack_sip.c | 3 +- net/netfilter/nf_conntrack_standalone.c | 6 ++ net/netfilter/xt_CT.c | 8 +- net/netfilter/xt_connlimit.c | 4 +- 25 files changed, 236 insertions(+), 85 deletions(-) create mode 100644 include/net/netfilter/nf_conntrack_zones.h (limited to 'include/net') diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index 7fd0effe1316..1b564106891d 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h @@ -5,7 +5,7 @@ struct xt_ct_target_info { u_int16_t flags; - u_int16_t __unused; + u_int16_t zone; u_int32_t ct_events; u_int32_t exp_events; char helper[16]; diff --git a/include/net/ip.h b/include/net/ip.h index fb63371c07a8..7bc47873e3fc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,8 +352,11 @@ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, + __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP_DEFRAG_CONNTRACK_OUT, + __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 299bbf5adfb6..639ec53ea081 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -355,8 +355,11 @@ struct inet_frag_queue; enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP6_DEFRAG_CONNTRACK_OUT, + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP6_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, }; struct ip6_create_arg { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5b7d8835523f..bde095f7e845 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -198,7 +198,8 @@ extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int null extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); @@ -267,7 +268,7 @@ extern void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(struct net *net, +nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 5a449b44ba33..dffde8e6920e 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -49,7 +49,8 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4b47ec19ef39..11e815084fcf 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -74,13 +74,16 @@ int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e192dc17c583..2d2a1f9a61d8 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -8,6 +8,7 @@ enum nf_ct_ext_id { NF_CT_EXT_NAT, NF_CT_EXT_ACCT, NF_CT_EXT_ECACHE, + NF_CT_EXT_ZONE, NF_CT_EXT_NUM, }; @@ -15,6 +16,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache +#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h new file mode 100644 index 000000000000..0bbb2bd51e89 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -0,0 +1,23 @@ +#ifndef _NF_CONNTRACK_ZONES_H +#define _NF_CONNTRACK_ZONES_H + +#include + +#define NF_CT_DEFAULT_ZONE 0 + +struct nf_conntrack_zone { + u16 id; +}; + +static inline u16 nf_ct_zone(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone *nf_ct_zone; + nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + if (nf_ct_zone) + return nf_ct_zone->id; +#endif + return NF_CT_DEFAULT_ZONE; +} + +#endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d1ea38a7c490..2bb1f87051c4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), &tuple); + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 327826a968a8..7404bde95994 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct net *net, struct sk_buff *skb, +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &innertuple); + h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -209,7 +211,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f6f46686cbc0..d498a704d456 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -39,15 +40,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN; + return IP_DEFRAG_CONNTRACK_IN + zone; else - return IP_DEFRAG_CONNTRACK_OUT; + return IP_DEFRAG_CONNTRACK_OUT + zone; } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 26066a2327ad..4595281c2863 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,6 +30,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(nf_nat_lock); @@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all, + (__force u32)tuple->src.u.all ^ zone, tuple->dst.protonum, 0); return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } @@ -139,12 +141,12 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, +find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, tuple); + unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -152,7 +154,7 @@ find_appropriate_src(struct net *net, rcu_read_lock(); hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple)) { + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -175,7 +177,7 @@ find_appropriate_src(struct net *net, the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(struct nf_conntrack_tuple *tuple, +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, range->flags & IP_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip, 0); + 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } @@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, { struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; + u16 zone = nf_ct_zone(ct); /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(net, orig_tuple, tuple, range)) { + if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* 2) Select the least-used IP/proto combination in the given range. */ *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, ct, maniptype); + find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ @@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb171056c63..4c060038d29f 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(net, &t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 55ce22e5de49..996c3f41fecd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -191,15 +192,20 @@ out: static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN; + return IP6_DEFRAG_CONNTRACK_IN + zone; else - return IP6_DEFRAG_CONNTRACK_OUT; + return IP6_DEFRAG_CONNTRACK_OUT + zone; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d772dc21857f..9be81776415e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct net *net, +icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, @@ -137,6 +138,7 @@ icmpv6_error_message(struct net *net, struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -163,7 +165,7 @@ icmpv6_error_message(struct net *net, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &intuple); + h = nf_conntrack_find_get(net, zone, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -216,7 +218,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4469d45261f4..18d77b5c351a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK If unsure, say 'N'. +config NF_CONNTRACK_ZONES + bool 'Connection tracking zones' + depends on NETFILTER_ADVANCED + depends on NETFILTER_XT_TARGET_CT + help + This option enables support for connection tracking zones. + Normally, each connection needs to have a unique system wide + identity. Connection tracking zones allow to have multiple + connections using the same identity, as long as they are + contained in different zones. + + If unsure, say `N'. + config NF_CONNTRACK_EVENTS bool "Connection tracking events" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 65351ed5d815..0c9bbe93cc16 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size, unsigned int rnd) + u16 zone, unsigned int size, unsigned int rnd) { unsigned int n; u_int32_t h; @@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); h = jhash2((u32 *)tuple, n, - rnd ^ (((__force __u16)tuple->dst.u.all << 16) | - tuple->dst.protonum)); + zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | + tuple->dst.protonum)); return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct net *net, +static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size, + return __hash_conntrack(tuple, zone, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack) * - Caller must lock nf_conntrack_lock before calling this function */ struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple)) { + if (nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); begin: - h = __nf_conntrack_find(net, tuple); + h = __nf_conntrack_find(net, zone, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(nf_ct_is_dying(ct) || !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { + if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || + nf_ct_zone(ct) != zone)) { nf_ct_put(ct); goto begin; } @@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + u16 zone; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; + u16 zone; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) not in the hash. If there is, we lost race. */ hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; /* Remove from unconfirmed list */ @@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + struct nf_conn *ct; + u16 zone = nf_ct_zone(ignored_conntrack); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. */ rcu_read_lock_bh(); hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(struct net *net, +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(net, orig); + unsigned int hash = hash_conntrack(net, zone, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, #ifdef CONFIG_NET_NS ct->ct_net = net; #endif - +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif /* * changes to lookup keys must be done before setting refcnt to 1 */ smp_wmb(); atomic_set(&ct->ct_general.use, 1); return ct; + +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + return ERR_PTR(-ENOMEM); +#endif } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -631,13 +661,14 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); return NULL; } - ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -657,7 +688,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(net, tuple); + exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -713,6 +744,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, @@ -722,7 +754,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } /* look for tuple match */ - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff); @@ -958,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +#ifdef CONFIG_NF_CONNTRACK_ZONES +static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_zone), + .align = __alignof__(struct nf_conntrack_zone), + .id = NF_CT_EXT_ZONE, +}; +#endif + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -1139,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); +#ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); +#endif } static void nf_conntrack_cleanup_net(struct net *net) @@ -1214,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) unsigned int hashsize, old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; if (current->nsproxy->net_ns != &init_net) return -EOPNOTSUPP; @@ -1240,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); + ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, + bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), + hashsize, nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } @@ -1299,6 +1345,11 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; +#ifdef CONFIG_NF_CONNTRACK_ZONES + ret = nf_ct_extend_register(&nf_ct_zone_extend); + if (ret < 0) + goto err_extend; +#endif /* Set up fake conntrack: to never be deleted, not in any hashes */ #ifdef CONFIG_NET_NS nf_conntrack_untracked.ct_net = &init_net; @@ -1309,6 +1360,10 @@ static int nf_conntrack_init_init_net(void) return 0; +#ifdef CONFIG_NF_CONNTRACK_ZONES +err_extend: + nf_conntrack_helper_fini(); +#endif err_helper: nf_conntrack_proto_fini(); err_proto: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 6182fb1b55de..acb29ccaa41f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -27,6 +27,7 @@ #include #include #include +#include unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); @@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; @@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) return i; } return NULL; @@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(net, tuple); + i = __nf_ct_expect_find(net, zone, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; @@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) { exp = i; break; } @@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && a->class == b->class && nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask); + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 66369490230e..a1c8dd917e12 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* Parameters */ @@ -1216,7 +1217,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index db35edac307b..51089cfe1167 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -811,7 +811,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -872,7 +872,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -1221,7 +1221,7 @@ ctnetlink_create_conntrack(struct net *net, int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1325,7 +1325,7 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - master_h = nf_conntrack_find_get(net, &master); + master_h = nf_conntrack_find_get(net, 0, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1374,9 +1374,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, &otuple); + h = __nf_conntrack_find(net, 0, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, &rtuple); + h = __nf_conntrack_find(net, 0, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1714,7 +1714,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1770,7 +1770,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1855,7 +1855,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(net, &master_tuple); + h = nf_conntrack_find_get(net, 0, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1912,7 +1912,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, 0, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 3807ac7faf4c..088944824e13 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(net, &inv_t); + exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(struct net *net, +static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; struct nf_conn *sibling; + u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(net, t); + h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net, nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(net, t); + exp = nf_ct_expect_find_get(net, zone, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index fbe8ff5a420a..8dd75d90efc0 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -23,6 +23,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); @@ -836,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, rcu_read_lock(); do { - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e310f1561bb2..24a42efe62ef 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -26,6 +26,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -171,6 +172,11 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) + goto release; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 8183a054256f..61c50fa84703 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -16,6 +16,7 @@ #include #include #include +#include static unsigned int xt_ct_target(struct sk_buff *skb, const struct xt_target_param *par) @@ -69,11 +70,16 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par) goto out; } +#ifndef CONFIG_NF_CONNTRACK_ZONES + if (info->zone) + goto err1; +#endif + if (nf_ct_l3proto_try_module_get(par->family) < 0) goto err1; memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); if (IS_ERR(ct)) goto err2; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 0d9d18ea2b09..26997ce90e48 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -28,6 +28,7 @@ #include #include #include +#include /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { @@ -114,7 +115,8 @@ static int count_them(struct net *net, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = nf_conntrack_find_get(net, &conn->tuple); + found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, + &conn->tuple); found_ct = NULL; if (found != NULL) -- cgit v1.2.3 From 026331c4d9b526561ea96f95fac4bfc52b69e316 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 15 Feb 2010 12:53:10 +0200 Subject: cfg80211/mac80211: allow registering for and sending action frames This implements a new command to register for action frames that userspace wants to handle instead of the in-kernel rejection. It is then responsible for rejecting ones that it decided not to handle. There is no unregistration, but the socket can be closed for that. Frames that are not registered for will not be forwarded to userspace and will be rejected by the kernel, the cfg80211 API helps implementing that. Additionally, this patch adds a new command that allows doing action frame transmission from userspace. It can be used either to exchange action frames on the current operational channel (e.g., with the AP with which we are currently associated) or to exchange off-channel Public Action frames with the remain-on-channel command. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 41 ++++++- include/net/cfg80211.h | 47 +++++++- include/net/mac80211.h | 6 +- net/mac80211/cfg.c | 12 ++- net/mac80211/ieee80211_i.h | 6 +- net/mac80211/mlme.c | 35 ++++++ net/mac80211/rx.c | 42 ++++++-- net/mac80211/status.c | 7 +- net/wireless/core.c | 4 + net/wireless/core.h | 9 ++ net/wireless/mlme.c | 166 +++++++++++++++++++++++++++++ net/wireless/nl80211.c | 260 ++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 8 ++ 13 files changed, 625 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 127a73015760..8e6384f8fda6 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -3,7 +3,7 @@ /* * 802.11 netlink interface public header * - * Copyright 2006, 2007, 2008 Johannes Berg + * Copyright 2006-2010 Johannes Berg * Copyright 2008 Michael Wu * Copyright 2008 Luis Carlos Cobo * Copyright 2008 Michael Buesch @@ -299,6 +299,31 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * + * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames + * (via @NL80211_CMD_ACTION) for processing in userspace. This command + * requires an interface index and a match attribute containing the first + * few bytes of the frame that should match, e.g. a single byte for only + * a category match or four bytes for vendor frames including the OUI. + * The registration cannot be dropped, but is removed automatically + * when the netlink socket is closed. Multiple registrations can be made. + * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This + * command is used both as a request to transmit an Action frame and as an + * event indicating reception of an Action frame that was not processed in + * kernel code, but is for us (i.e., which may need to be processed in a + * user space application). %NL80211_ATTR_FRAME is used to specify the + * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and + * optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on + * which channel the frame is to be transmitted or was received. This + * channel has to be the current channel (remain-on-channel or the + * operational channel). When called, this operation returns a cookie + * (%NL80211_ATTR_COOKIE) that will be included with the TX status event + * pertaining to the TX request. + * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame + * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * the TX command and %NL80211_ATTR_FRAME includes the contents of the + * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged + * the frame. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -387,6 +412,10 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, + NL80211_CMD_REGISTER_ACTION, + NL80211_CMD_ACTION, + NL80211_CMD_ACTION_TX_STATUS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -653,6 +682,12 @@ enum nl80211_commands { * rates based on negotiated supported rates information. This attribute * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * + * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain + * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * + * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was + * acknowledged by the recipient. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -798,6 +833,10 @@ enum nl80211_attrs { NL80211_ATTR_TX_RATES, + NL80211_ATTR_FRAME_MATCH, + + NL80211_ATTR_ACK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5b3569b2a74c..7188934b64d3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3,7 +3,7 @@ /* * 802.11 device and configuration interface * - * Copyright 2006-2009 Johannes Berg + * Copyright 2006-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -998,6 +998,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. + * @action: Transmit an action frame * * @testmode_cmd: run a test mode command * @@ -1144,6 +1145,11 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); + int (*action)(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie); + /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); @@ -1445,6 +1451,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface + * @action_registrations: list of registrations for action frames + * @action_registrations_lock: lock for the list */ struct wireless_dev { struct wiphy *wiphy; @@ -1454,6 +1462,9 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; + struct list_head action_registrations; + spinlock_t action_registrations_lock; + struct mutex mtx; struct work_struct cleanup_work; @@ -2291,4 +2302,38 @@ void cfg80211_remain_on_channel_expired(struct net_device *dev, void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); +/** + * cfg80211_rx_action - notification of received, unprocessed Action frame + * @dev: network device + * @freq: Frequency on which the frame was received in MHz + * @buf: Action frame (header + body) + * @len: length of the frame data + * @gfp: context flags + * Returns %true if a user space application is responsible for rejecting the + * unrecognized Action frame; %false if no such application is registered + * (i.e., the driver is responsible for rejecting the unrecognized Action + * frame) + * + * This function is called whenever an Action frame is received for a station + * mode interface, but is not processed in kernel. + */ +bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); + +/** + * cfg80211_action_tx_status - notification of TX status for Action frame + * @dev: network device + * @cookie: Cookie returned by cfg80211_ops::action() + * @buf: Action frame (header + body) + * @len: length of the frame data + * @ack: Whether frame was acknowledged + * @gfp: context flags + * + * This function is called whenever an Action frame was requested to be + * transmitted with cfg80211_ops::action() to report the TX status of the + * transmission attempt. + */ +void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 314e98173166..80eb7cc42ce9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3,7 +3,7 @@ * * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2007-2008 Johannes Berg + * Copyright 2007-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -264,6 +264,9 @@ struct ieee80211_bss_conf { * be modified again (no seqno assignment, crypto, etc.) * @IEEE80211_TX_INTFL_HAS_RADIOTAP: This frame was injected and still * has a radiotap header at skb->data. + * @IEEE80211_TX_INTFL_NL80211_FRAME_TX: Frame was requested through nl80211 + * MLME command (internal to mac80211 to figure out whether to send TX + * status to user space) */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -286,6 +289,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_MORE_FRAMES = BIT(18), IEEE80211_TX_INTFL_RETRANSMISSION = BIT(19), IEEE80211_TX_INTFL_HAS_RADIOTAP = BIT(20), + IEEE80211_TX_INTFL_NL80211_FRAME_TX = BIT(21), }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e1731b7c2523..b7116ef84a3b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1,7 +1,7 @@ /* * mac80211 configuration hooks for cfg80211 * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006-2010 Johannes Berg * * This file is GPLv2 as found in COPYING. */ @@ -1448,6 +1448,15 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } +static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie) +{ + return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, + channel_type, buf, len, cookie); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1496,4 +1505,5 @@ struct cfg80211_ops mac80211_config_ops = { .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, + .action = ieee80211_action, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9dd98b674cbc..241533e1bc03 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2007-2008 Johannes Berg + * Copyright 2007-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -966,6 +966,10 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req, void *cookie); +int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bfc4a5070013..41812a15eea0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2084,3 +2084,38 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; } + +int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sk_buff *skb; + + /* Check that we are on the requested channel for transmission */ + if ((chan != local->tmp_channel || + channel_type != local->tmp_channel_type) && + (chan != local->oper_channel || + channel_type != local->oper_channel_type)) + return -EBUSY; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + memcpy(skb_put(skb, len), buf, len); + + if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) + IEEE80211_SKB_CB(skb)->flags |= + IEEE80211_TX_INTFL_DONT_ENCRYPT; + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + skb->dev = sdata->dev; + ieee80211_tx_skb(sdata, skb); + + *cookie = (unsigned long) skb; + return 0; +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a177472adc13..a6080d8d72bb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1856,28 +1856,25 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; struct sk_buff *nskb; + struct ieee80211_rx_status *status; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) return RX_CONTINUE; - if (!rx->sta) + /* drop too small frames */ + if (len < IEEE80211_MIN_ACTION_SIZE) return RX_DROP_UNUSABLE; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - /* drop too small frames */ - if (len < IEEE80211_MIN_ACTION_SIZE) + if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) return RX_DROP_UNUSABLE; - /* return action frames that have *only* category */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) - goto return_frame; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: /* @@ -1891,6 +1888,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_AP) break; + /* verify action_code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + break; + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -1919,6 +1920,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (sdata->vif.type != NL80211_IFTYPE_STATION) break; + /* verify action_code is present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + break; + switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -1954,7 +1959,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; } - return_frame: + /* * For AP mode, hostapd is responsible for handling any action * frames that we didn't handle, including returning unknown @@ -1966,6 +1971,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return RX_DROP_MONITOR; + /* + * Getting here means the kernel doesn't know how to handle + * it, but maybe userspace does ... include returned frames + * so userspace can register for those to know whether ones + * it transmitted were processed or returned. + */ + status = IEEE80211_SKB_RXCB(rx->skb); + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + cfg80211_rx_action(rx->sdata->dev, status->freq, + rx->skb->data, rx->skb->len, + GFP_ATOMIC)) + goto handled; + /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) return RX_DROP_UNUSABLE; @@ -1985,7 +2004,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } handled: - rx->sta->rx_packets++; + if (rx->sta) + rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ded98730c111..56d5b9a6ec5b 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2008-2009 Johannes Berg + * Copyright 2008-2010 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -288,6 +288,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) msecs_to_jiffies(10)); } + if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) + cfg80211_action_tx_status( + skb->dev, (unsigned long) skb, skb->data, skb->len, + !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); + /* this was a transmitted frame, but now we want to reuse it */ skb_orphan(skb); diff --git a/net/wireless/core.c b/net/wireless/core.c index 71b6b3a9cf1f..51908dc2ea00 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -677,6 +677,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); + INIT_LIST_HEAD(&wdev->action_registrations); + spin_lock_init(&wdev->action_registrations_lock); + mutex_lock(&rdev->devlist_mtx); list_add_rcu(&wdev->list, &rdev->netdev_list); rdev->devlist_generation++; @@ -792,6 +795,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; + cfg80211_mlme_purge_actions(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index c326a667022a..d52da913145a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -329,6 +329,15 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, bool wextev, struct cfg80211_bss *bss); +int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, + const u8 *match_data, int match_len); +void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); +void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); +int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 94d151f6f73e..62bc8855e123 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -728,3 +728,169 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); } EXPORT_SYMBOL(cfg80211_new_sta); + +struct cfg80211_action_registration { + struct list_head list; + + u32 nlpid; + + int match_len; + + u8 match[]; +}; + +int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, + const u8 *match_data, int match_len) +{ + struct cfg80211_action_registration *reg, *nreg; + int err = 0; + + nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); + if (!nreg) + return -ENOMEM; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry(reg, &wdev->action_registrations, list) { + int mlen = min(match_len, reg->match_len); + + if (memcmp(reg->match, match_data, mlen) == 0) { + err = -EALREADY; + break; + } + } + + if (err) { + kfree(nreg); + goto out; + } + + memcpy(nreg->match, match_data, match_len); + nreg->match_len = match_len; + nreg->nlpid = snd_pid; + list_add(&nreg->list, &wdev->action_registrations); + + out: + spin_unlock_bh(&wdev->action_registrations_lock); + return err; +} + +void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) +{ + struct cfg80211_action_registration *reg, *tmp; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + if (reg->nlpid == nlpid) { + list_del(®->list); + kfree(reg); + } + } + + spin_unlock_bh(&wdev->action_registrations_lock); +} + +void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) +{ + struct cfg80211_action_registration *reg, *tmp; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_del(®->list); + kfree(reg); + } + + spin_unlock_bh(&wdev->action_registrations_lock); +} + +int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + const u8 *buf, size_t len, u64 *cookie) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + const struct ieee80211_mgmt *mgmt; + + if (rdev->ops->action == NULL) + return -EOPNOTSUPP; + if (len < 24 + 1) + return -EINVAL; + + mgmt = (const struct ieee80211_mgmt *) buf; + if (!ieee80211_is_action(mgmt->frame_control)) + return -EINVAL; + if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { + /* Verify that we are associated with the destination AP */ + if (!wdev->current_bss || + memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, + ETH_ALEN) != 0 || + memcmp(wdev->current_bss->pub.bssid, mgmt->da, + ETH_ALEN) != 0) + return -ENOTCONN; + } + + if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0) + return -EINVAL; + + /* Transmit the Action frame as requested by user space */ + return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, + buf, len, cookie); +} + +bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_action_registration *reg; + const u8 *action_data; + int action_data_len; + bool result = false; + + /* frame length - min size excluding category */ + action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1); + + /* action data starts with category */ + action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; + + spin_lock_bh(&wdev->action_registrations_lock); + + list_for_each_entry(reg, &wdev->action_registrations, list) { + if (reg->match_len > action_data_len) + continue; + + if (memcmp(reg->match, action_data, reg->match_len)) + continue; + + /* found match! */ + + /* Indicate the received Action frame to user space */ + if (nl80211_send_action(rdev, dev, reg->nlpid, freq, + buf, len, gfp)) + continue; + + result = true; + break; + } + + spin_unlock_bh(&wdev->action_registrations_lock); + + return result; +} +EXPORT_SYMBOL(cfg80211_rx_action); + +void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate TX status of the Action frame to user space */ + nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); +} +EXPORT_SYMBOL(cfg80211_action_tx_status); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a95ab9e4c19e..328112081358 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1,7 +1,7 @@ /* * This is the new netlink-based wireless configuration interface. * - * Copyright 2006-2009 Johannes Berg + * Copyright 2006-2010 Johannes Berg */ #include @@ -145,6 +145,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, + [NL80211_ATTR_FRAME] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, }; /* policy for the attributes */ @@ -577,6 +580,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(action, ACTION); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4526,6 +4530,139 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, return err; } +static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + int err; + + if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) + return -EINVAL; + + if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + /* not much point in registering if we can't reply */ + if (!rdev->ops->action) { + err = -EOPNOTSUPP; + goto out; + } + + err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, + nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), + nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_action(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + struct ieee80211_channel *chan; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + u32 freq; + int err; + void *hdr; + u64 cookie; + struct sk_buff *msg; + + if (!info->attrs[NL80211_ATTR_FRAME] || + !info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->action) { + err = -EOPNOTSUPP; + goto out; + } + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) + err = -EINVAL; + goto out; + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = rdev_freq_to_chan(rdev, freq, channel_type); + if (chan == NULL) { + err = -EINVAL; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_ACTION); + + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } + err = cfg80211_mlme_action(rdev, dev, chan, channel_type, + nla_data(info->attrs[NL80211_ATTR_FRAME]), + nla_len(info->attrs[NL80211_ATTR_FRAME]), + &cookie); + if (err) + goto free_msg; + + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + + nla_put_failure: + err = -ENOBUFS; + free_msg: + nlmsg_free(msg); + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4806,6 +4943,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_REGISTER_ACTION, + .doit = nl80211_register_action, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_ACTION, + .doit = nl80211_action, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -5478,6 +5627,110 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, nl80211_mlme_mcgrp.id, gfp); } +int nl80211_send_action(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, + int freq, const u8 *buf, size_t len, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); + if (!hdr) { + nlmsg_free(msg); + return -ENOMEM; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq); + NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return err; + } + + err = genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + if (err < 0) + return err; + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + return -ENOBUFS; +} + +void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + if (ack) + NLA_PUT_FLAG(msg, NL80211_ATTR_ACK); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +static int nl80211_netlink_notify(struct notifier_block * nb, + unsigned long state, + void *_notify) +{ + struct netlink_notify *notify = _notify; + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + if (state != NETLINK_URELEASE) + return NOTIFY_DONE; + + rcu_read_lock(); + + list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) + list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) + cfg80211_mlme_unregister_actions(wdev, notify->pid); + + rcu_read_unlock(); + + return NOTIFY_DONE; +} + +static struct notifier_block nl80211_netlink_notifier = { + .notifier_call = nl80211_netlink_notify, +}; + /* initialisation/exit functions */ int nl80211_init(void) @@ -5511,6 +5764,10 @@ int nl80211_init(void) goto err_out; #endif + err = netlink_register_notifier(&nl80211_netlink_notifier); + if (err) + goto err_out; + return 0; err_out: genl_unregister_family(&nl80211_fam); @@ -5519,5 +5776,6 @@ int nl80211_init(void) void nl80211_exit(void) { + netlink_unregister_notifier(&nl80211_netlink_notifier); genl_unregister_family(&nl80211_fam); } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 14855b8fb430..4ca511102c6c 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,4 +74,12 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); +int nl80211_send_action(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, int freq, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3 From 54716e3beb0ab20c49471348dfe399a71bfc8fd3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 14 Feb 2010 03:27:03 +0000 Subject: net neigh: Decouple per interface neighbour table controls from binary sysctls Stop computing the number of neighbour table settings we have by counting the number of binary sysctls. This behaviour was silly and meant that we could not add another neighbour table setting without also adding another binary sysctl. Don't pass the binary sysctl path for neighour table entries into neigh_sysctl_register. These parameters are no longer used and so are just dead code. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 - include/net/neighbour.h | 1 - net/core/neighbour.c | 7 ++++--- net/ipv4/arp.c | 3 +-- net/ipv4/devinet.c | 3 +-- net/ipv6/addrconf.c | 3 +-- net/ipv6/ndisc.c | 3 +-- 7 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 7c74e919cabe..f66014c90c9f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -596,7 +596,6 @@ enum { NET_NEIGH_GC_THRESH3=16, NET_NEIGH_RETRANS_TIME_MS=17, NET_NEIGH_REACHABLE_TIME_MS=18, - __NET_NEIGH_MAX }; /* /proc/sys/net/dccp */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index b0173202cad9..7834f470d14a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -251,7 +251,6 @@ extern void neigh_seq_stop(struct seq_file *, void *); extern int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - int p_id, int pdev_id, char *p_name, proc_handler *proc_handler); extern void neigh_sysctl_unregister(struct neigh_parms *p); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f2efd72da799..d102f6d9abdc 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2556,9 +2556,11 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL +#define NEIGH_VARS_MAX 19 + static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table neigh_vars[__NET_NEIGH_MAX]; + struct ctl_table neigh_vars[NEIGH_VARS_MAX]; char *dev_name; } neigh_sysctl_template __read_mostly = { .neigh_vars = { @@ -2675,8 +2677,7 @@ static struct neigh_sysctl_table { }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - int p_id, int pdev_id, char *p_name, - proc_handler *handler) + char *p_name, proc_handler *handler) { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1940b4df7699..c4dd13542802 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1284,8 +1284,7 @@ void __init arp_init(void) dev_add_pack(&arp_packet_type); arp_proc_init(); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b1eddee9bf94..014982b61564 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1487,8 +1487,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { - neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL); + neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, &idev->cnf); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 764ad37ca070..c79cbff54370 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4416,8 +4416,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) static void addrconf_sysctl_register(struct inet6_dev *idev) { - neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", + neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6", &ndisc_ifinfo_sysctl_change); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev, &idev->cnf); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2dfec6bb8ada..8bcc4b7db3bf 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1820,8 +1820,7 @@ int __init ndisc_init(void) neigh_table_init(&nd_tbl); #ifdef CONFIG_SYSCTL - err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", + err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6", &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; -- cgit v1.2.3 From 7d720c3e4f0c4fc152a6bf17e24244a3c85412d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 16 Feb 2010 15:20:26 +0000 Subject: percpu: add __percpu sparse annotations to net Add __percpu sparse annotations to net. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. The macro and type tricks around snmp stats make things a bit interesting. DEFINE/DECLARE_SNMP_STAT() macros mark the target field as __percpu and SNMP_UPD_PO_STATS() macro is updated accordingly. All snmp_mib_*() users which used to cast the argument to (void **) are updated to cast it to (void __percpu **). Signed-off-by: Tejun Heo Acked-by: David S. Miller Cc: Patrick McHardy Cc: Arnaldo Carvalho de Melo Cc: Vlad Yasevich Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/net/ip.h | 6 +++--- include/net/ipcomp.h | 2 +- include/net/neighbour.h | 2 +- include/net/netns/conntrack.h | 2 +- include/net/netns/core.h | 2 +- include/net/route.h | 2 +- include/net/snmp.h | 12 +++++++---- include/net/tcp.h | 2 +- net/8021q/vlan.h | 2 +- net/dccp/proto.c | 5 +++-- net/ipv4/af_inet.c | 46 +++++++++++++++++++++---------------------- net/ipv4/proc.c | 28 +++++++++++++------------- net/ipv4/route.c | 2 +- net/ipv4/tcp.c | 21 +++++++++++--------- net/ipv6/addrconf.c | 24 +++++++++++----------- net/ipv6/af_inet6.c | 28 +++++++++++++------------- net/ipv6/proc.c | 23 ++++++++++++---------- net/sctp/proc.c | 2 +- net/sctp/protocol.c | 5 +++-- net/xfrm/xfrm_ipcomp.c | 16 +++++++-------- net/xfrm/xfrm_policy.c | 6 +++--- net/xfrm/xfrm_proc.c | 3 ++- 22 files changed, 127 insertions(+), 114 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 7bc47873e3fc..503994a38ed1 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -174,9 +174,9 @@ extern struct ipv4_config ipv4_config; #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd) #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd) -extern unsigned long snmp_fold_field(void *mib[], int offt); -extern int snmp_mib_init(void *ptr[2], size_t mibsize); -extern void snmp_mib_free(void *ptr[2]); +extern unsigned long snmp_fold_field(void __percpu *mib[], int offt); +extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize); +extern void snmp_mib_free(void __percpu *ptr[2]); extern struct local_ports { seqlock_t lock; diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index 2a1092abaa07..cc4f30cd7315 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -9,7 +9,7 @@ struct crypto_comp; struct ipcomp_data { u16 threshold; - struct crypto_comp **tfms; + struct crypto_comp * __percpu *tfms; }; struct ip_comp_hdr; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7834f470d14a..da1d58be31b7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -164,7 +164,7 @@ struct neigh_table { rwlock_t lock; unsigned long last_rand; struct kmem_cache *kmem_cachep; - struct neigh_statistics *stats; + struct neigh_statistics __percpu *stats; struct neighbour **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 63d449807d9b..d4958d4c6574 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,7 +17,7 @@ struct netns_ct { struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; - struct ip_conntrack_stat *stat; + struct ip_conntrack_stat __percpu *stat; int sysctl_events; unsigned int sysctl_events_retry_timeout; int sysctl_acct; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 24d4be76bbd1..78eb1ff75475 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -10,7 +10,7 @@ struct netns_core { int sysctl_somaxconn; - struct prot_inuse *inuse; + struct prot_inuse __percpu *inuse; }; #endif diff --git a/include/net/route.h b/include/net/route.h index bce6dd68d27b..2c9fba7f7731 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -101,7 +101,7 @@ struct rt_cache_stat { unsigned int out_hlist_search; }; -extern struct ip_rt_acct *ip_rt_acct; +extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); diff --git a/include/net/snmp.h b/include/net/snmp.h index da02ee027d69..692ee0061dc4 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -129,9 +129,9 @@ struct linux_xfrm_mib { * nonlocked_atomic_inc() primitives -AK */ #define DEFINE_SNMP_STAT(type, name) \ - __typeof__(type) *name[2] + __typeof__(type) __percpu *name[2] #define DECLARE_SNMP_STAT(type, name) \ - extern __typeof__(type) *name[2] + extern __typeof__(type) __percpu *name[2] #define SNMP_STAT_BHPTR(name) (name[0]) #define SNMP_STAT_USRPTR(name) (name[1]) @@ -148,9 +148,13 @@ struct linux_xfrm_mib { __this_cpu_add(mib[0]->mibs[field], addend) #define SNMP_ADD_STATS_USER(mib, field, addend) \ this_cpu_add(mib[1]->mibs[field], addend) +/* + * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr" + * to make @ptr a non-percpu pointer. + */ #define SNMP_UPD_PO_STATS(mib, basefield, addend) \ do { \ - __typeof__(mib[0]) ptr; \ + __typeof__(*mib[0]) *ptr; \ preempt_disable(); \ ptr = this_cpu_ptr((mib)[!in_softirq()]); \ ptr->mibs[basefield##PKTS]++; \ @@ -159,7 +163,7 @@ struct linux_xfrm_mib { } while (0) #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend) \ do { \ - __typeof__(mib[0]) ptr = \ + __typeof__(*mib[0]) *ptr = \ __this_cpu_ptr((mib)[!in_softirq()]); \ ptr->mibs[basefield##PKTS]++; \ ptr->mibs[basefield##OCTETS] += addend;\ diff --git a/include/net/tcp.h b/include/net/tcp.h index 87d164b9bd8f..75a00c80bdda 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1189,7 +1189,7 @@ extern int tcp_v4_md5_do_del(struct sock *sk, #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *); +extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); extern void tcp_free_md5sig_pool(void); extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5685296017e9..6abdcac1b2e8 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -61,7 +61,7 @@ struct vlan_dev_info { struct proc_dir_entry *dent; unsigned long cnt_inc_headroom_on_tx; unsigned long cnt_encap_on_xmit; - struct vlan_rx_stats *vlan_rx_stats; + struct vlan_rx_stats __percpu *vlan_rx_stats; }; static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 85ec1cb7fd41..0ef7061920c0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1005,12 +1005,13 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { - return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib)); + return snmp_mib_init((void __percpu **)dccp_statistics, + sizeof(struct dccp_mib)); } static inline void dccp_mib_exit(void) { - snmp_mib_free((void**)dccp_statistics); + snmp_mib_free((void __percpu **)dccp_statistics); } static int thash_entries; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7d12c6a9b19b..33b7dffa7732 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,7 +1385,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, } EXPORT_SYMBOL_GPL(inet_ctl_sock_create); -unsigned long snmp_fold_field(void *mib[], int offt) +unsigned long snmp_fold_field(void __percpu *mib[], int offt) { unsigned long res = 0; int i; @@ -1398,7 +1398,7 @@ unsigned long snmp_fold_field(void *mib[], int offt) } EXPORT_SYMBOL_GPL(snmp_fold_field); -int snmp_mib_init(void *ptr[2], size_t mibsize) +int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); @@ -1416,7 +1416,7 @@ err0: } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void *ptr[2]) +void snmp_mib_free(void __percpu *ptr[2]) { BUG_ON(ptr == NULL); free_percpu(ptr[0]); @@ -1460,25 +1460,25 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { - if (snmp_mib_init((void **)net->mib.tcp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, sizeof(struct tcp_mib)) < 0) goto err_tcp_mib; - if (snmp_mib_init((void **)net->mib.ip_statistics, + if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)net->mib.net_statistics, + if (snmp_mib_init((void __percpu **)net->mib.net_statistics, sizeof(struct linux_mib)) < 0) goto err_net_mib; - if (snmp_mib_init((void **)net->mib.udp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; - if (snmp_mib_init((void **)net->mib.udplite_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; - if (snmp_mib_init((void **)net->mib.icmp_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, sizeof(struct icmp_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, sizeof(struct icmpmsg_mib)) < 0) goto err_icmpmsg_mib; @@ -1486,30 +1486,30 @@ static __net_init int ipv4_mib_init_net(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void **)net->mib.icmp_statistics); + snmp_mib_free((void __percpu **)net->mib.icmp_statistics); err_icmp_mib: - snmp_mib_free((void **)net->mib.udplite_statistics); + snmp_mib_free((void __percpu **)net->mib.udplite_statistics); err_udplite_mib: - snmp_mib_free((void **)net->mib.udp_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_statistics); err_udp_mib: - snmp_mib_free((void **)net->mib.net_statistics); + snmp_mib_free((void __percpu **)net->mib.net_statistics); err_net_mib: - snmp_mib_free((void **)net->mib.ip_statistics); + snmp_mib_free((void __percpu **)net->mib.ip_statistics); err_ip_mib: - snmp_mib_free((void **)net->mib.tcp_statistics); + snmp_mib_free((void __percpu **)net->mib.tcp_statistics); err_tcp_mib: return -ENOMEM; } static __net_exit void ipv4_mib_exit_net(struct net *net) { - snmp_mib_free((void **)net->mib.icmpmsg_statistics); - snmp_mib_free((void **)net->mib.icmp_statistics); - snmp_mib_free((void **)net->mib.udplite_statistics); - snmp_mib_free((void **)net->mib.udp_statistics); - snmp_mib_free((void **)net->mib.net_statistics); - snmp_mib_free((void **)net->mib.ip_statistics); - snmp_mib_free((void **)net->mib.tcp_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); + snmp_mib_free((void __percpu **)net->mib.icmp_statistics); + snmp_mib_free((void __percpu **)net->mib.udplite_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_statistics); + snmp_mib_free((void __percpu **)net->mib.net_statistics); + snmp_mib_free((void __percpu **)net->mib.ip_statistics); + snmp_mib_free((void __percpu **)net->mib.tcp_statistics); } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1b09a6dde7c0..242ed2307370 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -280,7 +280,7 @@ static void icmpmsg_put(struct seq_file *seq) count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); if (val) { type[count] = i; vals[count++] = val; @@ -307,18 +307,18 @@ static void icmp_put(struct seq_file *seq) for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu", - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, + snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), - snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), + snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, + snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, icmpmibmap[i].index | 0x100)); } @@ -341,7 +341,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.ip_statistics, + snmp_fold_field((void __percpu **)net->mib.ip_statistics, snmp4_ipstats_list[i].entry)); icmp_put(seq); /* RFC 2011 compatibility */ @@ -356,11 +356,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v) /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", - snmp_fold_field((void **)net->mib.tcp_statistics, + snmp_fold_field((void __percpu **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); else seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.tcp_statistics, + snmp_fold_field((void __percpu **)net->mib.tcp_statistics, snmp4_tcp_list[i].entry)); } @@ -371,7 +371,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.udp_statistics, + snmp_fold_field((void __percpu **)net->mib.udp_statistics, snmp4_udp_list[i].entry)); /* the UDP and UDP-Lite MIBs are the same */ @@ -382,7 +382,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.udplite_statistics, + snmp_fold_field((void __percpu **)net->mib.udplite_statistics, snmp4_udp_list[i].entry)); seq_putc(seq, '\n'); @@ -419,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nTcpExt:"); for (i = 0; snmp4_net_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.net_statistics, + snmp_fold_field((void __percpu **)net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); @@ -429,7 +429,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void **)net->mib.ip_statistics, + snmp_fold_field((void __percpu **)net->mib.ip_statistics, snmp4_ipextstats_list[i].entry)); seq_putc(seq, '\n'); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b16dfadbe6d6..04762d3bef71 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3334,7 +3334,7 @@ static __net_initdata struct pernet_operations rt_secret_timer_ops = { #ifdef CONFIG_NET_CLS_ROUTE -struct ip_rt_acct *ip_rt_acct __read_mostly; +struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ static __initdata unsigned long rhash_entries; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d5d69ea8f249..e471d037fcc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2788,10 +2788,10 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool **tcp_md5sig_pool; +static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) { int cpu; for_each_possible_cpu(cpu) { @@ -2808,7 +2808,7 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool) void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool **pool = NULL; + struct tcp_md5sig_pool * __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2822,10 +2822,11 @@ void tcp_free_md5sig_pool(void) EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) +static struct tcp_md5sig_pool * __percpu * +__tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool **pool; + struct tcp_md5sig_pool * __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool *); if (!pool) @@ -2852,9 +2853,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool **pool; + struct tcp_md5sig_pool * __percpu *pool; int alloc = 0; retry: @@ -2873,7 +2874,9 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); + struct tcp_md5sig_pool * __percpu *p; + + p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); if (!p) { tcp_md5sig_users--; @@ -2897,7 +2900,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) { - struct tcp_md5sig_pool **p; + struct tcp_md5sig_pool * __percpu *p; spin_lock_bh(&tcp_md5sig_pool_lock); p = tcp_md5sig_pool; if (p) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c79cbff54370..b0d4a4b23db5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -278,31 +278,31 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { - if (snmp_mib_init((void **)idev->stats.ipv6, + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib)) < 0) goto err_ip; - if (snmp_mib_init((void **)idev->stats.icmpv6, + if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, sizeof(struct icmpv6_mib)) < 0) goto err_icmp; - if (snmp_mib_init((void **)idev->stats.icmpv6msg, + if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg; return 0; err_icmpmsg: - snmp_mib_free((void **)idev->stats.icmpv6); + snmp_mib_free((void __percpu **)idev->stats.icmpv6); err_icmp: - snmp_mib_free((void **)idev->stats.ipv6); + snmp_mib_free((void __percpu **)idev->stats.ipv6); err_ip: return -ENOMEM; } static void snmp6_free_dev(struct inet6_dev *idev) { - snmp_mib_free((void **)idev->stats.icmpv6msg); - snmp_mib_free((void **)idev->stats.icmpv6); - snmp_mib_free((void **)idev->stats.ipv6); + snmp_mib_free((void __percpu **)idev->stats.icmpv6msg); + snmp_mib_free((void __percpu **)idev->stats.icmpv6); + snmp_mib_free((void __percpu **)idev->stats.ipv6); } /* Nobody refers to this device, we may destroy it. */ @@ -3766,8 +3766,8 @@ static inline size_t inet6_if_nlmsg_size(void) ); } -static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items, - int bytes) +static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, + int items, int bytes) { int i; int pad = bytes - sizeof(u64) * items; @@ -3786,10 +3786,10 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, { switch(attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); + __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); + __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); break; } } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e29160ff4a0f..37d14e735c27 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -971,41 +971,41 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { - if (snmp_mib_init((void **)net->mib.udp_stats_in6, + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof (struct udp_mib)) < 0) return -ENOMEM; - if (snmp_mib_init((void **)net->mib.udplite_stats_in6, + if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; - if (snmp_mib_init((void **)net->mib.ipv6_statistics, + if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)net->mib.icmpv6_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)net->mib.icmpv6msg_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; return 0; err_icmpmsg_mib: - snmp_mib_free((void **)net->mib.icmpv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void **)net->mib.ipv6_statistics); + snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); err_ip_mib: - snmp_mib_free((void **)net->mib.udplite_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); err_udplite_mib: - snmp_mib_free((void **)net->mib.udp_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void **)net->mib.udp_stats_in6); - snmp_mib_free((void **)net->mib.udplite_stats_in6); - snmp_mib_free((void **)net->mib.ipv6_statistics); - snmp_mib_free((void **)net->mib.icmpv6_statistics); - snmp_mib_free((void **)net->mib.icmpv6msg_statistics); + snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics); } static int __net_init inet6_net_init(struct net *net) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index bfe2598dd563..58344c0fbd13 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -136,7 +136,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_SENTINEL }; -static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) +static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib) { char name[32]; int i; @@ -170,7 +170,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) return; } -static void snmp6_seq_show_item(struct seq_file *seq, void **mib, +static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, const struct snmp_mib *itemlist) { int i; @@ -183,14 +183,15 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item(seq, (void **)net->mib.ipv6_statistics, + snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)net->mib.icmpv6_statistics, + snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)net->mib.icmpv6msg_statistics); - snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, + snmp6_seq_show_icmpv6msg(seq, + (void __percpu **)net->mib.icmpv6msg_statistics); + snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6, + snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, snmp6_udplite6_list); return 0; } @@ -213,9 +214,11 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); + snmp6_seq_show_item(seq, (void __percpu **)idev->stats.ipv6, + snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void __percpu **)idev->stats.icmpv6, + snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void __percpu **)idev->stats.icmpv6msg); return 0; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index a5ac6e0a8d9c..784bcc9a979d 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -83,7 +83,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void **)sctp_statistics, + snmp_fold_field((void __percpu **)sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a3c8988758b1..9687177b026b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -996,12 +996,13 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(void) { - return snmp_mib_init((void**)sctp_statistics, sizeof(struct sctp_mib)); + return snmp_mib_init((void __percpu **)sctp_statistics, + sizeof(struct sctp_mib)); } static inline void cleanup_sctp_mibs(void) { - snmp_mib_free((void**)sctp_statistics); + snmp_mib_free((void __percpu **)sctp_statistics); } static void sctp_v4_pf_init(void) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 42cd18391f46..0fc5ff66d1fa 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -30,12 +30,12 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_comp **tfms; + struct crypto_comp * __percpu *tfms; int users; }; static DEFINE_MUTEX(ipcomp_resource_mutex); -static void **ipcomp_scratches; +static void * __percpu *ipcomp_scratches; static int ipcomp_scratch_users; static LIST_HEAD(ipcomp_tfms_list); @@ -200,7 +200,7 @@ EXPORT_SYMBOL_GPL(ipcomp_output); static void ipcomp_free_scratches(void) { int i; - void **scratches; + void * __percpu *scratches; if (--ipcomp_scratch_users) return; @@ -215,10 +215,10 @@ static void ipcomp_free_scratches(void) free_percpu(scratches); } -static void **ipcomp_alloc_scratches(void) +static void * __percpu *ipcomp_alloc_scratches(void) { int i; - void **scratches; + void * __percpu *scratches; if (ipcomp_scratch_users++) return ipcomp_scratches; @@ -239,7 +239,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_comp **tfms) +static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) { struct ipcomp_tfms *pos; int cpu; @@ -267,10 +267,10 @@ static void ipcomp_free_tfms(struct crypto_comp **tfms) free_percpu(tfms); } -static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_comp **tfms; + struct crypto_comp * __percpu *tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index eb870fcc29cc..cfceb6616ec1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2428,19 +2428,19 @@ static int __net_init xfrm_statistics_init(struct net *net) { int rv; - if (snmp_mib_init((void **)net->mib.xfrm_statistics, + if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) - snmp_mib_free((void **)net->mib.xfrm_statistics); + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); - snmp_mib_free((void **)net->mib.xfrm_statistics); + snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index c083a4e4e796..003f2c437ac3 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,7 +50,8 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) int i; for (i=0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field((void **)net->mib.xfrm_statistics, + snmp_fold_field((void __percpu **) + net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } -- cgit v1.2.3 From 9874c41cd5e70bfc97bcd52a8b6c98c2a6ba7299 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 16 Feb 2010 18:40:04 +0000 Subject: ipv6.h: reassembly: replace calculated magic number with multiplication On Tue, 2010-02-16 at 16:47 +0100, Patrick McHardy wrote: > Joe Perches wrote: > >> @@ -246,6 +246,8 @@ extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); > >> int ip6_frag_nqueues(struct net *net); > >> int ip6_frag_mem(struct net *net); > >> > >> +#define IPV6_FRAG_HIGH_THRESH 262144 /* == 256*1024 */ > >> +#define IPV6_FRAG_LOW_THRESH 196608 /* == 192*1024 */ > >> #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ > > > > 196608 isn't a number I want to remember. > > Is this better as: > > > > #define IPV6_FRAG_HIGH_THRESH (256 * 1024) /* 262144 */ > > #define IPV6_FRAG_LOW_THRESH (192 * 1024) /* 196608 */ > > Please send a patch, I'll apply it once these patches are in Dave's > tree. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/ipv6.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 639ec53ea081..d067db1f88c7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -246,9 +246,9 @@ extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); int ip6_frag_nqueues(struct net *net); int ip6_frag_mem(struct net *net); -#define IPV6_FRAG_HIGH_THRESH 262144 /* == 256*1024 */ -#define IPV6_FRAG_LOW_THRESH 196608 /* == 192*1024 */ -#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ +#define IPV6_FRAG_HIGH_THRESH (256 * 1024) /* 262144 */ +#define IPV6_FRAG_LOW_THRESH (192 * 1024) /* 196608 */ +#define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ extern int __ipv6_addr_type(const struct in6_addr *addr); static inline int ipv6_addr_type(const struct in6_addr *addr) -- cgit v1.2.3 From 6457d26bd40077238799e31df2b800bcf4ef9177 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Feb 2010 18:48:44 -0800 Subject: IPv6: convert mc_lock to spinlock Only used for writing, so convert to spinlock Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/if_inet6.h | 2 +- net/ipv6/mcast.c | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index e9d69d198495..545d8b059bef 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -157,7 +157,7 @@ struct inet6_dev { struct ifmcaddr6 *mc_list; struct ifmcaddr6 *mc_tomb; - rwlock_t mc_lock; + spinlock_t mc_lock; unsigned char mc_qrv; unsigned char mc_gq_running; unsigned char mc_ifc_count; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 25f6cca79e6b..bcd971915969 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -793,10 +793,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } spin_unlock_bh(&im->mca_lock); - write_lock_bh(&idev->mc_lock); + spin_lock_bh(&idev->mc_lock); pmc->next = idev->mc_tomb; idev->mc_tomb = pmc; - write_unlock_bh(&idev->mc_lock); + spin_unlock_bh(&idev->mc_lock); } static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) @@ -804,7 +804,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) struct ifmcaddr6 *pmc, *pmc_prev; struct ip6_sf_list *psf, *psf_next; - write_lock_bh(&idev->mc_lock); + spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) @@ -817,7 +817,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca) else idev->mc_tomb = pmc->next; } - write_unlock_bh(&idev->mc_lock); + spin_unlock_bh(&idev->mc_lock); + if (pmc) { for (psf=pmc->mca_tomb; psf; psf=psf_next) { psf_next = psf->sf_next; @@ -832,10 +833,10 @@ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; - write_lock_bh(&idev->mc_lock); + spin_lock_bh(&idev->mc_lock); pmc = idev->mc_tomb; idev->mc_tomb = NULL; - write_unlock_bh(&idev->mc_lock); + spin_unlock_bh(&idev->mc_lock); for (; pmc; pmc = nextpmc) { nextpmc = pmc->next; @@ -1696,7 +1697,7 @@ static void mld_send_cr(struct inet6_dev *idev) int type, dtype; read_lock_bh(&idev->lock); - write_lock_bh(&idev->mc_lock); + spin_lock(&idev->mc_lock); /* deleted MCA's */ pmc_prev = NULL; @@ -1730,7 +1731,7 @@ static void mld_send_cr(struct inet6_dev *idev) } else pmc_prev = pmc; } - write_unlock_bh(&idev->mc_lock); + spin_unlock(&idev->mc_lock); /* change recs */ for (pmc=idev->mc_list; pmc; pmc=pmc->next) { @@ -2311,7 +2312,7 @@ void ipv6_mc_up(struct inet6_dev *idev) void ipv6_mc_init_dev(struct inet6_dev *idev) { write_lock_bh(&idev->lock); - rwlock_init(&idev->mc_lock); + spin_lock_init(&idev->mc_lock); idev->mc_gq_running = 0; setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, (unsigned long)idev); -- cgit v1.2.3 From 2906f66a5682e5670a5eefe991843689b8d8563f Mon Sep 17 00:00:00 2001 From: Venkata Mohan Reddy Date: Thu, 18 Feb 2010 12:31:05 +0100 Subject: ipvs: SCTP Trasport Loadbalancing Support Enhance IPVS to load balance SCTP transport protocol packets. This is done based on the SCTP rfc 4960. All possible control chunks have been taken care. The state machine used in this code looks some what lengthy. I tried to make the state machine easy to understand. Signed-off-by: Venkata Mohan Reddy Koppula Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 22 +- net/netfilter/ipvs/Kconfig | 7 + net/netfilter/ipvs/Makefile | 1 + net/netfilter/ipvs/ip_vs_core.c | 62 +- net/netfilter/ipvs/ip_vs_ctl.c | 5 +- net/netfilter/ipvs/ip_vs_proto.c | 3 + net/netfilter/ipvs/ip_vs_proto_sctp.c | 1183 +++++++++++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_sync.c | 14 + 8 files changed, 1285 insertions(+), 12 deletions(-) create mode 100644 net/netfilter/ipvs/ip_vs_proto_sctp.c (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a816c37417bb..fe82b1e10a29 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -224,6 +224,26 @@ enum { IP_VS_ICMP_S_LAST, }; +/* + * SCTP State Values + */ +enum ip_vs_sctp_states { + IP_VS_SCTP_S_NONE, + IP_VS_SCTP_S_INIT_CLI, + IP_VS_SCTP_S_INIT_SER, + IP_VS_SCTP_S_INIT_ACK_CLI, + IP_VS_SCTP_S_INIT_ACK_SER, + IP_VS_SCTP_S_ECHO_CLI, + IP_VS_SCTP_S_ECHO_SER, + IP_VS_SCTP_S_ESTABLISHED, + IP_VS_SCTP_S_SHUT_CLI, + IP_VS_SCTP_S_SHUT_SER, + IP_VS_SCTP_S_SHUT_ACK_CLI, + IP_VS_SCTP_S_SHUT_ACK_SER, + IP_VS_SCTP_S_CLOSED, + IP_VS_SCTP_S_LAST +}; + /* * Delta sequence info structure * Each ip_vs_conn has 2 (output AND input seq. changes). @@ -741,7 +761,7 @@ extern struct ip_vs_protocol ip_vs_protocol_udp; extern struct ip_vs_protocol ip_vs_protocol_icmp; extern struct ip_vs_protocol ip_vs_protocol_esp; extern struct ip_vs_protocol ip_vs_protocol_ah; - +extern struct ip_vs_protocol ip_vs_protocol_sctp; /* * Registering/unregistering scheduler functions diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 817a8898203b..712ccad13344 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -104,6 +104,13 @@ config IP_VS_PROTO_AH This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. +config IP_VS_PROTO_SCTP + bool "SCTP load balancing support" + select LIBCRC32C + ---help--- + This option enables support for load balancing SCTP transport + protocol. Say Y if unsure. + comment "IPVS scheduler" config IP_VS_RR diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 73a46fe1fe4c..e3baefd7066e 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -7,6 +7,7 @@ ip_vs_proto-objs-y := ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o +ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 847ffca40184..72e96d823ebf 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto) return "UDP"; case IPPROTO_TCP: return "TCP"; + case IPPROTO_SCTP: + return "SCTP"; case IPPROTO_ICMP: return "ICMP"; #ifdef CONFIG_IP_VS_IPV6 @@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, ip_send_check(ciph); } - /* the TCP/UDP port */ - if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { + /* the TCP/UDP/SCTP port */ + if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol || + IPPROTO_SCTP == ciph->protocol) { __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) @@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, ciph->saddr = cp->daddr.in6; } - /* the TCP/UDP port */ - if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { + /* the TCP/UDP/SCTP port */ + if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || + IPPROTO_SCTP == ciph->nexthdr) { __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); if (inout) @@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, goto out; } - if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) + if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || + IPPROTO_SCTP == protocol) offset += 2 * sizeof(__u16); if (!skb_make_writable(skb, offset)) goto out; @@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) } #endif +/* + * Check if sctp chunc is ABORT chunk + */ +static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) +{ + sctp_chunkhdr_t *sch, schunk; + sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t), + sizeof(schunk), &schunk); + if (sch == NULL) + return 0; + if (sch->type == SCTP_CID_ABORT) + return 1; + return 0; +} + static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; @@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || - pp->protocol == IPPROTO_UDP)) { + pp->protocol == IPPROTO_UDP || + pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph.len, @@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, * existing entry if it is not RST * packet or not TCP packet. */ - if (iph.protocol != IPPROTO_TCP - || !is_tcp_reset(skb, iph.len)) { + if ((iph.protocol != IPPROTO_TCP && + iph.protocol != IPPROTO_SCTP) + || ((iph.protocol == IPPROTO_TCP + && !is_tcp_reset(skb, iph.len)) + || (iph.protocol == IPPROTO_SCTP + && !is_sctp_abort(skb, + iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) icmpv6_send(skb, @@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) + if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || + IPPROTO_SCTP == cih->nexthdr) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); /* do not touch skb anymore */ @@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, * encorage the standby servers to update the connections timeout */ pkts = atomic_add_return(1, &cp->in_pkts); + if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && + cp->protocol == IPPROTO_SCTP) { + if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && + (atomic_read(&cp->in_pkts) % + sysctl_ip_vs_sync_threshold[1] + == sysctl_ip_vs_sync_threshold[0])) || + (cp->old_state != cp->state && + ((cp->state == IP_VS_SCTP_S_CLOSED) || + (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || + (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { + ip_vs_sync_conn(cp); + goto out; + } + } + if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || @@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); +out: cp->old_state = cp->state; ip_vs_conn_put(cp); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 00d0b152db39..7ee9c3426f44 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } } - /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { + /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && + usvc.protocol != IPPROTO_SCTP) { pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", usvc.protocol, &usvc.addr.ip, ntohs(usvc.port), usvc.sched_name); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 3e7671674549..0e584553819d 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void) #ifdef CONFIG_IP_VS_PROTO_UDP REGISTER_PROTOCOL(&ip_vs_protocol_udp); #endif +#ifdef CONFIG_IP_VS_PROTO_SCTP + REGISTER_PROTOCOL(&ip_vs_protocol_sctp); +#endif #ifdef CONFIG_IP_VS_PROTO_AH REGISTER_PROTOCOL(&ip_vs_protocol_ah); #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c new file mode 100644 index 000000000000..c9a3f7a21d53 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -0,0 +1,1183 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct ip_vs_conn * +sctp_conn_in_get(int af, + const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} + +static struct ip_vs_conn * +sctp_conn_out_get(int af, + const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} + +static int +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + int *verdict, struct ip_vs_conn **cpp) +{ + struct ip_vs_service *svc; + sctp_chunkhdr_t _schunkh, *sch; + sctp_sctphdr_t *sh, _sctph; + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + + sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch == NULL) + return 0; + + if ((sch->type == SCTP_CID_INIT) && + (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + &iph.daddr, sh->dest))) { + if (ip_vs_todrop()) { + /* + * It seems that we are very loaded. + * We have to drop this packet :( + */ + ip_vs_service_put(svc); + *verdict = NF_DROP; + return 0; + } + /* + * Let the virtual server select a real server for the + * incoming connection, and create a connection entry. + */ + *cpp = ip_vs_schedule(svc, skb); + if (!*cpp) { + *verdict = ip_vs_leave(svc, skb, pp); + return 0; + } + ip_vs_service_put(svc); + } + + return 1; +} + +static int +sctp_snat_handler(struct sk_buff *skb, + struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +{ + sctp_sctphdr_t *sctph; + unsigned int sctphoff; + __be32 crc32; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + /* csum_check requires unshared skb */ + if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) + return 0; + + if (unlikely(cp->app != NULL)) { + /* Some checks before mangling */ + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + return 0; + + /* Call application helper if needed */ + if (!ip_vs_app_pkt_out(cp, skb)) + return 0; + } + + sctph = (void *) skb_network_header(skb) + sctphoff; + sctph->source = cp->vport; + + /* Calculate the checksum */ + crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); + for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) + crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), + crc32); + crc32 = sctp_end_cksum(crc32); + sctph->checksum = crc32; + + return 1; +} + +static int +sctp_dnat_handler(struct sk_buff *skb, + struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +{ + + sctp_sctphdr_t *sctph; + unsigned int sctphoff; + __be32 crc32; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + /* csum_check requires unshared skb */ + if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) + return 0; + + if (unlikely(cp->app != NULL)) { + /* Some checks before mangling */ + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + return 0; + + /* Call application helper if needed */ + if (!ip_vs_app_pkt_out(cp, skb)) + return 0; + } + + sctph = (void *) skb_network_header(skb) + sctphoff; + sctph->dest = cp->dport; + + /* Calculate the checksum */ + crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); + for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) + crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), + crc32); + crc32 = sctp_end_cksum(crc32); + sctph->checksum = crc32; + + return 1; +} + +static int +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) +{ + struct sk_buff *list = skb_shinfo(skb)->frag_list; + unsigned int sctphoff; + struct sctphdr *sh, _sctph; + __le32 cmp; + __le32 val; + __u32 tmp; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + cmp = sh->checksum; + + tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb)); + for (; list; list = list->next) + tmp = sctp_update_cksum((__u8 *) list->data, + skb_headlen(list), tmp); + + val = sctp_end_cksum(tmp); + + if (val != cmp) { + /* CRC failure, dump it. */ + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + return 1; +} + +struct ipvs_sctp_nextstate { + int next_state; +}; +enum ipvs_sctp_event_t { + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_SER, + IP_VS_SCTP_EVE_INIT_CLI, + IP_VS_SCTP_EVE_INIT_SER, + IP_VS_SCTP_EVE_INIT_ACK_CLI, + IP_VS_SCTP_EVE_INIT_ACK_SER, + IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, + IP_VS_SCTP_EVE_COOKIE_ECHO_SER, + IP_VS_SCTP_EVE_COOKIE_ACK_CLI, + IP_VS_SCTP_EVE_COOKIE_ACK_SER, + IP_VS_SCTP_EVE_ABORT_CLI, + IP_VS_SCTP_EVE__ABORT_SER, + IP_VS_SCTP_EVE_SHUT_CLI, + IP_VS_SCTP_EVE_SHUT_SER, + IP_VS_SCTP_EVE_SHUT_ACK_CLI, + IP_VS_SCTP_EVE_SHUT_ACK_SER, + IP_VS_SCTP_EVE_SHUT_COM_CLI, + IP_VS_SCTP_EVE_SHUT_COM_SER, + IP_VS_SCTP_EVE_LAST +}; + +static enum ipvs_sctp_event_t sctp_events[255] = { + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_INIT_CLI, + IP_VS_SCTP_EVE_INIT_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_ABORT_CLI, + IP_VS_SCTP_EVE_SHUT_CLI, + IP_VS_SCTP_EVE_SHUT_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, + IP_VS_SCTP_EVE_COOKIE_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_SHUT_COM_CLI, +}; + +static struct ipvs_sctp_nextstate + sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { + /* + * STATE : IP_VS_SCTP_S_NONE + */ + /*next state *//*event */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, + }, + /* + * STATE : IP_VS_SCTP_S_INIT_CLI + * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_SER + * Server sent INIT and waiting for INIT ACK from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_ACK_CLI + * Client sent INIT ACK and waiting for ECHO from the server + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK has been resent by the client, let us stay is in + * the same state + */ + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK sent by the server, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * ECHO by client, it should not happen, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO by server, this is what we are expecting, move to ECHO_SER + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, it should not happen, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * Unexpected COOKIE ACK from server, staty in the same state + */ + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_ACK_SER + * Server sent INIT ACK and waiting for ECHO from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * Unexpected INIT_ACK by the client, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK resent by the server, let us move to same state + */ + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client send the ECHO, this is what we are expecting, + * move to ECHO_CLI + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO received from the server, Not sure what to do, + * let us close it + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, let us stay in the same state + */ + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, hmm... this should not happen, lets close + * the connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ECHO_CLI + * Cient sent ECHO and waiting COOKEI ACK from the Server + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK has been by the client, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client resent the ECHO, let us stay in the same state + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO received from the server, Not sure what to do, + * let us close it + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, this shoud not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, this is what we are awaiting,lets move to + * ESTABLISHED. + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ECHO_SER + * Server sent ECHO and waiting COOKEI ACK from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK has been by the server, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent the ECHO, not sure what to do, let's close the + * connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO resent by the server, stay in the same state + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, this is what we are expecting, let's move + * to ESTABLISHED. + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, this should not happen, lets close the + * connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ESTABLISHED + * Association established + */ + {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_SHUT_CLI + * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN resent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this is what we are expecting, let's move + * to SHUDOWN_ACK_SER + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_SHUT_SER + * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN resent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN resent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this is what we are expecting, let's + * move to SHUT_ACK_CLI + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + + /* + * State : IP_VS_SCTP_S_SHUT_ACK_CLI + * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN sent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN sent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client resent SHUDTDOWN_ACK, let's stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + /* + * SHUTDOWN COMPLETE from server this is what we are expecting. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + + /* + * State : IP_VS_SCTP_S_SHUT_ACK_SER + * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN sent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN sent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen let's close + * the connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server resent SHUTDOWN ACK, stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this what we are expecting, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + /* + * SHUTDOWN COMPLETE from server this should not happen. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_CLOSED + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + } +}; + +/* + * Timeout table[state] + */ +static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_CLOSED] = 10 * HZ, + [IP_VS_SCTP_S_LAST] = 2 * HZ, +}; + +static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", + [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", + [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", + [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", + [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", + [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", + [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", + [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", + [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", + [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!" +}; + + +static const char *sctp_state_name(int state) +{ + if (state >= IP_VS_SCTP_S_LAST) + return "ERR!"; + if (sctp_state_name_table[state]) + return sctp_state_name_table[state]; + return "?"; +} + +static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) +{ +} + +static int +sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) +{ + +return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, + sctp_state_name_table, sname, to); +} + +static inline int +set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, + int direction, const struct sk_buff *skb) +{ + sctp_chunkhdr_t _sctpch, *sch; + unsigned char chunk_type; + int event, next_state; + int ihl; + +#ifdef CONFIG_IP_VS_IPV6 + ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); +#else + ihl = ip_hdrlen(skb); +#endif + + sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), + sizeof(_sctpch), &_sctpch); + if (sch == NULL) + return 0; + + chunk_type = sch->type; + /* + * Section 3: Multiple chunks can be bundled into one SCTP packet + * up to the MTU size, except for the INIT, INIT ACK, and + * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with + * any other chunk in a packet. + * + * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control + * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be + * bundled with an ABORT, but they MUST be placed before the ABORT + * in the SCTP packet or they will be ignored by the receiver. + */ + if ((sch->type == SCTP_CID_COOKIE_ECHO) || + (sch->type == SCTP_CID_COOKIE_ACK)) { + sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + + sch->length), sizeof(_sctpch), &_sctpch); + if (sch) { + if (sch->type == SCTP_CID_ABORT) + chunk_type = sch->type; + } + } + + event = sctp_events[chunk_type]; + + /* + * If the direction is IP_VS_DIR_OUTPUT, this event is from server + */ + if (direction == IP_VS_DIR_OUTPUT) + event++; + /* + * get next state + */ + next_state = sctp_states_table[cp->state][event].next_state; + + if (next_state != cp->state) { + struct ip_vs_dest *dest = cp->dest; + + IP_VS_DBG_BUF(8, "%s %s %s:%d->" + "%s:%d state: %s->%s conn->refcnt:%d\n", + pp->name, + ((direction == IP_VS_DIR_OUTPUT) ? + "output " : "input "), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), + ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + sctp_state_name(cp->state), + sctp_state_name(next_state), + atomic_read(&cp->refcnt)); + if (dest) { + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (next_state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags |= IP_VS_CONN_F_INACTIVE; + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && + (next_state == IP_VS_SCTP_S_ESTABLISHED)) { + atomic_inc(&dest->activeconns); + atomic_dec(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } + } + } + + cp->timeout = pp->timeout_table[cp->state = next_state]; + + return 1; +} + +static int +sctp_state_transition(struct ip_vs_conn *cp, int direction, + const struct sk_buff *skb, struct ip_vs_protocol *pp) +{ + int ret = 0; + + spin_lock(&cp->lock); + ret = set_sctp_state(pp, cp, direction, skb); + spin_unlock(&cp->lock); + + return ret; +} + +/* + * Hash table for SCTP application incarnations + */ +#define SCTP_APP_TAB_BITS 4 +#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) +#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) + +static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; +static DEFINE_SPINLOCK(sctp_app_lock); + +static inline __u16 sctp_app_hashkey(__be16 port) +{ + return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) + & SCTP_APP_TAB_MASK; +} + +static int sctp_register_app(struct ip_vs_app *inc) +{ + struct ip_vs_app *i; + __u16 hash; + __be16 port = inc->port; + int ret = 0; + + hash = sctp_app_hashkey(port); + + spin_lock_bh(&sctp_app_lock); + list_for_each_entry(i, &sctp_apps[hash], p_list) { + if (i->port == port) { + ret = -EEXIST; + goto out; + } + } + list_add(&inc->p_list, &sctp_apps[hash]); + atomic_inc(&ip_vs_protocol_sctp.appcnt); +out: + spin_unlock_bh(&sctp_app_lock); + + return ret; +} + +static void sctp_unregister_app(struct ip_vs_app *inc) +{ + spin_lock_bh(&sctp_app_lock); + atomic_dec(&ip_vs_protocol_sctp.appcnt); + list_del(&inc->p_list); + spin_unlock_bh(&sctp_app_lock); +} + +static int sctp_app_conn_bind(struct ip_vs_conn *cp) +{ + int hash; + struct ip_vs_app *inc; + int result = 0; + + /* Default binding: bind app only for NAT */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + return 0; + /* Lookup application incarnations and bind the right one */ + hash = sctp_app_hashkey(cp->vport); + + spin_lock(&sctp_app_lock); + list_for_each_entry(inc, &sctp_apps[hash], p_list) { + if (inc->port == cp->vport) { + if (unlikely(!ip_vs_app_inc_get(inc))) + break; + spin_unlock(&sctp_app_lock); + + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; + if (inc->init_conn) + result = inc->init_conn(inc, cp); + goto out; + } + } + spin_unlock(&sctp_app_lock); +out: + return result; +} + +static void ip_vs_sctp_init(struct ip_vs_protocol *pp) +{ + IP_VS_INIT_HASH_TABLE(sctp_apps); + pp->timeout_table = sctp_timeouts; +} + + +static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) +{ + +} + +struct ip_vs_protocol ip_vs_protocol_sctp = { + .name = "SCTP", + .protocol = IPPROTO_SCTP, + .num_states = IP_VS_SCTP_S_LAST, + .dont_defrag = 0, + .appcnt = ATOMIC_INIT(0), + .init = ip_vs_sctp_init, + .exit = ip_vs_sctp_exit, + .register_app = sctp_register_app, + .unregister_app = sctp_unregister_app, + .conn_schedule = sctp_conn_schedule, + .conn_in_get = sctp_conn_in_get, + .conn_out_get = sctp_conn_out_get, + .snat_handler = sctp_snat_handler, + .dnat_handler = sctp_dnat_handler, + .csum_check = sctp_csum_check, + .state_name = sctp_state_name, + .state_transition = sctp_state_transition, + .app_conn_bind = sctp_app_conn_bind, + .debug_packet = ip_vs_tcpudp_debug_packet, + .timeout_change = sctp_timeout_change, + .set_state_timeout = sctp_set_state_timeout, +}; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e177f0dc2084..8fb0ae616761 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) flags |= IP_VS_CONN_F_INACTIVE; else flags &= ~IP_VS_CONN_F_INACTIVE; + } else if (s->protocol == IPPROTO_SCTP) { + if (state != IP_VS_SCTP_S_ESTABLISHED) + flags |= IP_VS_CONN_F_INACTIVE; + else + flags &= ~IP_VS_CONN_F_INACTIVE; } cp = ip_vs_conn_new(AF_INET, s->protocol, (union nf_inet_addr *)&s->caddr, @@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) atomic_dec(&dest->inactconns); cp->flags &= ~IP_VS_CONN_F_INACTIVE; } + } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && + (cp->state != state)) { + dest = cp->dest; + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } } if (opt) -- cgit v1.2.3 From 37ee3d5b3e979a168536e7e2f15bd1e769cb4122 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 18 Feb 2010 19:04:44 +0100 Subject: netfilter: nf_defrag_ipv4: fix compilation error with NF_CONNTRACK=n As reported by Randy Dunlap , compilation of nf_defrag_ipv4 fails with: include/net/netfilter/nf_conntrack.h:94: error: field 'ct_general' has incomplete type include/net/netfilter/nf_conntrack.h:178: error: 'const struct sk_buff' has no member named 'nfct' include/net/netfilter/nf_conntrack.h:185: error: implicit declaration of function 'nf_conntrack_put' include/net/netfilter/nf_conntrack.h:294: error: 'const struct sk_buff' has no member named 'nfct' net/ipv4/netfilter/nf_defrag_ipv4.c:45: error: 'struct sk_buff' has no member named 'nfct' net/ipv4/netfilter/nf_defrag_ipv4.c:46: error: 'struct sk_buff' has no member named 'nfct' net/nf_conntrack.h must not be included with NF_CONNTRACK=n, add a few #ifdefs. Long term the header file should be fixed to be usable even with NF_CONNTRACK=n. Tested-by: Randy Dunlap Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_zones.h | 6 ++++-- net/ipv4/netfilter/nf_defrag_ipv4.c | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 0bbb2bd51e89..034efe8d45a5 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -1,10 +1,11 @@ #ifndef _NF_CONNTRACK_ZONES_H #define _NF_CONNTRACK_ZONES_H -#include - #define NF_CT_DEFAULT_ZONE 0 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include + struct nf_conntrack_zone { u16 id; }; @@ -20,4 +21,5 @@ static inline u16 nf_ct_zone(const struct nf_conn *ct) return NF_CT_DEFAULT_ZONE; } +#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */ #endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index d498a704d456..cb763ae9ed90 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -16,9 +16,11 @@ #include #include -#include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include +#endif +#include /* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) @@ -42,8 +44,10 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && -- cgit v1.2.3 From bbef49daca35d4fd21bf606a10b6980f17d9df5d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Feb 2010 08:13:30 +0000 Subject: ipv6: use standard lists for FIB walks Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- net/ipv6/ip6_fib.c | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8f279ddb3593..86f46c49e318 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,7 +124,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) } struct fib6_walker_t { - struct fib6_walker_t *prev, *next; + struct list_head lh; struct fib6_node *root, *node; struct rt6_info *leaf; unsigned char state; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 77e122f53ea6..2f9847924fa5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -93,29 +93,20 @@ static __u32 rt_sernum; static void fib6_gc_timer_cb(unsigned long arg); -static struct fib6_walker_t fib6_walker_list = { - .prev = &fib6_walker_list, - .next = &fib6_walker_list, -}; - -#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) +static LIST_HEAD(fib6_walkers); +#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) static inline void fib6_walker_link(struct fib6_walker_t *w) { write_lock_bh(&fib6_walker_lock); - w->next = fib6_walker_list.next; - w->prev = &fib6_walker_list; - w->next->prev = w; - w->prev->next = w; + list_add(&w->lh, &fib6_walkers); write_unlock_bh(&fib6_walker_lock); } static inline void fib6_walker_unlink(struct fib6_walker_t *w) { write_lock_bh(&fib6_walker_lock); - w->next->prev = w->prev; - w->prev->next = w->next; - w->prev = w->next = w; + list_del(&w->lh); write_unlock_bh(&fib6_walker_lock); } static __inline__ u32 fib6_new_sernum(void) -- cgit v1.2.3 From b54452b07a7b1b8cc1385edba3ef2ef6d4679d5a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Feb 2010 08:14:31 +0000 Subject: const: struct nla_policy Make remaining netlink policies as const. Fixup coding style where needed. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- kernel/taskstats.c | 6 ++---- net/dcb/dcbnl.c | 16 ++++++++-------- net/irda/irnetlink.c | 2 +- net/wimax/op-msg.c | 3 +-- net/wimax/op-reset.c | 3 +-- net/wimax/op-rfkill.c | 3 +-- net/wimax/op-state-get.c | 3 +-- net/wimax/stack.c | 3 +-- net/wireless/nl80211.c | 14 +++++--------- 10 files changed, 22 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index a63b2192ac1c..f82e463c875a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -196,7 +196,7 @@ enum { * All other Exact length of attribute payload * * Example: - * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { + * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, diff --git a/kernel/taskstats.c b/kernel/taskstats.c index ea8384d3caa7..899ca51be5e8 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -46,15 +46,13 @@ static struct genl_family family = { .maxattr = TASKSTATS_CMD_ATTR_MAX, }; -static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] -__read_mostly = { +static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; -static struct nla_policy -cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index db9f5b39388f..813e399220a7 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -54,7 +54,7 @@ MODULE_LICENSE("GPL"); /**************** DCB attribute policies *************************************/ /* DCB netlink attributes policy */ -static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_IFNAME] = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1}, [DCB_ATTR_STATE] = {.type = NLA_U8}, [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, @@ -68,7 +68,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { }; /* DCB priority flow control to User Priority nested attributes */ -static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, @@ -81,7 +81,7 @@ static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { }; /* DCB priority grouping nested attributes */ -static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED}, [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED}, [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED}, @@ -103,7 +103,7 @@ static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { }; /* DCB traffic class nested attributes. */ -static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { +static const struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8}, [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8}, [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8}, @@ -112,7 +112,7 @@ static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { }; /* DCB capabilities nested attributes. */ -static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { [DCB_CAP_ATTR_ALL] = {.type = NLA_FLAG}, [DCB_CAP_ATTR_PG] = {.type = NLA_U8}, [DCB_CAP_ATTR_PFC] = {.type = NLA_U8}, @@ -124,14 +124,14 @@ static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { }; /* DCB capabilities nested attributes. */ -static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { [DCB_NUMTCS_ATTR_ALL] = {.type = NLA_FLAG}, [DCB_NUMTCS_ATTR_PG] = {.type = NLA_U8}, [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, }; /* DCB BCN nested attributes. */ -static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, @@ -160,7 +160,7 @@ static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { }; /* DCB APP nested attributes. */ -static struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { [DCB_APP_ATTR_IDTYPE] = {.type = NLA_U8}, [DCB_APP_ATTR_ID] = {.type = NLA_U16}, [DCB_APP_ATTR_PRIORITY] = {.type = NLA_U8}, diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 476b307bd801..69b5b75f5431 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -124,7 +124,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) return ret; } -static struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { +static const struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { [IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [IRDA_NL_ATTR_MODE] = { .type = NLA_U32 }, diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index d3bfb6ef13ae..7718657e93dc 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -320,8 +320,7 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, EXPORT_SYMBOL_GPL(wimax_msg); -static const -struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_MSG_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index 35f370091f4f..4dc82a54ba30 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -91,8 +91,7 @@ int wimax_reset(struct wimax_dev *wimax_dev) EXPORT_SYMBOL(wimax_reset); -static const -struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_RESET_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index ae752a64d920..e978c7136c97 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -410,8 +410,7 @@ void wimax_rfkill_rm(struct wimax_dev *wimax_dev) * just query). */ -static const -struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_RFKILL_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c index a76b8fcb056d..11ad3356eb56 100644 --- a/net/wimax/op-state-get.c +++ b/net/wimax/op-state-get.c @@ -33,8 +33,7 @@ #include "debug-levels.h" -static const -struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_STGET_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/stack.c b/net/wimax/stack.c index c8866412f830..813e1eaea29b 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -75,8 +75,7 @@ MODULE_PARM_DESC(debug, * close to where the data is generated. */ /* -static const -struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_STCH_STATE_OLD] = { .type = NLA_U8 }, [WIMAX_GNL_STCH_STATE_NEW] = { .type = NLA_U8 }, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b79ecf17bea..a001ea32cb7d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -58,7 +58,7 @@ static int get_rdev_dev_by_info_ifindex(struct genl_info *info, } /* policy for the attributes */ -static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, @@ -148,8 +148,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { }; /* policy for the attributes */ -static struct nla_policy -nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { +static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, @@ -2501,8 +2500,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) return err; } -static const struct nla_policy - reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { +static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, @@ -2671,8 +2669,7 @@ do {\ } \ } while (0);\ -static struct nla_policy -nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 }, @@ -4470,8 +4467,7 @@ static u32 rateset_to_mask(struct ieee80211_supported_band *sband, return mask; } -static struct nla_policy -nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, }; -- cgit v1.2.3 From 5aa4b32fc86408705337e941ed716880c63d1590 Mon Sep 17 00:00:00 2001 From: Andreas Petlund Date: Thu, 18 Feb 2010 02:45:45 +0000 Subject: net: TCP thin-stream detection Inline function to dynamically detect thin streams based on the number of packets in flight. Used to dynamically trigger thin-stream mechanisms if enabled by ioctl or sysctl. Signed-off-by: Andreas Petlund Signed-off-by: David S. Miller --- Documentation/networking/tcp-thin.txt | 47 +++++++++++++++++++++++++++++++++++ include/net/tcp.h | 8 ++++++ 2 files changed, 55 insertions(+) create mode 100644 Documentation/networking/tcp-thin.txt (limited to 'include/net') diff --git a/Documentation/networking/tcp-thin.txt b/Documentation/networking/tcp-thin.txt new file mode 100644 index 000000000000..151e229980f1 --- /dev/null +++ b/Documentation/networking/tcp-thin.txt @@ -0,0 +1,47 @@ +Thin-streams and TCP +==================== +A wide range of Internet-based services that use reliable transport +protocols display what we call thin-stream properties. This means +that the application sends data with such a low rate that the +retransmission mechanisms of the transport protocol are not fully +effective. In time-dependent scenarios (like online games, control +systems, stock trading etc.) where the user experience depends +on the data delivery latency, packet loss can be devastating for +the service quality. Extreme latencies are caused by TCP's +dependency on the arrival of new data from the application to trigger +retransmissions effectively through fast retransmit instead of +waiting for long timeouts. + +After analysing a large number of time-dependent interactive +applications, we have seen that they often produce thin streams +and also stay with this traffic pattern throughout its entire +lifespan. The combination of time-dependency and the fact that the +streams provoke high latencies when using TCP is unfortunate. + +In order to reduce application-layer latency when packets are lost, +a set of mechanisms has been made, which address these latency issues +for thin streams. In short, if the kernel detects a thin stream, +the retransmission mechanisms are modified in the following manner: + +1) If the stream is thin, fast retransmit on the first dupACK. +2) If the stream is thin, do not apply exponential backoff. + +These enhancements are applied only if the stream is detected as +thin. This is accomplished by defining a threshold for the number +of packets in flight. If there are less than 4 packets in flight, +fast retransmissions can not be triggered, and the stream is prone +to experience high retransmission latencies. + +Since these mechanisms are targeted at time-dependent applications, +they must be specifically activated by the application using the +TCP_THIN_LINEAR_TIMEOUTS and TCP_THIN_DUPACK IOCTLS or the +tcp_thin_linear_timeouts and tcp_thin_dupack sysctls. Both +modifications are turned off by default. + +References +========== +More information on the modifications, as well as a wide range of +experimental data can be found here: +"Improving latency for interactive, thin-stream applications over +reliable transport" +http://simula.no/research/nd/publications/Simula.nd.477/simula_pdf_file diff --git a/include/net/tcp.h b/include/net/tcp.h index 75a00c80bdda..0bdc3f640247 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1386,6 +1386,14 @@ static inline void tcp_highest_sack_combine(struct sock *sk, tcp_sk(sk)->highest_sack = new; } +/* Determines whether this is a thin stream (which may suffer from + * increased latency). Used to trigger latency-reducing mechanisms. + */ +static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp) +{ + return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, -- cgit v1.2.3 From 36e31b0af58728071e8023cf8e20c5166b700717 Mon Sep 17 00:00:00 2001 From: Andreas Petlund Date: Thu, 18 Feb 2010 02:47:01 +0000 Subject: net: TCP thin linear timeouts This patch will make TCP use only linear timeouts if the stream is thin. This will help to avoid the very high latencies that thin stream suffer because of exponential backoff. This mechanism is only active if enabled by iocontrol or syscontrol and the stream is identified as thin. A maximum of 6 linear timeouts is tried before exponential backoff is resumed. Signed-off-by: Andreas Petlund Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ include/linux/tcp.h | 5 ++++- include/net/tcp.h | 4 ++++ net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp.c | 7 +++++++ net/ipv4/tcp_timer.c | 21 ++++++++++++++++++++- 6 files changed, 54 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2dc7a1d97686..f147310d9af4 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -487,6 +487,18 @@ tcp_dma_copybreak - INTEGER and CONFIG_NET_DMA is enabled. Default: 4096 +tcp_thin_linear_timeouts - BOOLEAN + Enable dynamic triggering of linear timeouts for thin streams. + If set, a check is performed upon retransmission by timeout to + determine if the stream is thin (less than 4 packets in flight). + As long as the stream is found to be thin, up to 6 linear + timeouts may be performed before exponential backoff mode is + initiated. This improves retransmission latency for + non-aggressive thin streams, often found to be time-dependent. + For more information on thin streams, see + Documentation/networking/tcp-thin.txt + Default: 0 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7fee8a4df931..3ba8b074612f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -103,6 +103,7 @@ enum { #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 @@ -340,7 +341,9 @@ struct tcp_sock { u32 frto_highmark; /* snd_nxt when RTO occurred */ u16 advmss; /* Advertised MSS */ u8 frto_counter; /* Number of new acks after RTO */ - u8 nonagle; /* Disable Nagle algorithm? */ + u8 nonagle : 4,/* Disable Nagle algorithm? */ + thin_lto : 1,/* Use linear timeouts for thin streams */ + unused : 3; /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0bdc3f640247..6278fc734abd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ +/* TCP thin-stream limits */ +#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -241,6 +244,7 @@ extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern int sysctl_tcp_cookie_size; +extern int sysctl_tcp_thin_linear_timeouts; extern atomic_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce3994..e6a2460587d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -575,6 +575,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_thin_linear_timeouts", + .data = &sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e471d037fcc9..21bae9afefea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } break; + case TCP_THIN_LINEAR_TIMEOUTS: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_lto = val; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index de7d1bf9114f..a17629b8912e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; +int sysctl_tcp_thin_linear_timeouts __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); @@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) icsk->icsk_retransmits++; out_reset_timer: - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this + * might be increased if the stream oscillates between thin and thick, + * thus the old value might already be too high compared to the value + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating + * exponential backoff behaviour to avoid continue hammering + * linear-timeout retransmissions into a black hole + */ + if (sk->sk_state == TCP_ESTABLISHED && + (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + tcp_stream_is_thin(tp) && + icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { + icsk->icsk_backoff = 0; + icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + } else { + /* Use normal (exponential) backoff */ + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) __sk_dst_reset(sk); -- cgit v1.2.3 From 7e38017557bc0b87434d184f8804cadb102bb903 Mon Sep 17 00:00:00 2001 From: Andreas Petlund Date: Thu, 18 Feb 2010 04:48:19 +0000 Subject: net: TCP thin dupack This patch enables fast retransmissions after one dupACK for TCP if the stream is identified as thin. This will reduce latencies for thin streams that are not able to trigger fast retransmissions due to high packet interarrival time. This mechanism is only active if enabled by iocontrol or syscontrol and the stream is identified as thin. Signed-off-by: Andreas Petlund Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ include/linux/tcp.h | 4 +++- include/net/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp.c | 7 +++++++ net/ipv4/tcp_input.c | 12 ++++++++++++ 6 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f147310d9af4..2571a62d923e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -499,6 +499,18 @@ tcp_thin_linear_timeouts - BOOLEAN Documentation/networking/tcp-thin.txt Default: 0 +tcp_thin_dupack - BOOLEAN + Enable dynamic triggering of retransmissions after one dupACK + for thin streams. If set, a check is performed upon reception + of a dupACK to determine if the stream is thin (less than 4 + packets in flight). As long as the stream is found to be thin, + data is retransmitted on the first received dupACK. This + improves retransmission latency for non-aggressive thin + streams, often found to be time-dependent. + For more information on thin streams, see + Documentation/networking/tcp-thin.txt + Default: 0 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3ba8b074612f..a778ee024590 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -104,6 +104,7 @@ enum { #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 @@ -343,7 +344,8 @@ struct tcp_sock { u8 frto_counter; /* Number of new acks after RTO */ u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ - unused : 3; + thin_dupack : 1,/* Fast retransmit on first dupack */ + unused : 2; /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6278fc734abd..56f0aec40ed6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -245,6 +245,7 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; +extern int sysctl_tcp_thin_dupack; extern atomic_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e6a2460587d4..c1bc074f61b7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -582,6 +582,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_thin_dupack", + .data = &sysctl_tcp_thin_dupack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "udp_mem", .data = &sysctl_udp_mem, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 21bae9afefea..5901010fad55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2236,6 +2236,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->thin_lto = val; break; + case TCP_THIN_DUPACK: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_dupack = val; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3fddc69ccccc..788851ca8c5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; +int sysctl_tcp_thin_dupack __read_mostly; + int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_abc __read_mostly; @@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk) return 1; } + /* If a thin stream is detected, retransmit after first + * received dupack. Employ only if SACK is supported in order + * to avoid possible corner-case series of spurious retransmissions + * Use only if there are no unsent data. + */ + if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && + tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && + tcp_is_sack(tp) && !tcp_send_head(sk)) + return 1; + return 0; } -- cgit v1.2.3 From ffb9eb3d8b450c22bbbc688c6b630141ac476fd9 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 17 Feb 2010 17:58:10 +0200 Subject: nl80211: add power save commands The most needed command from nl80211, which Wireless Extensions had, is support for power save mode. Add a simple command to make it possible to enable and disable power save via nl80211. I was also planning about extending the interface, for example adding the timeout value, but after thinking more about this I decided not to do it. Basically there were three reasons: Firstly, the parameters for power save are very much hardware dependent. Trying to find a unified interface which would work with all hardware, and still make sense to users, will be very difficult. Secondly, IEEE 802.11 power save implementation in Linux is still in state of flux. We have a long way to still to go and there is no way to predict what kind of implementation we will have after few years. And because we need to support nl80211 interface a long time, practically forever, adding now parameters to nl80211 might create maintenance problems later on. Third issue are the users. Power save parameters are mostly used for debugging, so debugfs is better, more flexible, interface for this. For example, wpa_supplicant currently doesn't configure anything related to power save mode. It's better to strive that kernel can automatically optimise the power save parameters, like with help of pm qos network and other traffic parameters. Later on, when we have better understanding of power save, we can extend this command with more features, if there's a need for that. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 ++++ include/net/cfg80211.h | 7 +-- net/wireless/core.c | 16 +++--- net/wireless/nl80211.c | 131 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/wext-compat.c | 10 ++-- 5 files changed, 159 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8e6384f8fda6..28ba20fda3e2 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -416,6 +416,9 @@ enum nl80211_commands { NL80211_CMD_ACTION, NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_SET_POWER_SAVE, + NL80211_CMD_GET_POWER_SAVE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -837,6 +840,8 @@ enum nl80211_attrs { NL80211_ATTR_ACK, + NL80211_ATTR_PS_STATE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1573,4 +1578,9 @@ enum nl80211_band { NL80211_BAND_5GHZ, }; +enum nl80211_ps_state { + NL80211_PS_DISABLED, + NL80211_PS_ENABLED, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7188934b64d3..3d134a1fb96b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1150,7 +1150,6 @@ struct cfg80211_ops { enum nl80211_channel_type channel_type, const u8 *buf, size_t len, u64 *cookie); - /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); }; @@ -1489,6 +1488,9 @@ struct wireless_dev { struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES]; struct cfg80211_internal_bss *current_bss; /* associated / joined */ + bool ps; + int ps_timeout; + #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -1500,8 +1502,7 @@ struct wireless_dev { u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; s8 default_key, default_mgmt_key; - bool ps, prev_bssid_valid; - int ps_timeout; + bool prev_bssid_valid; } wext; #endif }; diff --git a/net/wireless/core.c b/net/wireless/core.c index 51908dc2ea00..7fdb9409ad2a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -698,19 +698,21 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) - wdev->wext.ps = true; + wdev->ps = true; else - wdev->wext.ps = false; - wdev->wext.ps_timeout = 100; + wdev->ps = false; + wdev->ps_timeout = 100; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, - wdev->wext.ps, - wdev->wext.ps_timeout)) { + wdev->ps, + wdev->ps_timeout)) { /* assume this means it's off */ - wdev->wext.ps = false; + wdev->ps = false; } -#endif + if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 328112081358..b0495a1da22e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -148,6 +148,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_FRAME] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, + [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, }; /* policy for the attributes */ @@ -4663,6 +4664,124 @@ unlock_rtnl: return err; } +static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + struct net_device *dev; + u8 ps_state; + bool state; + int err; + + if (!info->attrs[NL80211_ATTR_PS_STATE]) { + err = -EINVAL; + goto out; + } + + ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]); + + if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED) { + err = -EINVAL; + goto out; + } + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rdev; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_power_mgmt) { + err = -EOPNOTSUPP; + goto unlock_rdev; + } + + state = (ps_state == NL80211_PS_ENABLED) ? true : false; + + if (state == wdev->ps) + goto unlock_rdev; + + wdev->ps = state; + + if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps, + wdev->ps_timeout)) + /* assume this means it's off */ + wdev->ps = false; + +unlock_rdev: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + rtnl_unlock(); + +out: + return err; +} + +static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + enum nl80211_ps_state ps_state; + struct wireless_dev *wdev; + struct net_device *dev; + struct sk_buff *msg; + void *hdr; + int err; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_power_mgmt) { + err = -EOPNOTSUPP; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_POWER_SAVE); + if (!hdr) { + err = -ENOMEM; + goto free_msg; + } + + if (wdev->ps) + ps_state = NL80211_PS_ENABLED; + else + ps_state = NL80211_PS_DISABLED; + + NLA_PUT_U32(msg, NL80211_ATTR_PS_STATE, ps_state); + + genlmsg_end(msg, hdr); + err = genlmsg_reply(msg, info); + goto out; + +nla_put_failure: + err = -ENOBUFS; + +free_msg: + nlmsg_free(msg); + +out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + +unlock_rtnl: + rtnl_unlock(); + + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4955,6 +5074,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_POWER_SAVE, + .doit = nl80211_set_power_save, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_GET_POWER_SAVE, + .doit = nl80211_get_power_save, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index b17eeae448d5..9ab51838849e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1099,8 +1099,8 @@ int cfg80211_wext_siwpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - bool ps = wdev->wext.ps; - int timeout = wdev->wext.ps_timeout; + bool ps = wdev->ps; + int timeout = wdev->ps_timeout; int err; if (wdev->iftype != NL80211_IFTYPE_STATION) @@ -1133,8 +1133,8 @@ int cfg80211_wext_siwpower(struct net_device *dev, if (err) return err; - wdev->wext.ps = ps; - wdev->wext.ps_timeout = timeout; + wdev->ps = ps; + wdev->ps_timeout = timeout; return 0; @@ -1147,7 +1147,7 @@ int cfg80211_wext_giwpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; - wrq->disabled = !wdev->wext.ps; + wrq->disabled = !wdev->ps; return 0; } -- cgit v1.2.3 From 808f5114a9206fee855117d416440e1071ab375c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 22 Feb 2010 07:57:18 +0000 Subject: packet: convert socket list to RCU (v3) Convert AF_PACKET to use RCU, eliminating one more reader/writer lock. There is no need for a real sk_del_node_init_rcu(), because sk_del_node_init is doing the equivalent thing to hlst_del_init_rcu already; but added some comments to try and make that obvious. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/netns/packet.h | 4 +-- include/net/sock.h | 10 ++++++++ net/packet/af_packet.c | 62 +++++++++++++++++++++++----------------------- 3 files changed, 43 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h index 637daf698884..cb4e894c0f8d 100644 --- a/include/net/netns/packet.h +++ b/include/net/netns/packet.h @@ -4,11 +4,11 @@ #ifndef __NETNS_PACKET_H__ #define __NETNS_PACKET_H__ -#include +#include #include struct netns_packet { - rwlock_t sklist_lock; + spinlock_t sklist_lock; struct hlist_head sklist; }; diff --git a/include/net/sock.h b/include/net/sock.h index 580d51fa28e9..6cb1676e409a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -381,6 +381,7 @@ static __inline__ void __sk_del_node(struct sock *sk) __hlist_del(&sk->sk_node); } +/* NB: equivalent to hlist_del_init_rcu */ static __inline__ int __sk_del_node_init(struct sock *sk) { if (sk_hashed(sk)) { @@ -421,6 +422,7 @@ static __inline__ int sk_del_node_init(struct sock *sk) } return rc; } +#define sk_del_node_init_rcu(sk) sk_del_node_init(sk) static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) { @@ -454,6 +456,12 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_head_rcu(&sk->sk_node, list); +} + static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); @@ -478,6 +486,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_rcu(__sk, node, list) \ + hlist_for_each_entry_rcu(__sk, node, list, sk_node) #define sk_nulls_for_each(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) #define sk_nulls_for_each_rcu(__sk, node, list) \ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 10f7295bcefb..2f0369367ee0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1262,24 +1262,22 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - write_lock_bh(&net->packet.sklist_lock); - sk_del_node_init(sk); + spin_lock_bh(&net->packet.sklist_lock); + sk_del_node_init_rcu(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); - write_unlock_bh(&net->packet.sklist_lock); - - /* - * Unhook packet receive handler. - */ + spin_unlock_bh(&net->packet.sklist_lock); + spin_lock(&po->bind_lock); if (po->running) { /* - * Remove the protocol hook + * Remove from protocol table */ - dev_remove_pack(&po->prot_hook); po->running = 0; po->num = 0; + __dev_remove_pack(&po->prot_hook); __sock_put(sk); } + spin_unlock(&po->bind_lock); packet_flush_mclist(sk); @@ -1291,10 +1289,10 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); + synchronize_net(); /* * Now the socket is dead. No more input will appear. */ - sock_orphan(sk); sock->sk = NULL; @@ -1478,10 +1476,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->running = 1; } - write_lock_bh(&net->packet.sklist_lock); - sk_add_node(sk, &net->packet.sklist); + spin_lock_bh(&net->packet.sklist_lock); + sk_add_node_rcu(sk, &net->packet.sklist); sock_prot_inuse_add(net, &packet_proto, 1); - write_unlock_bh(&net->packet.sklist_lock); + spin_unlock_bh(&net->packet.sklist_lock); + return 0; out: return err; @@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = data; struct net *net = dev_net(dev); - read_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } break; case NETDEV_UP: - spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && po->num && - !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + if (dev->ifindex == po->ifindex) { + spin_lock(&po->bind_lock); + if (po->num && !po->running) { + dev_add_pack(&po->prot_hook); + sock_hold(sk); + po->running = 1; + } + spin_unlock(&po->bind_lock); } - spin_unlock(&po->bind_lock); break; } } - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); return NOTIFY_DONE; } @@ -2512,24 +2512,24 @@ static struct notifier_block packet_netdev_notifier = { #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(seq_file_net(seq)->packet.sklist_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); - read_lock(&net->packet.sklist_lock); - return seq_hlist_start_head(&net->packet.sklist, *pos); + + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); - return seq_hlist_next(v, &net->packet.sklist, pos); + return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) - __releases(seq_file_net(seq)->packet.sklist_lock) + __releases(RCU) { - struct net *net = seq_file_net(seq); - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -2581,7 +2581,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - rwlock_init(&net->packet.sklist_lock); + spin_lock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) -- cgit v1.2.3 From bf825f81b454fae2ffe1b675f3a549656726440e Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:32:54 +0000 Subject: xfrm: introduce basic mark infrastructure Add basic structuring and accessors for xfrm mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/xfrm.h | 12 +++++++++--- include/net/xfrm.h | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 29e04beb1fc9..b971e3848493 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -267,8 +267,8 @@ enum xfrm_attr_type_t { XFRMA_ALG_COMP, /* struct xfrm_algo */ XFRMA_ENCAP, /* struct xfrm_algo + struct xfrm_encap_tmpl */ XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */ - XFRMA_SA, - XFRMA_POLICY, + XFRMA_SA, /* struct xfrm_usersa_info */ + XFRMA_POLICY, /*struct xfrm_userpolicy_info */ XFRMA_SEC_CTX, /* struct xfrm_sec_ctx */ XFRMA_LTIME_VAL, XFRMA_REPLAY_VAL, @@ -276,17 +276,23 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ - XFRMA_LASTUSED, + XFRMA_LASTUSED, /* unsigned long */ XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ + XFRMA_MARK, /* struct xfrm_mark */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) }; +struct xfrm_mark { + __u32 v; /* value */ + __u32 m; /* mask */ +}; + enum xfrm_sadattr_type_t { XFRMA_SAD_UNSPEC, XFRMA_SAD_CNT, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0beb413c01c4..39f151c7f251 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -140,6 +140,7 @@ struct xfrm_state { struct xfrm_id id; struct xfrm_selector sel; + struct xfrm_mark mark; u32 genid; @@ -481,6 +482,7 @@ struct xfrm_policy { u32 priority; u32 index; + struct xfrm_mark mark; struct xfrm_selector selector; struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; @@ -1570,4 +1572,24 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) } #endif +static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) +{ + if (attrs[XFRMA_MARK]) + memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m)); + else + m->v = m->m = 0; + + return m->v & m->m; +} + +static inline int xfrm_mark_put(struct sk_buff *skb, struct xfrm_mark *m) +{ + if (m->m | m->v) + NLA_PUT(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m); + return 0; + +nla_put_failure: + return -1; +} + #endif /* _NET_XFRM_H */ -- cgit v1.2.3 From bd55775c8dd656fc69b3a42a1c4ab32abb7e8af9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 16:20:22 -0800 Subject: xfrm: SA lookups signature with mark pass mark to all SA lookups to prepare them for when we add code to have them search. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 ++++++++++++++------ net/core/pktgen.c | 3 ++- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 6 ++++-- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 6 ++++-- net/ipv6/xfrm6_input.c | 2 +- net/key/af_key.c | 14 ++++++------ net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_state.c | 58 +++++++++++++++++++++++++++++--------------------- net/xfrm/xfrm_user.c | 17 +++++++++------ 13 files changed, 84 insertions(+), 55 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39f151c7f251..693523c870b9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1319,7 +1319,7 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); -extern struct xfrm_state * xfrm_stateonly_find(struct net *net, +extern struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, @@ -1328,8 +1328,14 @@ extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); -extern struct xfrm_state *xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); -extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, + xfrm_address_t *daddr, __be32 spi, + u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, + xfrm_address_t *daddr, + xfrm_address_t *saddr, + u8 proto, + unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family); @@ -1366,7 +1372,8 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); +extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, + u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); @@ -1451,9 +1458,11 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u8, int dir, u32 id, int d int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); -struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create, unsigned short family); +struct xfrm_state *xfrm_find_acq(struct net *net, struct xfrm_mark *mark, + u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, + xfrm_address_t *saddr, int create, + unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2e692afdc55d..43923811bd6a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2188,12 +2188,13 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... */ +#define DUMMY_MARK 0 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, + x = xfrm_stateonly_find(&init_net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 7ed3e4ae93ae..987b47dc69ad 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1948895beb6d..14ca1f1c3fb0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 83ed71500898..629067571f02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -63,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -87,8 +88,9 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) struct net *net = xs_net(x); int err = 0; struct xfrm_state *t; + u32 mark = x->mark.v & x->mark.m; - t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index c2f300c314be..5ac89025f9de 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -614,7 +614,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 668a46b655e6..ee9b93bdd6a2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -365,7 +365,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index bb42f39c1db8..85cccd6ed0b7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,7 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -92,6 +92,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -114,10 +115,11 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t = NULL; __be32 spi; + u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 9084582d236b..2bc98ede1235 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -101,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6); if (!x) continue; diff --git a/net/key/af_key.c b/net/key/af_key.c index da2fe5f57619..aae3cd86ccd7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -43,6 +43,8 @@ struct netns_pfkey { }; static DEFINE_MUTEX(pfkey_mutex); +#define DUMMY_MARK 0 +static struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; @@ -647,7 +649,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_ if (!xaddr) return NULL; - return xfrm_state_lookup(net, xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) @@ -1316,7 +1318,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1324,7 +1326,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(net, mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1373,7 +1375,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x == NULL) return 0; @@ -2572,8 +2574,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, dir, - pol->sadb_x_policy_id, delete, &err); + xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, + dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index e0009c17d809..45f1c98d4fce 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -152,7 +152,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(net, daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9fa3322b2a7d..9f8530356b86 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -669,7 +669,7 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; @@ -689,7 +689,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; @@ -713,12 +713,14 @@ static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { struct net *net = xs_net(x); + u32 mark = x->mark.v & x->mark.m; if (use_spi) - return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi, - x->id.proto, family); + return __xfrm_state_lookup(net, mark, &x->id.daddr, + x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(net, &x->id.daddr, + return __xfrm_state_lookup_byaddr(net, mark, + &x->id.daddr, &x->props.saddr, x->id.proto, family); } @@ -783,6 +785,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; + u32 mark = pol->mark.v & pol->mark.m; to_put = NULL; @@ -819,7 +822,7 @@ found: x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; @@ -833,6 +836,7 @@ found: /* Initialize temporary selector matching only * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); if (error) { @@ -875,7 +879,7 @@ out: } struct xfrm_state * -xfrm_stateonly_find(struct net *net, +xfrm_stateonly_find(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { @@ -971,7 +975,7 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; @@ -1026,6 +1030,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; + x->mark.v = m->v; + x->mark.m = m->m; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); @@ -1042,7 +1048,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_add(struct xfrm_state *x) { @@ -1050,6 +1056,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1, *to_put; int family; int err; + u32 mark = x->mark.v & x->mark.m; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; @@ -1067,7 +1074,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(net, x->km.seq); + x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1076,8 +1083,8 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, - x->id.proto, + x1 = __find_acq_core(net, &x->mark, family, x->props.mode, + x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); __xfrm_state_bump_genids(x); @@ -1151,6 +1158,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; } + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + err = xfrm_init_state(x); if (err) goto error; @@ -1342,41 +1351,41 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, - unsigned short family) +xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, + u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(net, daddr, spi, proto, family); + x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(struct net *net, +xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1423,7 +1432,7 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { int i; @@ -1442,12 +1451,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(net, seq); + x = __xfrm_find_acq_byseq(net, mark, seq); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1474,6 +1483,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -1486,7 +1496,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1496,7 +1506,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; hid.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ee04e6bf0e54..331ae731080a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,9 @@ #include #endif +#define DUMMY_MARK 0 +static struct xfrm_mark dummy_mark = {0, 0}; + static inline int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); @@ -530,7 +533,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(net, &p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, DUMMY_MARK, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -541,7 +544,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(net, &p->daddr, saddr, + x = xfrm_state_lookup_byaddr(net, DUMMY_MARK, &p->daddr, saddr, p->proto, p->family); } @@ -958,7 +961,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(net, p->info.seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -966,7 +969,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(net, p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, &dummy_mark, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -1598,7 +1601,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(net, &id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(net, DUMMY_MARK, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1640,7 +1643,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + x = xfrm_state_lookup(net, DUMMY_MARK, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1767,7 +1770,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; - x = xfrm_state_lookup(net, &p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, DUMMY_MARK, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) -- cgit v1.2.3 From 8ca2e93b557f2a0b35f7769038abf600177e1122 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 22 Feb 2010 11:32:57 +0000 Subject: xfrm: SP lookups signature with mark pass mark to all SP lookups to prepare them for when we add code to have them search. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +++-- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_user.c | 10 +++++----- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 693523c870b9..a7df3275b860 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1450,11 +1450,12 @@ extern int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, + u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u8, int dir, u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); diff --git a/net/key/af_key.c b/net/key/af_key.c index aae3cd86ccd7..368707882647 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2326,7 +2326,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(net, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2574,7 +2574,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6eb16d75243..e67d3ca6e657 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -635,8 +635,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { @@ -676,8 +676,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, - int delete, int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 331ae731080a..02a67b4a64dd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1457,7 +1457,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1474,8 +1474,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, - delete, &err); + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, + &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1712,7 +1712,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1729,7 +1729,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) -- cgit v1.2.3 From a898def29e4119bc01ebe7ca97423181f4c0ea2d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:49 -0800 Subject: net: Add checking to rcu_dereference() primitives Update rcu_dereference() primitives to use new lockdep-based checking. The rcu_dereference() in __in6_dev_get() may be protected either by rcu_read_lock() or RTNL, per Eric Dumazet. The rcu_dereference() in __sk_free() is protected by the fact that it is never reached if an update could change it. Check for this by using rcu_dereference_check() to verify that the struct sock's ->sk_wmem_alloc counter is zero. Acked-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-5-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rtnetlink.h | 3 +++ include/net/addrconf.h | 4 +++- net/core/dev.c | 2 +- net/core/filter.c | 6 +++--- net/core/rtnetlink.c | 8 ++++++++ net/core/sock.c | 3 ++- net/decnet/dn_route.c | 14 +++++++------- net/ipv4/route.c | 14 +++++++------- net/packet/af_packet.c | 2 +- 9 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 05330fc5b436..5c52fa43785c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -735,6 +735,9 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +#ifdef CONFIG_PROVE_LOCKING +extern int lockdep_rtnl_is_held(void); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ extern void rtnetlink_init(void); extern void __rtnl_unlock(void); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 0f7c37825fc1..45375b41a2a0 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -177,7 +177,9 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb); static inline struct inet6_dev * __in6_dev_get(struct net_device *dev) { - return rcu_dereference(dev->ip6_ptr); + return rcu_dereference_check(dev->ip6_ptr, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline struct inet6_dev * diff --git a/net/core/dev.c b/net/core/dev.c index ec874218b206..bb1f1da2b8a7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2041,7 +2041,7 @@ gso: rcu_read_lock_bh(); txq = dev_pick_tx(dev, skb); - q = rcu_dereference(txq->qdisc); + q = rcu_dereference_bh(txq->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); diff --git a/net/core/filter.c b/net/core/filter.c index 08db7b9143a3..3541aa48d21d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -86,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) return err; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); @@ -521,7 +521,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } rcu_read_lock_bh(); - old_fp = rcu_dereference(sk->sk_filter); + old_fp = rcu_dereference_bh(sk->sk_filter); rcu_assign_pointer(sk->sk_filter, fp); rcu_read_unlock_bh(); @@ -536,7 +536,7 @@ int sk_detach_filter(struct sock *sk) struct sk_filter *filter; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_delayed_uncharge(sk, filter); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 794bcb897ff0..4c7d3f635ba7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -89,6 +89,14 @@ int rtnl_is_locked(void) } EXPORT_SYMBOL(rtnl_is_locked); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rtnl_is_held(void) +{ + return lockdep_is_held(&rtnl_mutex); +} +EXPORT_SYMBOL(lockdep_rtnl_is_held); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; static inline int rtm_msgindex(int msgtype) diff --git a/net/core/sock.c b/net/core/sock.c index e1f6f225f012..305cba401ae6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1073,7 +1073,8 @@ static void __sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_check(sk->sk_filter, + atomic_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); rcu_assign_pointer(sk->sk_filter, NULL); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a03284061a31..a7bf03ca0a36 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1155,8 +1155,8 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); - for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference(rt->u.dst.dn_next)) { + for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; + rt = rcu_dereference_bh(rt->u.dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == rt->fl.mark) && @@ -1618,9 +1618,9 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (h > s_h) s_idx = 0; rcu_read_lock_bh(); - for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0; + for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference(rt->u.dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->u.dst)); @@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq) for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) { rcu_read_lock_bh(); - rt = dn_rt_hash_table[s->bucket].chain; + rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); if (rt) break; rcu_read_unlock_bh(); } - return rcu_dereference(rt); + return rt; } static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt) @@ -1674,7 +1674,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou rcu_read_lock_bh(); rt = dn_rt_hash_table[s->bucket].chain; } - return rcu_dereference(rt); + return rcu_dereference_bh(rt); } static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d62b05d33384..4f11faa5c824 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) if (!rt_hash_table[st->bucket].chain) continue; rcu_read_lock_bh(); - r = rcu_dereference(rt_hash_table[st->bucket].chain); + r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { if (dev_net(r->u.dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference(r->u.dst.rt_next); + r = rcu_dereference_bh(r->u.dst.rt_next); } rcu_read_unlock_bh(); } @@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } - return rcu_dereference(r); + return rcu_dereference_bh(r); } static struct rtable *rt_cache_get_next(struct seq_file *seq, @@ -2689,8 +2689,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; + rth = rcu_dereference_bh(rth->u.dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -3008,8 +3008,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!rt_hash_table[h].chain) continue; rcu_read_lock_bh(); - for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference(rt->u.dst.rt_next), idx++) { + for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; + rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f126d18dbdc4..939471ef8d50 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -508,7 +508,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, struct sk_filter *filter; rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + filter = rcu_dereference_bh(sk->sk_filter); if (filter != NULL) res = sk_run_filter(skb, filter->insns, filter->len); rcu_read_unlock_bh(); -- cgit v1.2.3 From 45bb00609022ecf1d97e083666c68c74d237b799 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Thu, 25 Feb 2010 23:28:58 +0000 Subject: ipv6: Remove IPV6_ADDR_RESERVED RFC 4291 section 2.4 states that all uncategorized addresses should be considered as Global Unicast. This will remove IPV6_ADDR_RESERVED completely and return IPV6_ADDR_UNICAST in ipv6_addr_type() instead. Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 - net/ipv6/addrconf.c | 3 +-- net/ipv6/addrconf_core.c | 2 +- net/ipv6/route.c | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d067db1f88c7..e72fb10ce573 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -73,7 +73,6 @@ #define IPV6_ADDR_SCOPE_MASK 0x00f0U #define IPV6_ADDR_MAPPED 0x1000U -#define IPV6_ADDR_RESERVED 0x2000U /* reserved address space */ /* * Addr scopes diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b327f15e7e7..88fd8c5877ee 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -992,8 +992,7 @@ struct ipv6_saddr_dst { static inline int ipv6_saddr_preferred(int type) { - if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| - IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) + if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK)) return 1; return 0; } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 3f82e9542eda..6b03826552e1 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -72,7 +72,7 @@ int __ipv6_addr_type(const struct in6_addr *addr) IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ } - return (IPV6_ADDR_RESERVED | + return (IPV6_ADDR_UNICAST | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ } EXPORT_SYMBOL(__ipv6_addr_type); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 88c0a5c49ae8..b08879e97f22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1873,7 +1873,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); - if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { + if (type == IPV6_ADDR_ANY) { IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); break; -- cgit v1.2.3 From 3729d5021257b283f7fce33d957893162ccb2c9d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Feb 2010 06:34:54 +0000 Subject: rtnetlink: support specifying device flags on device creation commit e8469ed959c373c2ff9e6f488aa5a14971aebe1f Author: Patrick McHardy Date: Tue Feb 23 20:41:30 2010 +0100 Support specifying the initial device flags when creating a device though rtnl_link. Devices allocated by rtnl_create_link() are marked as INITIALIZING in order to surpress netlink registration notifications. To complete setup, rtnl_configure_link() must be called, which performs the device flag changes and invokes the deferred notifiers if everything went well. Two examples: # add macvlan to eth0 # $ ip link add link eth0 up allmulticast on type macvlan [LINK]11: macvlan0@eth0: mtu 1500 qdisc noqueue state UNKNOWN link/ether 26:f8:84:02:f9:2a brd ff:ff:ff:ff:ff:ff [ROUTE]ff00::/8 dev macvlan0 table local metric 256 mtu 1500 advmss 1440 hoplimit 0 [ROUTE]fe80::/64 dev macvlan0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit 0 [LINK]11: macvlan0@eth0: mtu 1500 link/ether 26:f8:84:02:f9:2a [ADDR]11: macvlan0 inet6 fe80::24f8:84ff:fe02:f92a/64 scope link valid_lft forever preferred_lft forever [ROUTE]local fe80::24f8:84ff:fe02:f92a via :: dev lo table local proto none metric 0 mtu 16436 advmss 16376 hoplimit 0 [ROUTE]default via fe80::215:e9ff:fef0:10f8 dev macvlan0 proto kernel metric 1024 mtu 1500 advmss 1440 hoplimit 0 [NEIGH]fe80::215:e9ff:fef0:10f8 dev macvlan0 lladdr 00:15:e9:f0:10:f8 router STALE [ROUTE]2001:6f8:974::/64 dev macvlan0 proto kernel metric 256 expires 0sec mtu 1500 advmss 1440 hoplimit 0 [PREFIX]prefix 2001:6f8:974::/64 dev macvlan0 onlink autoconf valid 14400 preferred 131084 [ADDR]11: macvlan0 inet6 2001:6f8:974:0:24f8:84ff:fe02:f92a/64 scope global dynamic valid_lft 86399sec preferred_lft 14399sec # add VLAN to eth1, eth1 is down # $ ip link add link eth1 up type vlan id 1000 RTNETLINK answers: Network is down Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/veth.c | 15 +++++++++----- include/net/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 52 ++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 55 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 35609e64f6fd..b583d4968add 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -333,19 +333,17 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, struct veth_priv *priv; char ifname[IFNAMSIZ]; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; + struct ifinfomsg *ifmp; struct net *net; /* * create and register peer first - * - * struct ifinfomsg is at the head of VETH_INFO_PEER, but we - * skip it since no info from it is useful yet */ - if (data != NULL && data[VETH_INFO_PEER] != NULL) { struct nlattr *nla_peer; nla_peer = data[VETH_INFO_PEER]; + ifmp = nla_data(nla_peer); err = nla_parse(peer_tb, IFLA_MAX, nla_data(nla_peer) + sizeof(struct ifinfomsg), nla_len(nla_peer) - sizeof(struct ifinfomsg), @@ -358,8 +356,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, return err; tbp = peer_tb; - } else + } else { + ifmp = NULL; tbp = tb; + } if (tbp[IFLA_IFNAME]) nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); @@ -387,6 +387,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, netif_carrier_off(peer); + err = rtnl_configure_link(peer, ifmp); + if (err < 0) + goto err_configure_peer; + /* * register dev last * @@ -428,6 +432,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, err_register_dev: /* nothing to do */ err_alloc_name: +err_configure_peer: unregister_netdevice(peer); return err; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 48d3efcb0880..af60fd050844 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -87,6 +87,8 @@ extern void rtnl_link_unregister(struct rtnl_link_ops *ops); extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]); +extern int rtnl_configure_link(struct net_device *dev, + const struct ifinfomsg *ifm); extern const struct nla_policy ifla_policy[IFLA_MAX+1]; #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c21ec4236dd0..d1472a423323 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -549,6 +549,19 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } +static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, + const struct ifinfomsg *ifm) +{ + unsigned int flags = ifm->ifi_flags; + + /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ + if (ifm->ifi_change) + flags = (flags & ifm->ifi_change) | + (dev->flags & ~ifm->ifi_change); + + return flags; +} + static void copy_rtnl_link_stats(struct rtnl_link_stats *a, const struct net_device_stats *b) { @@ -904,13 +917,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, } if (ifm->ifi_flags || ifm->ifi_change) { - unsigned int flags = ifm->ifi_flags; - - /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ - if (ifm->ifi_change) - flags = (flags & ifm->ifi_change) | - (dev->flags & ~ifm->ifi_change); - err = dev_change_flags(dev, flags); + err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm)); if (err < 0) goto errout; } @@ -1053,6 +1060,26 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return 0; } +int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) +{ + unsigned int old_flags; + int err; + + old_flags = dev->flags; + if (ifm && (ifm->ifi_flags || ifm->ifi_change)) { + err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm)); + if (err < 0) + return err; + } + + dev->rtnl_link_state = RTNL_LINK_INITIALIZED; + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); + + __dev_notify_flags(dev, old_flags); + return 0; +} +EXPORT_SYMBOL(rtnl_configure_link); + struct net_device *rtnl_create_link(struct net *src_net, struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { @@ -1074,6 +1101,7 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev_net_set(dev, net); dev->rtnl_link_ops = ops; + dev->rtnl_link_state = RTNL_LINK_INITIALIZING; dev->real_num_tx_queues = real_num_queues; if (strchr(dev->name, '%')) { @@ -1203,7 +1231,7 @@ replay: if (!(nlh->nlmsg_flags & NLM_F_CREATE)) return -ENODEV; - if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) + if (ifm->ifi_index) return -EOPNOTSUPP; if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; @@ -1234,9 +1262,15 @@ replay: err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) + if (err < 0 && !IS_ERR(dev)) { free_netdev(dev); + goto out; + } + err = rtnl_configure_link(dev, ifm); + if (err < 0) + unregister_netdevice(dev); +out: put_net(dest_net); return err; } -- cgit v1.2.3 From c13854cef4751000b968d4e8ac95796562d5b96f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 8 Feb 2010 15:27:07 +0100 Subject: Bluetooth: Convert controller hdev->type to hdev->bus The hdev->type is misnamed and should be actually hdev->bus instead. So convert it now. Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bfusb.c | 2 +- drivers/bluetooth/bluecard_cs.c | 2 +- drivers/bluetooth/bpa10x.c | 2 +- drivers/bluetooth/bt3c_cs.c | 2 +- drivers/bluetooth/btmrvl_main.c | 2 +- drivers/bluetooth/btsdio.c | 2 +- drivers/bluetooth/btuart_cs.c | 2 +- drivers/bluetooth/btusb.c | 2 +- drivers/bluetooth/dtl1_cs.c | 2 +- drivers/bluetooth/hci_ldisc.c | 2 +- drivers/bluetooth/hci_vhci.c | 2 +- include/net/bluetooth/hci.h | 2 +- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 8 ++++---- net/bluetooth/hci_sysfs.c | 16 ++++++++-------- 15 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 2a00707aba3b..005919ab043c 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -703,7 +703,7 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i data->hdev = hdev; - hdev->type = HCI_USB; + hdev->bus = HCI_USB; hdev->driver_data = data; SET_HCIDEV_DEV(hdev, &intf->dev); diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index c2cf81144715..d9bf87ca9e83 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -736,7 +736,7 @@ static int bluecard_open(bluecard_info_t *info) info->hdev = hdev; - hdev->type = HCI_PCCARD; + hdev->bus = HCI_PCCARD; hdev->driver_data = info; SET_HCIDEV_DEV(hdev, &info->p_dev->dev); diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index c115285867c3..d945cd12433a 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -469,7 +469,7 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id * return -ENOMEM; } - hdev->type = HCI_USB; + hdev->bus = HCI_USB; hdev->driver_data = data; data->hdev = hdev; diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index 9f5926aaf57f..027cb8bf650f 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -582,7 +582,7 @@ static int bt3c_open(bt3c_info_t *info) info->hdev = hdev; - hdev->type = HCI_PCCARD; + hdev->bus = HCI_PCCARD; hdev->driver_data = info; SET_HCIDEV_DEV(hdev, &info->p_dev->dev); diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index f97771ce432c..53a43adf2e21 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -563,7 +563,7 @@ struct btmrvl_private *btmrvl_add_card(void *card) priv->btmrvl_dev.tx_dnld_rdy = true; - hdev->type = HCI_SDIO; + hdev->bus = HCI_SDIO; hdev->open = btmrvl_open; hdev->close = btmrvl_close; hdev->flush = btmrvl_flush; diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 7e298275c8f6..76e5127884f0 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -326,7 +326,7 @@ static int btsdio_probe(struct sdio_func *func, return -ENOMEM; } - hdev->type = HCI_SDIO; + hdev->bus = HCI_SDIO; hdev->driver_data = data; data->hdev = hdev; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index 91c523099804..60c0953d7d00 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -500,7 +500,7 @@ static int btuart_open(btuart_info_t *info) info->hdev = hdev; - hdev->type = HCI_PCCARD; + hdev->bus = HCI_PCCARD; hdev->driver_data = info; SET_HCIDEV_DEV(hdev, &info->p_dev->dev); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a699f09ddf7c..5d9cc53bd643 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -939,7 +939,7 @@ static int btusb_probe(struct usb_interface *intf, return -ENOMEM; } - hdev->type = HCI_USB; + hdev->bus = HCI_USB; hdev->driver_data = data; data->hdev = hdev; diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index 697591941e17..17788317c51a 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -485,7 +485,7 @@ static int dtl1_open(dtl1_info_t *info) info->hdev = hdev; - hdev->type = HCI_PCCARD; + hdev->bus = HCI_PCCARD; hdev->driver_data = info; SET_HCIDEV_DEV(hdev, &info->p_dev->dev); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index aa0919386b8c..76a1abb8f214 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -383,7 +383,7 @@ static int hci_uart_register_dev(struct hci_uart *hu) hu->hdev = hdev; - hdev->type = HCI_UART; + hdev->bus = HCI_UART; hdev->driver_data = hu; hdev->open = hci_uart_open; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 7595274103fd..bb0aefdb4267 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -236,7 +236,7 @@ static int vhci_open(struct inode *inode, struct file *file) data->hdev = hdev; - hdev->type = HCI_VIRTUAL; + hdev->bus = HCI_VIRTUAL; hdev->driver_data = data; hdev->open = vhci_open_dev; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ed3aea1605e8..3350a665180f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -43,7 +43,7 @@ #define HCI_NOTIFY_CONN_DEL 2 #define HCI_NOTIFY_VOICE_SETTING 3 -/* HCI device types */ +/* HCI bus types */ #define HCI_VIRTUAL 0 #define HCI_USB 1 #define HCI_PCCARD 2 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7b86094a894b..7e65885290d5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -70,7 +70,7 @@ struct hci_dev { char name[8]; unsigned long flags; __u16 id; - __u8 type; + __u8 bus; bdaddr_t bdaddr; __u8 dev_name[248]; __u8 dev_class[3]; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 94ba34982021..4b62ed01ddc6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -797,7 +797,7 @@ int hci_get_dev_info(void __user *arg) strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; - di.type = hdev->type; + di.type = hdev->bus; di.flags = hdev->flags; di.pkt_type = hdev->pkt_type; di.acl_mtu = hdev->acl_mtu; @@ -869,8 +869,8 @@ int hci_register_dev(struct hci_dev *hdev) struct list_head *head = &hci_dev_list, *p; int i, id = 0; - BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, - hdev->type, hdev->owner); + BT_DBG("%p name %s bus %d owner %p", hdev, hdev->name, + hdev->bus, hdev->owner); if (!hdev->open || !hdev->close || !hdev->destruct) return -EINVAL; @@ -946,7 +946,7 @@ int hci_unregister_dev(struct hci_dev *hdev) { int i; - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); write_lock_bh(&hci_dev_list_lock); list_del(&hdev->list); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 2bc6f6a8de68..9b5f376813b7 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -166,9 +166,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) queue_work(bt_workq, &conn->work_del); } -static inline char *host_typetostr(int type) +static inline char *host_bustostr(int bus) { - switch (type) { + switch (bus) { case HCI_VIRTUAL: return "VIRTUAL"; case HCI_USB: @@ -188,10 +188,10 @@ static inline char *host_typetostr(int type) } } -static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_bus(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", host_typetostr(hdev->type)); + return sprintf(buf, "%s\n", host_bustostr(hdev->bus)); } static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -355,7 +355,7 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib return count; } -static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static DEVICE_ATTR(bus, S_IRUGO, show_bus, NULL); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static DEVICE_ATTR(class, S_IRUGO, show_class, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); @@ -373,7 +373,7 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, show_sniff_min_interval, store_sniff_min_interval); static struct attribute *bt_host_attrs[] = { - &dev_attr_type.attr, + &dev_attr_bus.attr, &dev_attr_name.attr, &dev_attr_class.attr, &dev_attr_address.attr, @@ -414,7 +414,7 @@ int hci_register_sysfs(struct hci_dev *hdev) struct device *dev = &hdev->dev; int err; - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); dev->type = &bt_host; dev->class = bt_class; @@ -433,7 +433,7 @@ int hci_register_sysfs(struct hci_dev *hdev) void hci_unregister_sysfs(struct hci_dev *hdev) { - BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); + BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); device_del(&hdev->dev); } -- cgit v1.2.3 From ca325f698996c1a0770a67f41e7dc97a007d8bc2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 8 Feb 2010 16:22:31 +0100 Subject: Bluetooth: Convert inquiry cache to use debugfs instead of sysfs The output of the inquiry cache is only useful for debugging purposes and so move it into debugfs. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_sysfs.c | 92 +++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7e65885290d5..4c94c1e233a2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -134,6 +134,8 @@ struct hci_dev { atomic_t promisc; + struct dentry *debugfs; + struct device *parent; struct device dev; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9b5f376813b7..f9d93f9cbcd2 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -9,6 +10,9 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); +struct dentry *bt_debugfs = NULL; +EXPORT_SYMBOL_GPL(bt_debugfs); + static struct workqueue_struct *bt_workq; static inline char *link_typetostr(int type) @@ -251,32 +255,6 @@ static ssize_t show_hci_revision(struct device *dev, struct device_attribute *at return sprintf(buf, "%d\n", hdev->hci_rev); } -static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct hci_dev *hdev = dev_get_drvdata(dev); - struct inquiry_cache *cache = &hdev->inq_cache; - struct inquiry_entry *e; - int n = 0; - - hci_dev_lock_bh(hdev); - - for (e = cache->list; e; e = e->next) { - struct inquiry_data *data = &e->data; - bdaddr_t bdaddr; - baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); - } - - hci_dev_unlock_bh(hdev); - return n; -} - static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -363,7 +341,6 @@ static DEVICE_ATTR(features, S_IRUGO, show_features, NULL); static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL); static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL); static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL); -static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR, show_idle_timeout, store_idle_timeout); @@ -381,7 +358,6 @@ static struct attribute *bt_host_attrs[] = { &dev_attr_manufacturer.attr, &dev_attr_hci_version.attr, &dev_attr_hci_revision.attr, - &dev_attr_inquiry_cache.attr, &dev_attr_idle_timeout.attr, &dev_attr_sniff_max_interval.attr, &dev_attr_sniff_min_interval.attr, @@ -409,6 +385,46 @@ static struct device_type bt_host = { .release = bt_host_release, }; +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + struct inquiry_cache *cache = &hdev->inq_cache; + struct inquiry_entry *e; + char buf[4096]; + int n = 0; + + hci_dev_lock_bh(hdev); + + for (e = cache->list; e; e = e->next) { + struct inquiry_data *data = &e->data; + bdaddr_t bdaddr; + baswap(&bdaddr, &data->bdaddr); + n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + batostr(&bdaddr), + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); + } + + hci_dev_unlock_bh(hdev); + + return simple_read_from_buffer(userbuf, count, ppos, buf, n); +} + +static const struct file_operations inquiry_cache_fops = { + .open = inquiry_cache_open, + .read = inquiry_cache_read, +}; + int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; @@ -428,6 +444,16 @@ int hci_register_sysfs(struct hci_dev *hdev) if (err < 0) return err; + if (!bt_debugfs) + return 0; + + hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); + if (!hdev->debugfs) + return 0; + + debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, + hdev, &inquiry_cache_fops); + return 0; } @@ -435,6 +461,8 @@ void hci_unregister_sysfs(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); + debugfs_remove_recursive(hdev->debugfs); + device_del(&hdev->dev); } @@ -444,6 +472,8 @@ int __init bt_sysfs_init(void) if (!bt_workq) return -ENOMEM; + bt_debugfs = debugfs_create_dir("bluetooth", NULL); + bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { destroy_workqueue(bt_workq); @@ -455,7 +485,9 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(bt_workq); - class_destroy(bt_class); + + debugfs_remove_recursive(bt_debugfs); + + destroy_workqueue(bt_workq); } -- cgit v1.2.3 From 943da25d95c7e8fd8c39dbf09e030f5da46f5d85 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 13 Feb 2010 02:28:41 +0100 Subject: Bluetooth: Add controller types for BR/EDR and 802.11 AMP With the Bluetooth 3.0 specification and the introduction of alternate MAC/PHY (AMP) support, it is required to differentiate between primary BR/EDR controllers and 802.11 AMP controllers. So introduce a special type inside HCI device for differentiation. For now all AMP controllers will be treated as raw devices until an AMP manager has been implemented. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 4 ++++ include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 6 +++++- net/bluetooth/hci_sysfs.c | 20 ++++++++++++++++++++ 4 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 3350a665180f..fc0c502d9fd1 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -52,6 +52,10 @@ #define HCI_PCI 5 #define HCI_SDIO 6 +/* HCI controller types */ +#define HCI_BREDR 0x00 +#define HCI_80211 0x01 + /* HCI device quirks */ enum { HCI_QUIRK_NO_RESET, diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4c94c1e233a2..ce3c99e5fa25 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -71,6 +71,7 @@ struct hci_dev { unsigned long flags; __u16 id; __u8 bus; + __u8 dev_type; bdaddr_t bdaddr; __u8 dev_name[248]; __u8 dev_class[3]; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4b62ed01ddc6..4ad23192c7a5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -491,6 +491,10 @@ int hci_dev_open(__u16 dev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) set_bit(HCI_RAW, &hdev->flags); + /* Treat all non BR/EDR controllers as raw devices for now */ + if (hdev->dev_type != HCI_BREDR) + set_bit(HCI_RAW, &hdev->flags); + if (hdev->open(hdev)) { ret = -EIO; goto done; @@ -797,7 +801,7 @@ int hci_get_dev_info(void __user *arg) strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; - di.type = hdev->bus; + di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); di.flags = hdev->flags; di.pkt_type = hdev->pkt_type; di.acl_mtu = hdev->acl_mtu; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f9d93f9cbcd2..1a79a6c7e30e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -192,12 +192,30 @@ static inline char *host_bustostr(int bus) } } +static inline char *host_typetostr(int type) +{ + switch (type) { + case HCI_BREDR: + return "BR/EDR"; + case HCI_80211: + return "802.11"; + default: + return "UNKNOWN"; + } +} + static ssize_t show_bus(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); return sprintf(buf, "%s\n", host_bustostr(hdev->bus)); } +static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", host_typetostr(hdev->dev_type)); +} + static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -334,6 +352,7 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib } static DEVICE_ATTR(bus, S_IRUGO, show_bus, NULL); +static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static DEVICE_ATTR(class, S_IRUGO, show_class, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); @@ -351,6 +370,7 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, static struct attribute *bt_host_attrs[] = { &dev_attr_bus.attr, + &dev_attr_type.attr, &dev_attr_name.attr, &dev_attr_class.attr, &dev_attr_address.attr, -- cgit v1.2.3 From 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Mar 2010 02:51:56 +0000 Subject: ipsec: Fix bogus bundle flowi When I merged the bundle creation code, I introduced a bogus flowi value in the bundle. Instead of getting from the caller, it was instead set to the flow in the route object, which is totally different. The end result is that the bundles we created never match, and we instead end up with an ever growing bundle list. Thanks to Jamal for find this problem. Reported-by: Jamal Hadi Salim Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 3 ++- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a7df3275b860..d74e080ba6c9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -275,7 +275,8 @@ struct xfrm_policy_afinfo { struct dst_entry *dst, int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, - struct net_device *dev); + struct net_device *dev, + struct flowi *fl); }; extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d63c1cd..e4a1483fba77 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696f5fc5..ae181651c75a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8316e7..843e066649cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; -- cgit v1.2.3 From 4fa004373133ece3d9b1c0a7e243b0e53760b165 Mon Sep 17 00:00:00 2001 From: Sujith Date: Mon, 1 Mar 2010 14:42:57 +0530 Subject: mac80211: Fix HT rate control configuration Handling HT configuration changes involved setting the channel with the new HT parameters and then issuing a rate_update() notification to the driver. This behavior changed after the off-channel changes. Now, the channel is not updated with the new HT params in enable_ht() - instead, it is now done when the scan work terminates. This results in the driver depending on stale information, defaulting to non-HT mode always. Fix this by passing the new channel type to the driver. Cc: stable@kernel.org Signed-off-by: Sujith Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/rc.c | 6 +++--- include/net/mac80211.h | 3 ++- net/mac80211/mlme.c | 3 ++- net/mac80211/rate.h | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index ac34a055c713..0e79e58cf4c9 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1323,7 +1323,7 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, - u32 changed) + u32 changed, enum nl80211_channel_type oper_chan_type) { struct ath_softc *sc = priv; struct ath_rate_priv *ath_rc_priv = priv_sta; @@ -1340,8 +1340,8 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, if (sc->sc_ah->opmode != NL80211_IFTYPE_STATION) return; - if (sc->hw->conf.channel_type == NL80211_CHAN_HT40MINUS || - sc->hw->conf.channel_type == NL80211_CHAN_HT40PLUS) + if (oper_chan_type == NL80211_CHAN_HT40MINUS || + oper_chan_type == NL80211_CHAN_HT40PLUS) oper_cw40 = true; oper_sgi40 = (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40) ? diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 80eb7cc42ce9..45d7d44d7cbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2426,7 +2426,8 @@ struct rate_control_ops { struct ieee80211_sta *sta, void *priv_sta); void (*rate_update)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, - void *priv_sta, u32 changed); + void *priv_sta, u32 changed, + enum nl80211_channel_type oper_chan_type); void (*free_sta)(void *priv, struct ieee80211_sta *sta, void *priv_sta); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a268761e4c5..0ab284c32135 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, - IEEE80211_RC_HT_CHANGED); + IEEE80211_RC_HT_CHANGED, + local->oper_channel_type); rcu_read_unlock(); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b6108bca96d4..065a96190e32 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, u32 changed) + struct sta_info *sta, u32 changed, + enum nl80211_channel_type oper_chan_type) { struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, - priv_sta, changed); + priv_sta, changed, oper_chan_type); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, -- cgit v1.2.3 From c839d30a41dd92eb32d7fcfa2b4e99042fc64bf2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 3 Mar 2010 04:46:50 +0000 Subject: net: add scheduler sync hint to tcp_prequeue(). Decreases the odds wakee will suffer from frequent cache misses. Signed-off-by: Mike Galbraith Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 56f0aec40ed6..75be5a28815d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, -- cgit v1.2.3 From 0fb80abd911a7cb1e6548b5279568dc1e8949702 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:49:11 +0000 Subject: 9P2010.L handshake: Add mount option Add new mount V9FS mount option to specify protocol version This patch adds a new mount option to specify protocol version. With this option it is possible to use "-o version=" switch to specify 9P protocol version to use. Valid options for version are: 9p2000 9p2000.u 9p2010.L Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- include/net/9p/client.h | 15 +++++++++++++++ net/9p/client.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index fb00b329f0d3..d40f8c55dfae 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -29,6 +29,19 @@ /* Number of requests per row */ #define P9_ROW_MAXTAG 255 +/** enum p9_proto_versions - 9P protocol versions + * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u + * @p9_proto_2000u: 9P2000.u extension + * @p9_proto_2010L: 9P2010.L extension + */ + +enum p9_proto_versions{ + p9_proto_legacy = 0, + p9_proto_2000u = 1, + p9_proto_2010L = 2, +}; + + /** * enum p9_trans_status - different states of underlying transports * @Connected: transport is connected and healthy @@ -111,6 +124,7 @@ struct p9_req_t { * @lock: protect @fidlist * @msize: maximum data size negotiated by protocol * @dotu: extension flags negotiated by protocol + * @proto_version: 9P protocol version to use * @trans_mod: module API instantiated with this client * @trans: tranport instance state and API * @conn: connection state information used by trans_fd @@ -138,6 +152,7 @@ struct p9_client { spinlock_t lock; /* protect client structure */ int msize; unsigned char dotu; + unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; void *trans; diff --git a/net/9p/client.c b/net/9p/client.c index 09d4f1e2e4a8..3b5f3c94a6eb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -46,6 +46,7 @@ enum { Opt_msize, Opt_trans, Opt_legacy, + Opt_version, Opt_err, }; @@ -53,9 +54,30 @@ static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, + {Opt_version, "version=%s"}, {Opt_err, NULL}, }; +/* Interpret mount option for protocol version */ +static unsigned char get_protocol_version(const substring_t *name) +{ + unsigned char version = -EINVAL; + if (!strncmp("9p2000", name->from, name->to-name->from)) { + version = p9_proto_legacy; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); + } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + version = p9_proto_2000u; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + } else if (!strncmp("9p2010.L", name->from, name->to-name->from)) { + version = p9_proto_2010L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n"); + } else { + P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", + name->from); + } + return version; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -120,6 +142,12 @@ static int parse_opts(char *opts, struct p9_client *clnt) case Opt_legacy: clnt->dotu = 0; break; + case Opt_version: + ret = get_protocol_version(&args[0]); + if (ret == -EINVAL) + goto free_and_return; + clnt->proto_version = ret; + break; default: continue; } -- cgit v1.2.3 From 342fee1d5c7dfa05f4e14ec1e583df4553b09776 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:50:14 +0000 Subject: 9P2010.L handshake: Remove "dotu" variable Removes 'dotu' variable and make everything dependent on 'proto_version' field. Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 2 +- fs/9p/vfs_dir.c | 2 +- include/net/9p/client.h | 3 +- net/9p/client.c | 65 ++++++++++++++++++++++++++----------------- net/9p/protocol.c | 74 +++++++++++++++++++++++++++---------------------- net/9p/protocol.h | 6 ++-- 6 files changed, 87 insertions(+), 65 deletions(-) (limited to 'include/net') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 247f10a934ed..6c7f6a251115 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -262,7 +262,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, goto error; } - if (!v9ses->clnt->dotu) + if (!p9_is_proto_dotu(v9ses->clnt)) v9ses->flags &= ~V9FS_PROTO_2000U; v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 15cce53bf61e..6580aa449541 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -135,7 +135,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) while (rdir->head < rdir->tail) { err = p9stat_read(rdir->buf + rdir->head, buflen - rdir->head, &st, - fid->clnt->dotu); + fid->clnt->proto_version); if (err) { P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); err = -EIO; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d40f8c55dfae..52e1fff709e4 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -151,7 +151,6 @@ struct p9_req_t { struct p9_client { spinlock_t lock; /* protect client structure */ int msize; - unsigned char dotu; unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; @@ -224,5 +223,7 @@ int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); int p9stat_read(char *, int, struct p9_wstat *, int); void p9stat_free(struct p9_wstat *); +int p9_is_proto_dotu(struct p9_client *clnt); +int p9_is_proto_dotl(struct p9_client *clnt); #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index 3b5f3c94a6eb..9994676e57da 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -58,6 +58,18 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; +inline int p9_is_proto_dotl(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2010L); +} +EXPORT_SYMBOL(p9_is_proto_dotl); + +inline int p9_is_proto_dotu(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2000u); +} +EXPORT_SYMBOL(p9_is_proto_dotu); + /* Interpret mount option for protocol version */ static unsigned char get_protocol_version(const substring_t *name) { @@ -97,7 +109,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->dotu = 1; + clnt->proto_version = p9_proto_2000u; clnt->msize = 8192; if (!opts) @@ -140,7 +152,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } break; case Opt_legacy: - clnt->dotu = 0; + clnt->proto_version = p9_proto_legacy; break; case Opt_version: ret = get_protocol_version(&args[0]); @@ -438,14 +450,15 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; char *ename; - err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } - if (c->dotu) + if (p9_is_proto_dotu(c)) err = -ecode; if (!err || !IS_ERR_VALUE(err)) @@ -543,7 +556,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) /* marshall the data */ p9pdu_prepare(req->tc, tag, type); va_start(ap, fmt); - err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); va_end(ap); p9pdu_finalize(req->tc); @@ -655,14 +668,14 @@ int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", - c->msize, c->dotu); + P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, - c->dotu ? "9P2000.u" : "9P2000"); + p9_is_proto_dotu(c) ? "9P2000.u" : "9P2000"); if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); p9pdu_dump(1, req->rc); @@ -670,10 +683,10 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!memcmp(version, "9P2000.u", 8)) - c->dotu = 1; - else if (!memcmp(version, "9P2000", 6)) - c->dotu = 0; + if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; else { err = -EREMOTEIO; goto error; @@ -728,8 +741,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->dotu); + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) @@ -812,7 +825,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -861,7 +874,7 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -919,7 +932,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); + err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -980,7 +993,7 @@ int p9_client_open(struct p9_fid *fid, int mode) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1025,7 +1038,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1126,7 +1139,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1187,7 +1200,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1227,7 +1240,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -1254,7 +1267,7 @@ error: } EXPORT_SYMBOL(p9_client_stat); -static int p9_client_statsize(struct p9_wstat *wst, int optional) +static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; @@ -1273,7 +1286,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) if (wst->muid) ret += strlen(wst->muid); - if (optional) { + if (proto_version == p9_proto_2000u) { ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); @@ -1290,7 +1303,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; - wst->size = p9_client_statsize(wst, clnt->dotu); + wst->size = p9_client_statsize(wst, clnt->proto_version); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" diff --git a/net/9p/protocol.c b/net/9p/protocol.c index fc70147c771e..94f5a8f65e9c 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -52,7 +52,7 @@ #endif static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); #ifdef CONFIG_NET_9P_DEBUG void @@ -144,7 +144,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) */ static int -p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -194,7 +195,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t len; int size; - errcode = p9pdu_readf(pdu, optional, "w", &len); + errcode = p9pdu_readf(pdu, proto_version, + "w", &len); if (errcode) break; @@ -217,7 +219,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *qid = va_arg(ap, struct p9_qid *); - errcode = p9pdu_readf(pdu, optional, "bdq", + errcode = p9pdu_readf(pdu, proto_version, "bdq", &qid->type, &qid->version, &qid->path); } @@ -230,7 +232,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = -1; errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, proto_version, "wwdQdddqssss?sddd", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, @@ -250,7 +252,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) void **data = va_arg(ap, void **); errcode = - p9pdu_readf(pdu, optional, "d", count); + p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = MIN(*count, @@ -263,8 +265,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t *nwname = va_arg(ap, int16_t *); char ***wnames = va_arg(ap, char ***); - errcode = - p9pdu_readf(pdu, optional, "w", nwname); + errcode = p9pdu_readf(pdu, proto_version, + "w", nwname); if (!errcode) { *wnames = kmalloc(sizeof(char *) * *nwname, @@ -278,7 +280,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwname; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "s", &(*wnames)[i]); if (errcode) @@ -306,7 +309,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) *wqids = NULL; errcode = - p9pdu_readf(pdu, optional, "w", nwqid); + p9pdu_readf(pdu, proto_version, "w", nwqid); if (!errcode) { *wqids = kmalloc(*nwqid * @@ -321,7 +324,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwqid; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "Q", &(*wqids)[i]); if (errcode) @@ -336,7 +340,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -352,7 +356,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -389,7 +394,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) if (sptr) len = MIN(strlen(sptr), USHORT_MAX); - errcode = p9pdu_writef(pdu, optional, "w", len); + errcode = p9pdu_writef(pdu, proto_version, + "w", len); if (!errcode && pdu_write(pdu, sptr, len)) errcode = -EFAULT; } @@ -398,7 +404,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); errcode = - p9pdu_writef(pdu, optional, "bdq", + p9pdu_writef(pdu, proto_version, "bdq", qid->type, qid->version, qid->path); } break; @@ -406,7 +412,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_wstat *stbuf = va_arg(ap, const struct p9_wstat *); errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, proto_version, "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, @@ -421,8 +427,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const void *data = va_arg(ap, const void *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write(pdu, data, count)) errcode = -EFAULT; } @@ -431,8 +437,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const char __user *udata = va_arg(ap, const void __user *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write_u(pdu, udata, count)) errcode = -EFAULT; } @@ -441,14 +447,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); - errcode = - p9pdu_writef(pdu, optional, "w", nwname); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwname); if (!errcode) { int i; for (i = 0; i < nwname; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "s", wnames[i]); if (errcode) @@ -462,14 +469,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *wqids = va_arg(ap, struct p9_qid *); - errcode = - p9pdu_writef(pdu, optional, "w", nwqid); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwqid); if (!errcode) { int i; for (i = 0; i < nwqid; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "Q", &wqids[i]); if (errcode) @@ -479,7 +487,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -494,32 +502,32 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) return errcode; } -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...) +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vreadf(pdu, optional, fmt, ap); + ret = p9pdu_vreadf(pdu, proto_version, fmt, ap); va_end(ap); return ret; } static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vwritef(pdu, optional, fmt, ap); + ret = p9pdu_vwritef(pdu, proto_version, fmt, ap); va_end(ap); return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) { struct p9_fcall fake_pdu; int ret; @@ -529,7 +537,7 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); p9pdu_dump(1, &fake_pdu); diff --git a/net/9p/protocol.h b/net/9p/protocol.h index ccde462e7ac5..2431c0f38d56 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -25,9 +25,9 @@ * */ -int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); int p9pdu_finalize(struct p9_fcall *pdu); void p9pdu_dump(int, struct p9_fcall *); -- cgit v1.2.3 From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:40 +0000 Subject: net: add limit for socket backlog We got system OOM while running some UDP netperf testing on the loopback device. The case is multiple senders sent stream UDP packets to a single receiver via loopback on local host. Of course, the receiver is not able to handle all the packets in time. But we surprisingly found that these packets were not discarded due to the receiver's sk->sk_rcvbuf limit. Instead, they are kept queuing to sk->sk_backlog and finally ate up all the memory. We believe this is a secure hole that a none privileged user can crash the system. The root cause for this problem is, when the receiver is doing __release_sock() (i.e. after userspace recv, kernel udp_recvmsg -> skb_free_datagram_locked -> release_sock), it moves skbs from backlog to sk_receive_queue with the softirq enabled. In the above case, multiple busy senders will almost make it an endless loop. The skbs in the backlog end up eat all the system memory. The issue is not only for UDP. Any protocols using socket backlog is potentially affected. The patch adds limit for socket backlog so that the backlog size cannot be expanded endlessly. Reported-by: Alex Shi Cc: David Miller Cc: Arnaldo Carvalho de Melo Cc: Alexey Kuznetsov Cc: Patrick McHardy Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Signed-off-by: Zhu Yi Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 15 ++++++++++++++- net/core/sock.c | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 6cb1676e409a..2516d76f043c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -253,6 +253,8 @@ struct sock { struct { struct sk_buff *head; struct sk_buff *tail; + int len; + int limit; } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; @@ -589,7 +591,7 @@ static inline int sk_stream_memory_free(struct sock *sk) return sk->sk_wmem_queued < sk->sk_sndbuf; } -/* The per-socket spinlock must be held here. */ +/* OOB backlog add */ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { @@ -601,6 +603,17 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +/* The per-socket spinlock must be held here. */ +static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +{ + if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) + return -ENOBUFS; + + sk_add_backlog(sk, skb); + sk->sk_backlog.len += skb->truesize; + return 0; +} + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { return sk->sk_backlog_rcv(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a762ff..6e22dc973d23 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. + */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); -- cgit v1.2.3 From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:47 +0000 Subject: net: backlog functions rename sk_add_backlog -> __sk_add_backlog sk_add_backlog_limited -> sk_add_backlog Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- net/core/sock.c | 2 +- net/dccp/minisocks.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 4 ++-- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 2 +- net/sctp/input.c | 4 ++-- net/tipc/socket.c | 2 +- net/x25/x25_dev.c | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 2516d76f043c..170353dd9570 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -592,7 +592,7 @@ static inline int sk_stream_memory_free(struct sock *sk) } /* OOB backlog add */ -static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { sk->sk_backlog.head = sk->sk_backlog.tail = skb; @@ -604,12 +604,12 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 6e22dc973d23..61a65a2e0455 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a063141..0d508c359fa9 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4baf1943b1bd..1915f7dc30e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,7 +1682,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5dda80..4199bc6915c5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e7eb47f338d4..7af756d0f931 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto drop; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c4ea9d5cbfaa..2c378b1bd5cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,7 +1740,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 64804912b093..3c0c9c755c92 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog_limited(sk, skb1)) { + else if (sk_add_backlog(sk, skb1)) { kfree_skb(skb1); bh_unlock_sock(sk); goto drop; @@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { atomic_inc(&sk->sk_drops); bh_unlock_sock(sk); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780512e8..86d6985b9d49 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0539ffdb272..a12144da7974 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) goto drop_unlock; } out: diff --git a/net/sctp/input.c b/net/sctp/input.c index cbc063665e6b..3d74b264ea22 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) sctp_chunk_free(chunk); else backloged = 1; @@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog_limited(sk, skb); + ret = sk_add_backlog(sk, skb); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. * This way, we know structures we need will not disappear diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 22bfbc33a8ac..4b235fc1c70f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog_limited(sk, buf)) + if (sk_add_backlog(sk, buf)) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a9da0dc26f4f..52e304212241 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - queued = !sk_add_backlog_limited(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.3 From 0c9a2ac1f8a2e55b3382dfc27256878a58ea49e9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 7 Mar 2010 00:14:44 +0000 Subject: ipv6: Optmize translation between IPV6_PREFER_SRC_xxx and RT6_LOOKUP_F_xxx. IPV6_PREFER_SRC_xxx definitions: | #define IPV6_PREFER_SRC_TMP 0x0001 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 | #define IPV6_PREFER_SRC_COA 0x0004 RT6_LOOKUP_F_xxx definitions: | #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 | #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 | #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 So, we can translate between these two groups by shift operation instead of multiple 'if's. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 18 ++++++++++++++++++ net/ipv6/fib6_rules.c | 11 ++--------- net/ipv6/route.c | 11 ++--------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4a808de7c0f6..68f67836e146 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -37,6 +37,24 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* + * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate + * between IPV6_ADDR_PREFERENCES socket option values + * IPV6_PREFER_SRC_TMP = 0x1 + * IPV6_PREFER_SRC_PUBLIC = 0x2 + * IPV6_PREFER_SRC_COA = 0x4 + * and above RT6_LOOKUP_F_SRCPREF_xxx flags. + */ +static inline int rt6_srcprefs2flags(unsigned int srcprefs) +{ + /* No need to bitmask because srcprefs have only 3 bits. */ + return srcprefs << 3; +} + +static inline unsigned int rt6_flags2srcprefs(int flags) +{ + return (flags >> 3) & 7; +} extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b9dfd6..5e463c43fcc2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e97f22..52cd3eff31dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } -- cgit v1.2.3 From 4045635318538d3ddd2007720412fdc4b08f6a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Sun, 7 Mar 2010 16:21:39 +0000 Subject: net: add __must_check to sk_add_backlog Add the "__must_check" tag to sk_add_backlog() so that any failure to check and drop packets will be warned about. Signed-off-by: Zhu Yi Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 170353dd9570..092b0551e77f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -604,7 +604,7 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; -- cgit v1.2.3 From 2b4c32972b9bcfee29d5e2c1b6f261dda5ef2a21 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Tue, 9 Mar 2010 16:47:52 +0000 Subject: ipv6 ip6_tunnel: eliminate unused recursion field from ip6_tnl{}. Commit a43912ab19... ("tunnel: eliminate recursion field") eliminated use of recursion field from tunnel structures, but its definition still exists in ip6_tnl{}. Let's remove that unused field. Signed-off-by: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 83b4e008b16d..fbf9d1cda27b 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -15,7 +15,6 @@ struct ip6_tnl { struct ip6_tnl *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ - int recursion; /* depth of hard_start_xmit recursion */ struct ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_entry *dst_cache; /* cached dst */ -- cgit v1.2.3 From 8467005da3ef6104b89a4cc5e9c9d9445b75565f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 10 Mar 2010 15:23:10 -0800 Subject: nsproxy: remove INIT_NSPROXY() Remove INIT_NSPROXY(), use C99 initializer. Remove INIT_IPC_NS(), INIT_NET_NS() while I'm at it. Note: headers trim will be done later, now it's quite pointless because results will be invalidated by merge window. Signed-off-by: Alexey Dobriyan Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 8 -------- include/linux/ipc_namespace.h | 5 ----- include/net/net_namespace.h | 5 ----- kernel/nsproxy.c | 13 ++++++++++++- 4 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index abec69b63d7e..b1ed1cd8e2a8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,14 +32,6 @@ extern struct fs_struct init_fs; } extern struct nsproxy init_nsproxy; -#define INIT_NSPROXY(nsproxy) { \ - .pid_ns = &init_pid_ns, \ - .count = ATOMIC_INIT(1), \ - .uts_ns = &init_uts_ns, \ - .mnt_ns = NULL, \ - INIT_NET_NS(net_ns) \ - INIT_IPC_NS(ipc_ns) \ -} #define INIT_SIGHAND(sighand) { \ .count = ATOMIC_INIT(1), \ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 07baa38bce37..51952989ad42 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -62,11 +62,6 @@ extern struct ipc_namespace init_ipc_ns; extern atomic_t nr_ipc_ns; extern spinlock_t mq_lock; -#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) -#define INIT_IPC_NS(ns) .ns = &init_ipc_ns, -#else -#define INIT_IPC_NS(ns) -#endif #ifdef CONFIG_SYSVIPC extern int register_ipcns_notifier(struct ipc_namespace *); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 82b7be4db89a..bd10a7908993 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -100,14 +100,9 @@ struct net { extern struct net init_net; #ifdef CONFIG_NET -#define INIT_NET_NS(net_ns) .net_ns = &init_net, - extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); #else /* CONFIG_NET */ - -#define INIT_NET_NS(net_ns) - static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) { /* There is nothing to copy so this is a noop */ diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 09b4ff9711b2..2ab67233ee8f 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -24,7 +24,18 @@ static struct kmem_cache *nsproxy_cachep; -struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); +struct nsproxy init_nsproxy = { + .count = ATOMIC_INIT(1), + .uts_ns = &init_uts_ns, +#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) + .ipc_ns = &init_ipc_ns, +#endif + .mnt_ns = NULL, + .pid_ns = &init_pid_ns, +#ifdef CONFIG_NET + .net_ns = &init_net, +#endif +}; static inline struct nsproxy *create_nsproxy(void) { -- cgit v1.2.3 From 45bc21edb52fa71dbb1324c6f573aa880e95519d Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Mon, 8 Mar 2010 17:33:04 +0000 Subject: 9p: Change the name of new protocol from 9p2010.L to 9p2000.L This patch changes the name of the new 9P protocol from 9p2010.L to 9p2000.u. This is because we learnt that the name 9p2010 is already being used by others. Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.h | 6 +++--- include/net/9p/client.h | 4 ++-- net/9p/client.c | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 79000bf62491..6b801d1ddf4b 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -24,7 +24,7 @@ /** * enum p9_session_flags - option flags for each 9P session * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions - * @V9FS_PROTO_2010L: whether or not to use 9P2010.l extensions + * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) * @V9FS_ACCESS_ANY: use a single attach for all users @@ -34,7 +34,7 @@ */ enum p9_session_flags { V9FS_PROTO_2000U = 0x01, - V9FS_PROTO_2010L = 0x02, + V9FS_PROTO_2000L = 0x02, V9FS_ACCESS_SINGLE = 0x04, V9FS_ACCESS_USER = 0x08, V9FS_ACCESS_ANY = 0x0C, @@ -130,5 +130,5 @@ static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) { - return v9ses->flags & V9FS_PROTO_2010L; + return v9ses->flags & V9FS_PROTO_2000L; } diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 52e1fff709e4..f076dfa75ae8 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -32,13 +32,13 @@ /** enum p9_proto_versions - 9P protocol versions * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u * @p9_proto_2000u: 9P2000.u extension - * @p9_proto_2010L: 9P2010.L extension + * @p9_proto_2000L: 9P2000.L extension */ enum p9_proto_versions{ p9_proto_legacy = 0, p9_proto_2000u = 1, - p9_proto_2010L = 2, + p9_proto_2000L = 2, }; diff --git a/net/9p/client.c b/net/9p/client.c index bde9f3d38c57..e3e5bf4469ce 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -60,7 +60,7 @@ static const match_table_t tokens = { inline int p9_is_proto_dotl(struct p9_client *clnt) { - return (clnt->proto_version == p9_proto_2010L); + return (clnt->proto_version == p9_proto_2000L); } EXPORT_SYMBOL(p9_is_proto_dotl); @@ -80,9 +80,9 @@ static unsigned char get_protocol_version(const substring_t *name) } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { version = p9_proto_2000u; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); - } else if (!strncmp("9p2010.L", name->from, name->to-name->from)) { - version = p9_proto_2010L; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n"); + } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + version = p9_proto_2000L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); } else { P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", name->from); @@ -672,9 +672,9 @@ int p9_client_version(struct p9_client *c) c->msize, c->proto_version); switch (c->proto_version) { - case p9_proto_2010L: + case p9_proto_2000L: req = p9_client_rpc(c, P9_TVERSION, "ds", - c->msize, "9P2010.L"); + c->msize, "9P2000.L"); break; case p9_proto_2000u: req = p9_client_rpc(c, P9_TVERSION, "ds", @@ -700,8 +700,8 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!strncmp(version, "9P2010.L", 8)) - c->proto_version = p9_proto_2010L; + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) c->proto_version = p9_proto_2000u; else if (!strncmp(version, "9P2000", 6)) -- cgit v1.2.3