From 628e341f319f1a64a4639088faba952e4ec8f0a8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 14 Aug 2013 13:05:23 +0200 Subject: xfrm: make local error reporting more robust In xfrm4 and xfrm6 we need to take care about sockets of the other address family. This could happen because a 6in4 or 4in6 tunnel could get protected by ipsec. Because we don't want to have a run-time dependency on ipv6 when only using ipv4 xfrm we have to embed a pointer to the correct local_error function in xfrm_state_afinet and look it up when returning an error depending on the socket address family. Thanks to vi0ss for the great bug report: v2: a) fix two more unsafe interpretations of skb->sk as ipv6 socket (xfrm6_local_dontfrag and __xfrm6_output) v3: a) add an EXPORT_SYMBOL_GPL(xfrm_local_error) to fix a link error when building ipv6 as a module (thanks to Steffen Klassert) Reported-by: Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 94ce082b29dc..e823786e7c66 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,10 +341,13 @@ struct xfrm_state_afinfo { struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); + void (*local_error)(struct sk_buff *skb, u32 mtu); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); +extern struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +extern void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); extern void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -1477,6 +1480,7 @@ extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr); extern int xfrm_output_resume(struct sk_buff *skb, int err); extern int xfrm_output(struct sk_buff *skb); extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); +extern void xfrm_local_error(struct sk_buff *skb, int mtu); extern int xfrm4_extract_header(struct sk_buff *skb); extern int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, @@ -1497,6 +1501,7 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short fam extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler); extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler); +extern void xfrm4_local_error(struct sk_buff *skb, u32 mtu); extern int xfrm6_extract_header(struct sk_buff *skb); extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); @@ -1514,6 +1519,7 @@ extern int xfrm6_output(struct sk_buff *skb); extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); +extern void xfrm6_local_error(struct sk_buff *skb, u32 mtu); #ifdef CONFIG_XFRM extern int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From 0ea9d5e3e0e03a63b11392f5613378977dae7eca Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 13 Aug 2013 04:35:58 +0200 Subject: xfrm: introduce helper for safe determination of mtu skb->sk socket can be of AF_INET or AF_INET6 address family. Thus we always have to make sure we a referring to the correct interpretation of skb->sk. We only depend on header defines to query the mtu, so we don't introduce a new dependency to ipv6 by this change. Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/route.h | 8 ++++++++ include/net/xfrm.h | 12 ++++++++++++ net/ipv4/ip_output.c | 8 -------- net/ipv4/xfrm4_output.c | 4 +--- net/ipv6/xfrm6_output.c | 5 ++++- 5 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index 2ea40c1b5e00..afdeeb5bec25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -317,4 +317,12 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline int ip_skb_dst_mtu(struct sk_buff *skb) +{ + struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; + + return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +} + #endif /* _ROUTE_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e823786e7c66..b41d2d10ff0e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1723,4 +1724,15 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } +static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + if (sk && sk->sk_family == AF_INET6) + return ip6_skb_dst_mtu(skb); + else if (sk && sk->sk_family == AF_INET) + return ip_skb_dst_mtu(skb); + return dst_mtu(skb_dst(skb)); +} + #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4bcabf3ab4ca..9ee17e3d11c3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -211,14 +211,6 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip_skb_dst_mtu(struct sk_buff *skb) -{ - struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; - - return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 7a5491ffa4de..80baf4a3b1b5 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -21,7 +21,6 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; - struct dst_entry *dst; if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) goto out; @@ -29,8 +28,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - dst = skb_dst(skb); - mtu = dst_mtu(dst); + mtu = xfrm_skb_dst_mtu(skb); if (skb->len > mtu) { if (skb->sk) xfrm_local_error(skb, mtu); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b64fff30eb06..3ac5ab264fed 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -138,7 +138,10 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu = ip6_skb_dst_mtu(skb); + int mtu = xfrm_skb_dst_mtu(skb); + + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); -- cgit v1.2.3 From 3f0fa9a808f98fa10a18ba2a73f13d65fda990fb Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 14 Aug 2013 16:05:02 -0700 Subject: regmap: Add another missing header for !CONFIG_REGMAP stubs The use of WARN_ON() needs the definitions from bug.h, without it you can get: include/linux/regmap.h: In function 'regmap_write': include/linux/regmap.h:525:2: error: implicit declaration of function 'WARN_ONCE' [-Werror=implicit-function-declaration] Signed-off-by: Kevin Hilman Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 580a5320cc96..6d91fcb4c5cb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -16,6 +16,7 @@ #include #include #include +#include struct module; struct device; -- cgit v1.2.3 From 844d48746e4b281a933aedc0428048a1219b42f4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 18 Aug 2013 13:47:01 +0200 Subject: xfrm: choose protocol family by skb protocol We need to choose the protocol family by skb->protocol. Otherwise we call the wrong xfrm{4,6}_local_error handler in case an ipv6 sockets is used in ipv4 mode, in which case we should call down to xfrm4_local_error (ip6 sockets are a superset of ip4 ones). We are called before before ip_output functions, so skb->protocol is not reset. Cc: Steffen Klassert Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 ++-- net/xfrm/xfrm_output.c | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b41d2d10ff0e..ac5b02515355 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1728,9 +1728,9 @@ static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && sk->sk_family == AF_INET6) + if (sk && skb->protocol == htons(ETH_P_IPV6)) return ip6_skb_dst_mtu(skb); - else if (sk && sk->sk_family == AF_INET) + else if (sk && skb->protocol == htons(ETH_P_IP)) return ip_skb_dst_mtu(skb); return dst_mtu(skb_dst(skb)); } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6f5fc612b162..3bb2cdc13b46 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -216,9 +216,17 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) void xfrm_local_error(struct sk_buff *skb, int mtu) { + unsigned int proto; struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(skb->sk->sk_family); + if (skb->protocol == htons(ETH_P_IP)) + proto = AF_INET; + else if (skb->protocol == htons(ETH_P_IPV6)) + proto = AF_INET6; + else + return; + + afinfo = xfrm_state_get_afinfo(proto); if (!afinfo) return; -- cgit v1.2.3 From 2dfca312a91631311c1cf7c090246cc8103de038 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 20 Aug 2013 19:43:54 +0200 Subject: mac80211: add a flag to indicate CCK support for HT clients brcm80211 cannot handle sending frames with CCK rates as part of an A-MPDU session. Other drivers may have issues too. Set the flag in all drivers that have been tested with CCK rates. This fixes a reported brcmsmac regression introduced in commit ef47a5e4f1aaf1d0e2e6875e34b2c9595897bef6 "mac80211/minstrel_ht: fix cck rate sampling" Cc: stable@vger.kernel.org # 3.10 Reported-by: Tom Gundersen Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/init.c | 3 ++- drivers/net/wireless/ath/carl9170/main.c | 3 ++- drivers/net/wireless/rt2x00/rt2800lib.c | 3 ++- include/net/mac80211.h | 1 + net/mac80211/rc80211_minstrel_ht.c | 3 +++ 5 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 16f8b201642b..026a2a067b46 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -802,7 +802,8 @@ void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_SPECTRUM_MGMT | IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_RC_TABLE; + IEEE80211_HW_SUPPORTS_RC_TABLE | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_HT) { hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 4a33c6e39ca2..349fa22a921a 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1860,7 +1860,8 @@ void *carl9170_alloc(size_t priv_size) IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC | IEEE80211_HW_SUPPORTS_RC_TABLE | - IEEE80211_HW_SIGNAL_DBM; + IEEE80211_HW_SIGNAL_DBM | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (!modparam_noht) { /* diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 1f80ea5e29dd..1b41c8eda12d 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -6133,7 +6133,8 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_PS_NULLFUNC_STACK | IEEE80211_HW_AMPDU_AGGREGATION | - IEEE80211_HW_REPORTS_TX_ACK_STATUS; + IEEE80211_HW_REPORTS_TX_ACK_STATUS | + IEEE80211_HW_SUPPORTS_HT_CCK_RATES; /* * Don't set IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING for USB devices diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5b7a3dadadde..551ba6a6a073 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1499,6 +1499,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_RC_TABLE = 1<<24, IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF = 1<<25, IEEE80211_HW_TIMING_BEACON_ONLY = 1<<26, + IEEE80211_HW_SUPPORTS_HT_CCK_RATES = 1<<27, }; /** diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index f5aed963b22e..f3bbea1eb9e7 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -828,6 +828,9 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != IEEE80211_BAND_2GHZ) return; + if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES)) + return; + mi->cck_supported = 0; mi->cck_supported_short = 0; for (i = 0; i < 4; i++) { -- cgit v1.2.3 From 5a25cf1e310888eb333f9e034be84a8117111d30 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Aug 2013 12:31:19 +0200 Subject: xfrm: revert ipv4 mtu determination to dst_mtu In commit 0ea9d5e3e0e03a63b11392f5613378977dae7eca ("xfrm: introduce helper for safe determination of mtu") I switched the determination of ipv4 mtus from dst_mtu to ip_skb_dst_mtu. This was an error because in case of IP_PMTUDISC_PROBE we fall back to the interface mtu, which is never correct for ipv4 ipsec. This patch partly reverts 0ea9d5e3e0e03a63b11392f5613378977dae7eca ("xfrm: introduce helper for safe determination of mtu"). Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 12 ------------ net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_output.c | 8 +++++--- 3 files changed, 6 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ac5b02515355..e823786e7c66 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1724,15 +1723,4 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) return ret; } -static inline int xfrm_skb_dst_mtu(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && skb->protocol == htons(ETH_P_IPV6)) - return ip6_skb_dst_mtu(skb); - else if (sk && skb->protocol == htons(ETH_P_IP)) - return ip_skb_dst_mtu(skb); - return dst_mtu(skb_dst(skb)); -} - #endif /* _NET_XFRM_H */ diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 80baf4a3b1b5..baa0f63731fd 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -28,7 +28,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) goto out; - mtu = xfrm_skb_dst_mtu(skb); + mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu) { if (skb->sk) xfrm_local_error(skb, mtu); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e092e306882d..6cd625e37706 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -140,10 +140,12 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; - int mtu = xfrm_skb_dst_mtu(skb); + int mtu; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + if (skb->protocol == htons(ETH_P_IPV6)) + mtu = ip6_skb_dst_mtu(skb); + else + mtu = dst_mtu(skb_dst(skb)); if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); -- cgit v1.2.3 From c2b1df2eb42978073ec27c99cc199d20ae48b849 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Aug 2013 11:39:16 -0700 Subject: Rename nsproxy.pid_ns to nsproxy.pid_ns_for_children nsproxy.pid_ns is *not* the task's pid namespace. The name should clarify that. This makes it more obvious that setns on a pid namespace is weird -- it won't change the pid namespace shown in procfs. Signed-off-by: Andy Lutomirski Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/nsproxy.h | 6 +++++- kernel/fork.c | 5 +++-- kernel/nsproxy.c | 27 ++++++++++++++------------- kernel/pid_namespace.c | 4 ++-- 4 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 10e5947491c7..b4ec59d159ac 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -14,6 +14,10 @@ struct fs_struct; * A structure to contain pointers to all per-process * namespaces - fs (mount), uts, network, sysvipc, etc. * + * The pid namespace is an exception -- it's accessed using + * task_active_pid_ns. The pid namespace here is the + * namespace that children will use. + * * 'count' is the number of tasks holding a reference. * The count for each namespace, then, will be the number * of nsproxies pointing to it, not the number of tasks. @@ -27,7 +31,7 @@ struct nsproxy { struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; - struct pid_namespace *pid_ns; + struct pid_namespace *pid_ns_for_children; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/kernel/fork.c b/kernel/fork.c index e23bb19e2a3e..bf46287c91a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1177,7 +1177,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, * don't allow the creation of threads. */ if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != current->nsproxy->pid_ns)) + (task_active_pid_ns(current) != + current->nsproxy->pid_ns_for_children)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); @@ -1351,7 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (pid != &init_struct_pid) { retval = -ENOMEM; - pid = alloc_pid(p->nsproxy->pid_ns); + pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 364ceab15f0c..997cbb951a3b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -29,15 +29,15 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = { - .count = ATOMIC_INIT(1), - .uts_ns = &init_uts_ns, + .count = ATOMIC_INIT(1), + .uts_ns = &init_uts_ns, #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) - .ipc_ns = &init_ipc_ns, + .ipc_ns = &init_ipc_ns, #endif - .mnt_ns = NULL, - .pid_ns = &init_pid_ns, + .mnt_ns = NULL, + .pid_ns_for_children = &init_pid_ns, #ifdef CONFIG_NET - .net_ns = &init_net, + .net_ns = &init_net, #endif }; @@ -85,9 +85,10 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); - if (IS_ERR(new_nsp->pid_ns)) { - err = PTR_ERR(new_nsp->pid_ns); + new_nsp->pid_ns_for_children = + copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children); + if (IS_ERR(new_nsp->pid_ns_for_children)) { + err = PTR_ERR(new_nsp->pid_ns_for_children); goto out_pid; } @@ -100,8 +101,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->pid_ns) - put_pid_ns(new_nsp->pid_ns); + if (new_nsp->pid_ns_for_children) + put_pid_ns(new_nsp->pid_ns_for_children); out_pid: if (new_nsp->ipc_ns) put_ipc_ns(new_nsp->ipc_ns); @@ -174,8 +175,8 @@ void free_nsproxy(struct nsproxy *ns) put_uts_ns(ns->uts_ns); if (ns->ipc_ns) put_ipc_ns(ns->ipc_ns); - if (ns->pid_ns) - put_pid_ns(ns->pid_ns); + if (ns->pid_ns_for_children) + put_pid_ns(ns->pid_ns_for_children); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6917e8edb48e..601bb361c235 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -349,8 +349,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns) if (ancestor != active) return -EINVAL; - put_pid_ns(nsproxy->pid_ns); - nsproxy->pid_ns = get_pid_ns(new); + put_pid_ns(nsproxy->pid_ns_for_children); + nsproxy->pid_ns_for_children = get_pid_ns(new); return 0; } -- cgit v1.2.3 From 33c6b1f6b154894321f5734e50c66621e9134e7e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 23 Aug 2013 12:45:04 -0700 Subject: genl: Hold reference on correct module while netlink-dump. netlink dump operations take module as parameter to hold reference for entire netlink dump duration. Currently it holds ref only on genl module which is not correct when we use ops registered to genl from another module. Following patch adds module pointer to genl_ops so that netlink can hold ref count on it. CC: Jesse Gross CC: Johannes Berg Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/genetlink.h | 20 ++++++++++++++++++-- net/netlink/genetlink.c | 20 +++++++++++--------- 2 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 93024a47e0e2..8e0b6c856a13 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -61,6 +61,7 @@ struct genl_family { struct list_head ops_list; /* private */ struct list_head family_list; /* private */ struct list_head mcast_groups; /* private */ + struct module *module; }; /** @@ -121,9 +122,24 @@ struct genl_ops { struct list_head ops_list; }; -extern int genl_register_family(struct genl_family *family); -extern int genl_register_family_with_ops(struct genl_family *family, +extern int __genl_register_family(struct genl_family *family); + +static inline int genl_register_family(struct genl_family *family) +{ + family->module = THIS_MODULE; + return __genl_register_family(family); +} + +extern int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops); + +static inline int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + family->module = THIS_MODULE; + return __genl_register_family_with_ops(family, ops, n_ops); +} + extern int genl_unregister_family(struct genl_family *family); extern int genl_register_ops(struct genl_family *, struct genl_ops *ops); extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7e0f4d199ade..0c741cec4d0d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) EXPORT_SYMBOL(genl_unregister_ops); /** - * genl_register_family - register a generic netlink family + * __genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one @@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops); * * Return 0 on success or a negative error code. */ -int genl_register_family(struct genl_family *family) +int __genl_register_family(struct genl_family *family) { int err = -EINVAL; @@ -430,10 +430,10 @@ errout_locked: errout: return err; } -EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(__genl_register_family); /** - * genl_register_family_with_ops - register a generic netlink family + * __genl_register_family_with_ops - register a generic netlink family * @family: generic netlink family * @ops: operations to be registered * @n_ops: number of elements to register @@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family); * * Return 0 on success or a negative error code. */ -int genl_register_family_with_ops(struct genl_family *family, +int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops) { int err, i; - err = genl_register_family(family); + err = __genl_register_family(family); if (err) return err; @@ -476,7 +476,7 @@ err_out: genl_unregister_family(family); return err; } -EXPORT_SYMBOL(genl_register_family_with_ops); +EXPORT_SYMBOL(__genl_register_family_with_ops); /** * genl_unregister_family - unregister generic netlink family @@ -603,22 +603,24 @@ static int genl_family_rcv_msg(struct genl_family *family, if (!family->parallel_ops) { struct netlink_dump_control c = { + .module = family->module, .data = ops, .dump = genl_lock_dumpit, .done = genl_lock_done, }; genl_unlock(); - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); genl_lock(); } else { struct netlink_dump_control c = { + .module = family->module, .dump = ops->dumpit, .done = ops->done, }; - rc = netlink_dump_start(net->genl_sock, skb, nlh, &c); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); } return rc; -- cgit v1.2.3 From 3046e2f5b79a86044ac0a29c69610d6ac6a4b882 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 25 Aug 2013 10:23:46 +0300 Subject: net: add cpu_relax to busy poll loop Add a cpu_relaxt to sk_busy_loop. Julie Cummings reported performance issues when hyperthreading is on. Arjan van de Ven observed that we should have a cpu_relax() in the busy poll loop. Reported-by: Julie Cummings Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/net/busy_poll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8a358a2c97e6..829627d7b846 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -123,6 +123,7 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) /* local bh are disabled so it is ok to use _BH */ NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_BUSYPOLLRXPACKETS, rc); + cpu_relax(); } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); -- cgit v1.2.3 From 0f8f2aaaab0b0f9c13635cb02e7d19bdaa9aa1bb Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:13:26 -0700 Subject: Add new lockref infrastructure reference implementation This introduces a new "lockref" structure that supports the concept of lockless updates of reference counts that still honor an attached spinlock. NOTE! This reference implementation is not the optimized lockless version, rather it is the fallback implementation using standard spinlocks. The actual optimized versions will be merged into 3.12, but I wanted to get the infrastructure in place and document the new interfaces. [ Also note that this particular commit is drastically cut-down minimal version of the original patch by Waiman. In order to properly credit the original author I'm marking Waiman as the author here, but in the end this patch bears little resemblance to the patch by Waiman. So blame any errors on me editing things down to the point where I can introduce the infrastructure before the merge window for 3.12 actually opens. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/lockref.h (limited to 'include') diff --git a/include/linux/lockref.h b/include/linux/lockref.h new file mode 100644 index 000000000000..01233e01627a --- /dev/null +++ b/include/linux/lockref.h @@ -0,0 +1,71 @@ +#ifndef __LINUX_LOCKREF_H +#define __LINUX_LOCKREF_H + +/* + * Locked reference counts. + * + * These are different from just plain atomic refcounts in that they + * are atomic with respect to the spinlock that goes with them. In + * particular, there can be implementations that don't actually get + * the spinlock for the common decrement/increment operations, but they + * still have to check that the operation is done semantically as if + * the spinlock had been taken (using a cmpxchg operation that covers + * both the lock and the count word, or using memory transactions, for + * example). + */ + +#include + +struct lockref { + spinlock_t lock; + unsigned int count; +}; + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +static inline void lockref_get(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count is 0 + */ +static inline int lockref_get_not_zero(struct lockref *lockref) +{ + int retval = 0; + + spin_lock(&lockref->lock); + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +static inline int lockref_put_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} + +#endif /* __LINUX_LOCKREF_H */ -- cgit v1.2.3 From 98474236f72e5a8b89c14cd7c74f0bb77a4b1a99 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 28 Aug 2013 18:24:59 -0700 Subject: vfs: make the dentry cache use the lockref infrastructure This just replaces the dentry count/lock combination with the lockref structure that contains both a count and a spinlock, and does the mechanical conversion to use the lockref infrastructure. There are no semantic changes here, it's purely syntactic. The reference lockref implementation uses the spinlock exactly the same way that the old dcache code did, and the bulk of this patch is just expanding the internal "d_count" use in the dcache code to use "d_lockref.count" instead. This is purely preparation for the real change to make the reference count updates be lockless during the 3.12 merge window. [ As with the previous commit, this is a rewritten version of a concept originally from Waiman, so credit goes to him, blame for any errors goes to me. Waiman's patch had some semantic differences for taking advantage of the lockless update in dget_parent(), while this patch is intentionally a pure search-and-replace change with no semantic changes. - Linus ] Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- fs/dcache.c | 57 ++++++++++++++++++++------------------------------ fs/namei.c | 6 +++--- include/linux/dcache.h | 19 ++++++++--------- 3 files changed, 35 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 83cfb834db03..b949af850cd6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -229,7 +229,7 @@ static void __d_free(struct rcu_head *head) */ static void d_free(struct dentry *dentry) { - BUG_ON(dentry->d_count); + BUG_ON(dentry->d_lockref.count); this_cpu_dec(nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); @@ -467,7 +467,7 @@ relock: } if (ref) - dentry->d_count--; + dentry->d_lockref.count--; /* * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. @@ -513,15 +513,10 @@ void dput(struct dentry *dentry) return; repeat: - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) might_sleep(); - spin_lock(&dentry->d_lock); - BUG_ON(!dentry->d_count); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } if (dentry->d_flags & DCACHE_OP_DELETE) { if (dentry->d_op->d_delete(dentry)) @@ -535,7 +530,7 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - dentry->d_count--; + dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; @@ -590,7 +585,7 @@ int d_invalidate(struct dentry * dentry) * We also need to leave mountpoints alone, * directory or not. */ - if (dentry->d_count > 1 && dentry->d_inode) { + if (dentry->d_lockref.count > 1 && dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { spin_unlock(&dentry->d_lock); return -EBUSY; @@ -606,14 +601,12 @@ EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) { - dentry->d_count++; + dentry->d_lockref.count++; } static inline void __dget(struct dentry *dentry) { - spin_lock(&dentry->d_lock); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); + lockref_get(&dentry->d_lockref); } struct dentry *dget_parent(struct dentry *dentry) @@ -634,8 +627,8 @@ repeat: goto repeat; } rcu_read_unlock(); - BUG_ON(!ret->d_count); - ret->d_count++; + BUG_ON(!ret->d_lockref.count); + ret->d_lockref.count++; spin_unlock(&ret->d_lock); return ret; } @@ -718,7 +711,7 @@ restart: spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!dentry->d_count) { + if (!dentry->d_lockref.count) { __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -763,12 +756,8 @@ static void try_prune_one_dentry(struct dentry *dentry) /* Prune ancestors. */ dentry = parent; while (dentry) { - spin_lock(&dentry->d_lock); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) return; - } dentry = dentry_kill(dentry, 1); } } @@ -793,7 +782,7 @@ static void shrink_dentry_list(struct list_head *list) * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; @@ -913,7 +902,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry_lru_del(dentry); __d_shrink(dentry); - if (dentry->d_count != 0) { + if (dentry->d_lockref.count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -922,7 +911,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry->d_inode ? dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - dentry->d_count, + dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); @@ -933,7 +922,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; - parent->d_count--; + parent->d_lockref.count--; list_del(&dentry->d_u.d_child); } @@ -981,7 +970,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - dentry->d_count--; + dentry->d_lockref.count--; shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { @@ -1147,7 +1136,7 @@ resume: * loop in shrink_dcache_parent() might not make any progress * and loop forever. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { dentry_lru_move_list(dentry, dispose); @@ -1269,7 +1258,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_wmb(); dentry->d_name.name = dname; - dentry->d_count = 1; + dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); @@ -1970,7 +1959,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) goto next; } - dentry->d_count++; + dentry->d_lockref.count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -2069,7 +2058,7 @@ again: spin_lock(&dentry->d_lock); inode = dentry->d_inode; isdir = S_ISDIR(inode->i_mode); - if (dentry->d_count == 1) { + if (dentry->d_lockref.count == 1) { if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); cpu_relax(); @@ -2948,7 +2937,7 @@ resume: } if (!(dentry->d_flags & DCACHE_GENOCIDE)) { dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_count--; + dentry->d_lockref.count--; } spin_unlock(&dentry->d_lock); } @@ -2956,7 +2945,7 @@ resume: struct dentry *child = this_parent; if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { this_parent->d_flags |= DCACHE_GENOCIDE; - this_parent->d_count--; + this_parent->d_lockref.count--; } this_parent = try_to_ascend(this_parent, locked, seq); if (!this_parent) diff --git a/fs/namei.c b/fs/namei.c index 8b61d103a8a7..7720fbd5277b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -536,8 +536,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) * a reference at this point. */ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); - BUG_ON(!parent->d_count); - parent->d_count++; + BUG_ON(!parent->d_lockref.count); + parent->d_lockref.count++; spin_unlock(&dentry->d_lock); } spin_unlock(&parent->d_lock); @@ -3327,7 +3327,7 @@ void dentry_unhash(struct dentry *dentry) { shrink_dcache_parent(dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count == 1) + if (dentry->d_lockref.count == 1) __d_drop(dentry); spin_unlock(&dentry->d_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4a12532da8c4..efdc94434c30 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -9,6 +9,7 @@ #include #include #include +#include struct nameidata; struct path; @@ -100,6 +101,8 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int); # endif #endif +#define d_lock d_lockref.lock + struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ @@ -112,8 +115,7 @@ struct dentry { unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ - unsigned int d_count; /* protected by d_lock */ - spinlock_t d_lock; /* per dentry lock */ + struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ @@ -318,7 +320,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) assert_spin_locked(&dentry->d_lock); if (!read_seqcount_retry(&dentry->d_seq, seq)) { ret = 1; - dentry->d_count++; + dentry->d_lockref.count++; } return ret; @@ -326,7 +328,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) static inline unsigned d_count(const struct dentry *dentry) { - return dentry->d_count; + return dentry->d_lockref.count; } /* validate "insecure" dentry pointer */ @@ -357,17 +359,14 @@ extern char *dentry_path(struct dentry *, char *, int); static inline struct dentry *dget_dlock(struct dentry *dentry) { if (dentry) - dentry->d_count++; + dentry->d_lockref.count++; return dentry; } static inline struct dentry *dget(struct dentry *dentry) { - if (dentry) { - spin_lock(&dentry->d_lock); - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - } + if (dentry) + lockref_get(&dentry->d_lockref); return dentry; } -- cgit v1.2.3 From aaaafb7f953c60d9c9eeb1b10ecdbe6970421b24 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 28 Aug 2013 16:35:16 -0700 Subject: Omnikey Cardman 4000: pull in ioctl.h in user header This file uses the ioctl helpers (_IOR/_IOW/etc...), so include ioctl.h for the definitions. Signed-off-by: Mike Frysinger Cc: Harald Welte Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/cm4000_cs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/cm4000_cs.h b/include/uapi/linux/cm4000_cs.h index bc51f77db918..1217f751a1bc 100644 --- a/include/uapi/linux/cm4000_cs.h +++ b/include/uapi/linux/cm4000_cs.h @@ -2,6 +2,7 @@ #define _UAPI_CM4000_H_ #include +#include #define MAX_ATR 33 -- cgit v1.2.3