From 9b395bc3be1cebf0144a127c7e67d56dbdac0930 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2012 00:36:40 +0200 Subject: mac80211: verify that skb data is present A number of places in the mesh code don't check that the frame data is present and in the skb header when trying to access. Add those checks and the necessary pskb_may_pull() calls. This prevents accessing data that doesn't actually exist. To do this, export ieee80211_get_mesh_hdrlen() to be able to use it in mac80211. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8cd4cf3fad8..7d5b6000378b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2651,6 +2651,15 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * ieee80211_get_mesh_hdrlen - get mesh extension header length + * @meshhdr: the mesh extension header, only the flags field + * (first byte) will be accessed + * Returns the length of the extension header, which is always at + * least 6 bytes and at most 18 if address 5 and 6 are present. + */ +unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); + /** * DOC: Data path helpers * -- cgit v1.2.3 From 95a7d76897c1e7243d4137037c66d15cbf2cce76 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 31 Oct 2012 12:38:31 -0400 Subject: xen/mmu: Use Xen specific TLB flush instead of the generic one. As Mukesh explained it, the MMUEXT_TLB_FLUSH_ALL allows the hypervisor to do a TLB flush on all active vCPUs. If instead we were using the generic one (which ends up being xen_flush_tlb) we end up making the MMUEXT_TLB_FLUSH_LOCAL hypercall. But before we make that hypercall the kernel will IPI all of the vCPUs (even those that were asleep from the hypervisor perspective). The end result is that we needlessly wake them up and do a TLB flush when we can just let the hypervisor do it correctly. This patch gives around 50% speed improvement when migrating idle guest's from one host to another. 
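For contrast with the hypercall added below, here is a rough sketch of what the generic x86 path boils down to in this era (simplified from arch/x86/mm/tlb.c; not part of this patch): flush_tlb_all() IPIs every online CPU so that each one performs its own local flush, which under Xen PV means waking idle vCPUs just to issue MMUEXT_TLB_FLUSH_LOCAL.

static void do_flush_tlb_all(void *info)
{
	__flush_tlb_all();			/* local flush, once per CPU */
}

void flush_tlb_all(void)
{
	on_each_cpu(do_flush_tlb_all, NULL, 1);	/* IPI every CPU and wait */
}

The change below sidesteps that IPI storm by letting the hypervisor flush all active vCPUs with a single MMUEXT_TLB_FLUSH_ALL multicall.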
Oracle-bug: 14630170 CC: stable@vger.kernel.org Tested-by: Jingjie Jiang Suggested-by: Mukesh Rathor Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 21 ++++++++++++++++++++- include/trace/events/xen.h | 8 ++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6226c99729b9..dcf5f2dd91ec 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1288,6 +1288,25 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } +void xen_flush_tlb_all(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + trace_xen_mmu_flush_tlb_all(0); + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_ALL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} static void xen_flush_tlb(void) { struct mmuext_op *op; @@ -2518,7 +2537,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, err = 0; out: - flush_tlb_all(); + xen_flush_tlb_all(); return err; } diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 15ba03bdd7c6..d06b6da5c1e3 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -377,6 +377,14 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); +TRACE_EVENT(xen_mmu_flush_tlb_all, + TP_PROTO(int x), + TP_ARGS(x), + TP_STRUCT__entry(__array(char, x, 0)), + TP_fast_assign((void)x), + TP_printk("%s", "") + ); + TRACE_EVENT(xen_mmu_flush_tlb, TP_PROTO(int x), TP_ARGS(x), -- cgit v1.2.3 From d9b482c8ba1973a189f2d4c8175d405b87fbf2d7 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 30 Oct 2012 14:45:57 -0400 Subject: hashtable: introduce a small and naive hashtable This hashtable implementation is using hlist buckets to provide a simple hashtable to prevent it from getting reimplemented all over the kernel. Signed-off-by: Sasha Levin [ Merging this now, so that subsystems can start applying Sasha's patches that use this - Linus ] Signed-off-by: Linus Torvalds --- include/linux/hashtable.h | 192 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 include/linux/hashtable.h (limited to 'include') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h new file mode 100644 index 000000000000..227c62424f3c --- /dev/null +++ b/include/linux/hashtable.h @@ -0,0 +1,192 @@ +/* + * Statically sized hash table implementation + * (C) 2012 Sasha Levin + */ + +#ifndef _LINUX_HASHTABLE_H +#define _LINUX_HASHTABLE_H + +#include +#include +#include +#include +#include + +#define DEFINE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + +#define DECLARE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] + +#define HASH_SIZE(name) (ARRAY_SIZE(name)) +#define HASH_BITS(name) ilog2(HASH_SIZE(name)) + +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ +#define hash_min(val, bits) \ + (sizeof(val) <= 4 ? 
hash_32(val, bits) : hash_long(val, bits)) + +static inline void __hash_init(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); +} + +/** + * hash_init - initialize a hash table + * @hashtable: hashtable to be initialized + * + * Calculates the size of the hashtable from the given parameter, otherwise + * same as hash_init_size. + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_add - add an object to a hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_add_rcu - add an object to a rcu enabled hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add_rcu(hashtable, node, key) \ + hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_hashed - check whether an object is in any hashtable + * @node: the &struct hlist_node of the object to be checked + */ +static inline bool hash_hashed(struct hlist_node *node) +{ + return !hlist_unhashed(node); +} + +static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + if (!hlist_empty(&ht[i])) + return false; + + return true; +} + +/** + * hash_empty - check whether a hashtable is empty + * @hashtable: hashtable to check + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. 
+ */ +#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_del - remove an object from a hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del(struct hlist_node *node) +{ + hlist_del_init(node); +} + +/** + * hash_del_rcu - remove an object from a rcu enabled hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del_rcu(struct hlist_node *node) +{ + hlist_del_init_rcu(node); +} + +/** + * hash_for_each - iterate over a hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @node: the &struct list_head to use as a loop cursor for each entry + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each(name, bkt, node, obj, member) \ + for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ + hlist_for_each_entry(obj, node, &name[bkt], member) + +/** + * hash_for_each_rcu - iterate over a rcu enabled hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @node: the &struct list_head to use as a loop cursor for each entry + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_rcu(name, bkt, node, obj, member) \ + for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ + hlist_for_each_entry_rcu(obj, node, &name[bkt], member) + +/** + * hash_for_each_safe - iterate over a hashtable safe against removal of + * hash entry + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @node: the &struct list_head to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_safe(name, bkt, node, tmp, obj, member) \ + for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ + hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], member) + +/** + * hash_for_each_possible - iterate over all possible objects hashing to the + * same bucket + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @node: the &struct list_head to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible(name, obj, node, member, key) \ + hlist_for_each_entry(obj, node, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_rcu - iterate over all possible objects hashing to the + * same bucket in an rcu enabled hashtable + * in a rcu enabled hashtable + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @node: the &struct list_head to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_rcu(name, obj, node, member, key) \ + hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_safe - iterate over all possible objects hashing to the + * same bucket safe against removals + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @node: the &struct 
list_head to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_safe(name, obj, node, tmp, member, key) \ + hlist_for_each_entry_safe(obj, node, tmp, \ + &name[hash_min(key, HASH_BITS(name))], member) + + +#endif -- cgit v1.2.3 From 87f4d7c1d36f44b0822053b7e5dedc31fdd0ab99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Nov 2012 12:30:16 +0000 Subject: ptp: update adjfreq callback description This patch updates the adjfreq callback description to include a note that the delta in ppb is always relative to the base frequency, and not to the current frequency of the hardware clock. Signed-off-by: Jacob Keller CC: stable@vger.kernel.org [v3.5+] CC: Richard Cochran CC: John Stultz Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index f2dc6d8fc680..38a993508327 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -54,7 +54,8 @@ struct ptp_clock_request { * clock operations * * @adjfreq: Adjusts the frequency of the hardware clock. - * parameter delta: Desired period change in parts per billion. + * parameter delta: Desired frequency offset from nominal frequency + * in parts per billion * * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. -- cgit v1.2.3 From 6d877e6b85691e0b2b22e90aeb9b86c3dafcfc6b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 19 Oct 2012 15:01:46 -0400 Subject: xen/hvm: If we fail to fetch an HVM parameter print out which flag it is. Makes it easier to troubleshoot in the field. 
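(An aside on the hashtable API introduced by commit d9b482c8 above: the header only defines the macros, so a minimal usage sketch may help. Everything in it -- struct object, its id key, the helper names -- is invented purely for illustration.)

#include <linux/hashtable.h>

struct object {
	int id;
	struct hlist_node node;			/* links the object into its bucket */
};

static DEFINE_HASHTABLE(objects, 4);		/* 1 << 4 = 16 buckets */

static void object_add(struct object *obj)
{
	hash_add(objects, &obj->node, obj->id);	/* key is hashed via hash_min() */
}

static struct object *object_find(int id)
{
	struct object *obj;
	struct hlist_node *pos;			/* this era's hlist iterators need a node cursor */

	hash_for_each_possible(objects, obj, pos, node, id)
		if (obj->id == id)
			return obj;
	return NULL;
}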
Acked-by: Ian Campbell [v1: Use macro per Ian's suggestion] Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/hvm.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/xen/hvm.h b/include/xen/hvm.h index b193fa2f9fdd..13e43e41637d 100644 --- a/include/xen/hvm.h +++ b/include/xen/hvm.h @@ -5,6 +5,36 @@ #include #include +static const char *param_name(int op) +{ +#define PARAM(x) [HVM_PARAM_##x] = #x + static const char *const names[] = { + PARAM(CALLBACK_IRQ), + PARAM(STORE_PFN), + PARAM(STORE_EVTCHN), + PARAM(PAE_ENABLED), + PARAM(IOREQ_PFN), + PARAM(BUFIOREQ_PFN), + PARAM(TIMER_MODE), + PARAM(HPET_ENABLED), + PARAM(IDENT_PT), + PARAM(DM_DOMAIN), + PARAM(ACPI_S_STATE), + PARAM(VM86_TSS), + PARAM(VPT_ALIGN), + PARAM(CONSOLE_PFN), + PARAM(CONSOLE_EVTCHN), + }; +#undef PARAM + + if (op >= ARRAY_SIZE(names)) + return "unknown"; + + if (!names[op]) + return "reserved"; + + return names[op]; +} static inline int hvm_get_parameter(int idx, uint64_t *value) { struct xen_hvm_param xhv; @@ -14,8 +44,8 @@ static inline int hvm_get_parameter(int idx, uint64_t *value) xhv.index = idx; r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (r < 0) { - printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n", - idx, r); + printk(KERN_ERR "Cannot get hvm parameter %s (%d): %d!\n", + param_name(idx), idx, r); return r; } *value = xhv.value; -- cgit v1.2.3 From 0133370f93eae5ed3c0f16d9da2b7add7dda6076 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 2 Nov 2012 13:44:46 +0530 Subject: drivers: bus: ocp2scp: add pdata support ocp2scp was not having pdata support which makes *musb* fail for non-dt boot in OMAP platform. The pdata will have information about the devices that is connected to ocp2scp. ocp2scp driver will now make use of this information to create the devices that is attached to ocp2scp. This is needed to fix MUSB regression caused by commit c9e4412a (arm: omap: phy: remove unused functions from omap-phy-internal.c) Signed-off-by: Kishon Vijay Abraham I Acked-by: Felipe Balbi [tony@atomide.com: updated comments for regression info] Signed-off-by: Tony Lindgren --- drivers/bus/omap-ocp2scp.c | 68 ++++++++++++++++++++++++++++-- include/linux/platform_data/omap_ocp2scp.h | 31 ++++++++++++++ 2 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 include/linux/platform_data/omap_ocp2scp.h (limited to 'include') diff --git a/drivers/bus/omap-ocp2scp.c b/drivers/bus/omap-ocp2scp.c index ff63560b8467..0c48b0e05ed6 100644 --- a/drivers/bus/omap-ocp2scp.c +++ b/drivers/bus/omap-ocp2scp.c @@ -22,6 +22,26 @@ #include #include #include +#include + +/** + * _count_resources - count for the number of resources + * @res: struct resource * + * + * Count and return the number of resources populated for the device that is + * connected to ocp2scp. 
+ */ +static unsigned _count_resources(struct resource *res) +{ + int cnt = 0; + + while (res->start != res->end) { + cnt++; + res++; + } + + return cnt; +} static int ocp2scp_remove_devices(struct device *dev, void *c) { @@ -34,20 +54,62 @@ static int ocp2scp_remove_devices(struct device *dev, void *c) static int __devinit omap_ocp2scp_probe(struct platform_device *pdev) { - int ret; - struct device_node *np = pdev->dev.of_node; + int ret; + unsigned res_cnt, i; + struct device_node *np = pdev->dev.of_node; + struct platform_device *pdev_child; + struct omap_ocp2scp_platform_data *pdata = pdev->dev.platform_data; + struct omap_ocp2scp_dev *dev; if (np) { ret = of_platform_populate(np, NULL, NULL, &pdev->dev); if (ret) { - dev_err(&pdev->dev, "failed to add resources for ocp2scp child\n"); + dev_err(&pdev->dev, + "failed to add resources for ocp2scp child\n"); goto err0; } + } else if (pdata) { + for (i = 0, dev = *pdata->devices; i < pdata->dev_cnt; i++, + dev++) { + res_cnt = _count_resources(dev->res); + + pdev_child = platform_device_alloc(dev->drv_name, + PLATFORM_DEVID_AUTO); + if (!pdev_child) { + dev_err(&pdev->dev, + "failed to allocate mem for ocp2scp child\n"); + goto err0; + } + + ret = platform_device_add_resources(pdev_child, + dev->res, res_cnt); + if (ret) { + dev_err(&pdev->dev, + "failed to add resources for ocp2scp child\n"); + goto err1; + } + + pdev_child->dev.parent = &pdev->dev; + + ret = platform_device_add(pdev_child); + if (ret) { + dev_err(&pdev->dev, + "failed to register ocp2scp child device\n"); + goto err1; + } + } + } else { + dev_err(&pdev->dev, "OCP2SCP initialized without plat data\n"); + return -EINVAL; } + pm_runtime_enable(&pdev->dev); return 0; +err1: + platform_device_put(pdev_child); + err0: device_for_each_child(&pdev->dev, NULL, ocp2scp_remove_devices); diff --git a/include/linux/platform_data/omap_ocp2scp.h b/include/linux/platform_data/omap_ocp2scp.h new file mode 100644 index 000000000000..5c6c3939355f --- /dev/null +++ b/include/linux/platform_data/omap_ocp2scp.h @@ -0,0 +1,31 @@ +/* + * omap_ocp2scp.h -- ocp2scp header file + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Author: Kishon Vijay Abraham I + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __DRIVERS_OMAP_OCP2SCP_H +#define __DRIVERS_OMAP_OCP2SCP_H + +struct omap_ocp2scp_dev { + const char *drv_name; + struct resource *res; +}; + +struct omap_ocp2scp_platform_data { + int dev_cnt; + struct omap_ocp2scp_dev **devices; +}; +#endif /* __DRIVERS_OMAP_OCP2SCP_H */ -- cgit v1.2.3 From d676188e44680c2f2eb114a24b3b32e56165f079 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Fri, 28 Sep 2012 14:21:59 +0900 Subject: mmc: dw_mmc: convert the variable type of irq Even though platform_get_irq returns error, 'host->irq' always has an unsigned value. Less-than-zero comparison of an unsigned value is never true. Type of 'unsigned int' will be changed for 'int'. 
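In sketch form, the bug class being fixed looks like this (illustrative only, not the literal driver code):

	unsigned int irq = platform_get_irq(pdev, 0);	/* returns int: negative errno on failure */

	if (irq < 0)		/* never true: the negative errno has been converted to a
				 * huge positive unsigned value, so the error is missed */
		return irq;

Storing the result in a plain int keeps the error check meaningful.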
Signed-off-by: Seungwon Jeon Acked-by: Will Newton Signed-off-by: Chris Ball --- include/linux/mmc/dw_mmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 7c6a1139d8fa..31416760723c 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -186,7 +186,7 @@ struct dw_mci { struct regulator *vmmc; /* Power regulator */ unsigned long irq_flags; /* IRQ flags */ - unsigned int irq; + int irq; }; /* DMA ops for Internal/External DMAC interface */ -- cgit v1.2.3 From 63ef5d8c28b2a944f104d854254941e7375c85a3 Mon Sep 17 00:00:00 2001 From: Jerry Huang Date: Thu, 25 Oct 2012 13:47:19 +0800 Subject: mmc: sdhci-of-esdhc: disable CMD23 for some Freescale SoCs CMD23 causes lots of errors in kernel on some freescale SoCs (P1020, P1021, P1022, P1024, P1025 and P4080) when MMC card used, which is because these controllers does not support CMD23, even on the SoCs which declares CMD23 is supported. Therefore, we'll not use CMD23. Signed-off-by: Jerry Huang Signed-off-by: Shaohui Xie Acked-by: Anton Vorontsov Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-of-esdhc.c | 11 +++++++++++ drivers/mmc/host/sdhci-pltfm.c | 7 +++++++ drivers/mmc/host/sdhci.c | 3 +++ drivers/mmc/host/sdhci.h | 1 + include/linux/mmc/sdhci.h | 1 + 5 files changed, 23 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index ae5fcbfa1eef..63d219f57cae 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -169,6 +169,16 @@ static void esdhc_of_resume(struct sdhci_host *host) } #endif +static void esdhc_of_platform_init(struct sdhci_host *host) +{ + u32 vvn; + + vvn = in_be32(host->ioaddr + SDHCI_SLOT_INT_STATUS); + vvn = (vvn & SDHCI_VENDOR_VER_MASK) >> SDHCI_VENDOR_VER_SHIFT; + if (vvn == VENDOR_V_22) + host->quirks2 |= SDHCI_QUIRK2_HOST_NO_CMD23; +} + static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl, .read_w = esdhc_readw, @@ -180,6 +190,7 @@ static struct sdhci_ops sdhci_esdhc_ops = { .enable_dma = esdhc_of_enable_dma, .get_max_clock = esdhc_of_get_max_clock, .get_min_clock = esdhc_of_get_min_clock, + .platform_init = esdhc_of_platform_init, #ifdef CONFIG_PM .platform_suspend = esdhc_of_suspend, .platform_resume = esdhc_of_resume, diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c index 65551a9709cc..27164457f861 100644 --- a/drivers/mmc/host/sdhci-pltfm.c +++ b/drivers/mmc/host/sdhci-pltfm.c @@ -150,6 +150,13 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, goto err_remap; } + /* + * Some platforms need to probe the controller to be able to + * determine which caps should be used. 
+ */ + if (host->ops && host->ops->platform_init) + host->ops->platform_init(host); + platform_set_drvdata(pdev, host); return host; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 7922adb42386..f05a37747b3d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2837,6 +2837,9 @@ int sdhci_add_host(struct sdhci_host *host) if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) mmc->caps |= MMC_CAP_4_BIT_DATA; + if (host->quirks2 & SDHCI_QUIRK2_HOST_NO_CMD23) + mmc->caps &= ~MMC_CAP_CMD23; + if (caps[0] & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 97653ea8942b..71a4a7ed46c5 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -278,6 +278,7 @@ struct sdhci_ops { void (*hw_reset)(struct sdhci_host *host); void (*platform_suspend)(struct sdhci_host *host); void (*platform_resume)(struct sdhci_host *host); + void (*platform_init)(struct sdhci_host *host); }; #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index fa8529a859b8..1edcb4dad8c4 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -91,6 +91,7 @@ struct sdhci_host { unsigned int quirks2; /* More deviations from spec. */ #define SDHCI_QUIRK2_HOST_OFF_CARD_ON (1<<0) +#define SDHCI_QUIRK2_HOST_NO_CMD23 (1<<1) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From 8e2b36ea6e3abc613cbbdb41692fbd2f9ee18996 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Nov 2012 22:55:31 +0100 Subject: mmc: dw_mmc: constify dw_mci_idmac_ops in exynos back-end The of_device_id match data is now marked as const and must not be modified. This changes the dw_mmc to mark all pointers passing the dw_mci_drv_data or dw_mci_dma_ops structures as const, and also marks the static definitions as const. 
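The warning quoted just below is the symptom; in sketch form (hypothetical foo_* names, not the literal driver code), the const-correct pattern this change moves to looks like:

#include <linux/of.h>

struct foo_drv_data { unsigned long caps; };	/* stand-in for dw_mci_drv_data */
static const struct foo_drv_data foo_drv_data;

static const struct of_device_id foo_match[] = {
	{ .compatible = "vendor,foo", .data = &foo_drv_data },	/* .data is "const void *" */
	{ },
};

static const struct foo_drv_data *foo_get_drv_data(struct device_node *np)
{
	const struct of_device_id *match = of_match_node(foo_match, np);

	/* Keep the pointer const end-to-end; assigning match->data to a plain
	 * "struct foo_drv_data *" discards the qualifier, which is exactly what
	 * gcc complains about: */
	return match ? match->data : NULL;
}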
drivers/mmc/host/dw_mmc-exynos.c: In function 'dw_mci_exynos_probe': drivers/mmc/host/dw_mmc-exynos.c:234:11: warning: assignment discards 'const' qualifier from pointer target type [enabled by default] Signed-off-by: Arnd Bergmann Cc: Thomas Abraham Cc: Will Newton Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc-exynos.c | 6 +++--- drivers/mmc/host/dw_mmc-pltfm.c | 2 +- drivers/mmc/host/dw_mmc-pltfm.h | 2 +- drivers/mmc/host/dw_mmc.c | 2 +- include/linux/mmc/dw_mmc.h | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 0147ac3aad59..4d50da618166 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -208,7 +208,7 @@ static unsigned long exynos5250_dwmmc_caps[4] = { MMC_CAP_CMD23, }; -static struct dw_mci_drv_data exynos5250_drv_data = { +static const struct dw_mci_drv_data exynos5250_drv_data = { .caps = exynos5250_dwmmc_caps, .init = dw_mci_exynos_priv_init, .setup_clock = dw_mci_exynos_setup_clock, @@ -220,14 +220,14 @@ static struct dw_mci_drv_data exynos5250_drv_data = { static const struct of_device_id dw_mci_exynos_match[] = { { .compatible = "samsung,exynos5250-dw-mshc", - .data = (void *)&exynos5250_drv_data, }, + .data = &exynos5250_drv_data, }, {}, }; MODULE_DEVICE_TABLE(of, dw_mci_exynos_match); int dw_mci_exynos_probe(struct platform_device *pdev) { - struct dw_mci_drv_data *drv_data; + const struct dw_mci_drv_data *drv_data; const struct of_device_id *match; match = of_match_node(dw_mci_exynos_match, pdev->dev.of_node); diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c index e5957211b171..917936bee5d5 100644 --- a/drivers/mmc/host/dw_mmc-pltfm.c +++ b/drivers/mmc/host/dw_mmc-pltfm.c @@ -24,7 +24,7 @@ #include "dw_mmc.h" int dw_mci_pltfm_register(struct platform_device *pdev, - struct dw_mci_drv_data *drv_data) + const struct dw_mci_drv_data *drv_data) { struct dw_mci *host; struct resource *regs; diff --git a/drivers/mmc/host/dw_mmc-pltfm.h b/drivers/mmc/host/dw_mmc-pltfm.h index 301f24541fc2..2ac37b81de4d 100644 --- a/drivers/mmc/host/dw_mmc-pltfm.h +++ b/drivers/mmc/host/dw_mmc-pltfm.h @@ -13,7 +13,7 @@ #define _DW_MMC_PLTFM_H_ extern int dw_mci_pltfm_register(struct platform_device *pdev, - struct dw_mci_drv_data *drv_data); + const struct dw_mci_drv_data *drv_data); extern int __devexit dw_mci_pltfm_remove(struct platform_device *pdev); extern const struct dev_pm_ops dw_mci_pltfm_pmops; diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index b087f66e30c4..c0667c8af2bd 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -435,7 +435,7 @@ static int dw_mci_idmac_init(struct dw_mci *host) return 0; } -static struct dw_mci_dma_ops dw_mci_idmac_ops = { +static const struct dw_mci_dma_ops dw_mci_idmac_ops = { .init = dw_mci_idmac_init, .start = dw_mci_idmac_start_dma, .stop = dw_mci_idmac_stop_dma, diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 31416760723c..96531664a061 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -137,7 +137,7 @@ struct dw_mci { dma_addr_t sg_dma; void *sg_cpu; - struct dw_mci_dma_ops *dma_ops; + const struct dw_mci_dma_ops *dma_ops; #ifdef CONFIG_MMC_DW_IDMAC unsigned int ring_size; #else @@ -162,7 +162,7 @@ struct dw_mci { u16 data_offset; struct device *dev; struct dw_mci_board *pdata; - struct dw_mci_drv_data *drv_data; + const struct dw_mci_drv_data *drv_data; void *priv; struct 
clk *biu_clk; struct clk *ciu_clk; -- cgit v1.2.3 From a80a6b85b428e6ce12a8363bb1f08d44c50f3252 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 8 Nov 2012 15:53:35 -0800 Subject: revert "epoll: support for disabling items, and a self-test app" Revert commit 03a7beb55b9f ("epoll: support for disabling items, and a self-test app") pending resolution of the issues identified by Michael Kerrisk, copied below. We'll revisit this for 3.8. : I've taken a look at this patch as it currently stands in 3.7-rc1, and : done a bit of testing. (By the way, the test program : tools/testing/selftests/epoll/test_epoll.c does not compile...) : : There are one or two places where the behavior seems a little strange, : so I have a question or two at the end of this mail. But other than : that, I want to check my understanding so that the interface can be : correctly documented. : : Just to go though my understanding, the problem is the following : scenario in a multithreaded application: : : 1. Multiple threads are performing epoll_wait() operations, : and maintaining a user-space cache that contains information : corresponding to each file descriptor being monitored by : epoll_wait(). : : 2. At some point, a thread wants to delete (EPOLL_CTL_DEL) : a file descriptor from the epoll interest list, and : delete the corresponding record from the user-space cache. : : 3. The problem with (2) is that some other thread may have : previously done an epoll_wait() that retrieved information : about the fd in question, and may be in the middle of using : information in the cache that relates to that fd. Thus, : there is a potential race. : : 4. The race can't solved purely in user space, because doing : so would require applying a mutex across the epoll_wait() : call, which would of course blow thread concurrency. : : Right? : : Your solution is the EPOLL_CTL_DISABLE operation. I want to : confirm my understanding about how to use this flag, since : the description that has accompanied the patches so far : has been a bit sparse : : 0. In the scenario you're concerned about, deleting a file : descriptor means (safely) doing the following: : (a) Deleting the file descriptor from the epoll interest list : using EPOLL_CTL_DEL : (b) Deleting the corresponding record in the user-space cache : : 1. It's only meaningful to use this EPOLL_CTL_DISABLE in : conjunction with EPOLLONESHOT. : : 2. Using EPOLL_CTL_DISABLE without using EPOLLONESHOT in : conjunction is a logical error. : : 3. The correct way to code multithreaded applications using : EPOLL_CTL_DISABLE and EPOLLONESHOT is as follows: : : a. All EPOLL_CTL_ADD and EPOLL_CTL_MOD operations should : should EPOLLONESHOT. : : b. When a thread wants to delete a file descriptor, it : should do the following: : : [1] Call epoll_ctl(EPOLL_CTL_DISABLE) : [2] If the return status from epoll_ctl(EPOLL_CTL_DISABLE) : was zero, then the file descriptor can be safely : deleted by the thread that made this call. : [3] If the epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY, : then the descriptor is in use. In this case, the calling : thread should set a flag in the user-space cache to : indicate that the thread that is using the descriptor : should perform the deletion operation. : : Is all of the above correct? 
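In sketch form, the deletion protocol spelled out in steps [1]-[3] of the quoted mail would look roughly like this (cache_entry_free() and cache_mark_defunct() are hypothetical user-space helpers, locking is omitted, and the fd is assumed to have been added with EPOLLONESHOT):

	if (epoll_ctl(epfd, EPOLL_CTL_DISABLE, fd, NULL) == 0) {
		/* no thread is handling events for fd: safe to delete here */
		epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL);
		cache_entry_free(entry);
	} else if (errno == EBUSY) {
		/* another thread is mid-handling; flag it to do the deletion */
		cache_mark_defunct(entry);
	}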
: : The implementation depends on checking on whether : (events & ~EP_PRIVATE_BITS) == 0 : This replies on the fact that EPOLL_CTL_AD and EPOLL_CTL_MOD always : set EPOLLHUP and EPOLLERR in the 'events' mask, and EPOLLONESHOT : causes those flags (as well as all others in ~EP_PRIVATE_BITS) to be : cleared. : : A corollary to the previous paragraph is that using EPOLL_CTL_DISABLE : is only useful in conjunction with EPOLLONESHOT. However, as things : stand, one can use EPOLL_CTL_DISABLE on a file descriptor that does : not have EPOLLONESHOT set in 'events' This results in the following : (slightly surprising) behavior: : : (a) The first call to epoll_ctl(EPOLL_CTL_DISABLE) returns 0 : (the indicator that the file descriptor can be safely deleted). : (b) The next call to epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY. : : This doesn't seem particularly useful, and in fact is probably an : indication that the user made a logic error: they should only be using : epoll_ctl(EPOLL_CTL_DISABLE) on a file descriptor for which : EPOLLONESHOT was set in 'events'. If that is correct, then would it : not make sense to return an error to user space for this case? Cc: Michael Kerrisk Cc: "Paton J. Lewis" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 38 +--- include/uapi/linux/eventpoll.h | 1 - tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/epoll/Makefile | 11 - tools/testing/selftests/epoll/test_epoll.c | 344 ----------------------------- 5 files changed, 4 insertions(+), 392 deletions(-) delete mode 100644 tools/testing/selftests/epoll/Makefile delete mode 100644 tools/testing/selftests/epoll/test_epoll.c (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index da72250ddc1c..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { - return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; + return op != EPOLL_CTL_DEL; } /* Initialize the poll safe wake up structure */ @@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) return 0; } -/* - * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item - * had no event flags set, indicating that another thread may be currently - * handling that item's events (in the case that EPOLLONESHOT was being - * used). Otherwise a zero result indicates that the item has been disabled - * from receiving events. A disabled item may be re-enabled via - * EPOLL_CTL_MOD. Must be called with "mtx" held. 
- */ -static int ep_disable(struct eventpoll *ep, struct epitem *epi) -{ - int result = 0; - unsigned long flags; - - spin_lock_irqsave(&ep->lock, flags); - if (epi->event.events & ~EP_PRIVATE_BITS) { - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - /* Ensure ep_poll_callback will not add epi back onto ready - list: */ - epi->event.events &= EP_PRIVATE_BITS; - } - else - result = -EBUSY; - spin_unlock_irqrestore(&ep->lock, flags); - - return result; -} - static void ep_free(struct eventpoll *ep) { struct rb_node *rbp; @@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate @@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } else error = -ENOENT; break; - case EPOLL_CTL_DISABLE: - if (epi) - error = ep_disable(ep, epi); - else - error = -ENOENT; - break; } mutex_unlock(&ep->mtx); diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 8c99ce7202c5..2c267bcbb85c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -25,7 +25,6 @@ #define EPOLL_CTL_ADD 1 #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 -#define EPOLL_CTL_DISABLE 4 /* * Request the handling of system wakeup events so as to prevent system suspends diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 43480149119e..85baf11e2acd 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile deleted file mode 100644 index 19806ed62f50..000000000000 --- a/tools/testing/selftests/epoll/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# Makefile for epoll selftests - -all: test_epoll -%: %.c - gcc -pthread -g -o $@ $^ - -run_tests: all - ./test_epoll - -clean: - $(RM) test_epoll diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c deleted file mode 100644 index f7525392ce84..000000000000 --- a/tools/testing/selftests/epoll/test_epoll.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * tools/testing/selftests/epoll/test_epoll.c - * - * Copyright 2012 Adobe Systems Incorporated - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Paton J. Lewis - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * A pointer to an epoll_item_private structure will be stored in the epoll - * item's event structure so that we can get access to the epoll_item_private - * data after calling epoll_wait: - */ -struct epoll_item_private { - int index; /* Position of this struct within the epoll_items array. */ - int fd; - uint32_t events; - pthread_mutex_t mutex; /* Guards the following variables... */ - int stop; - int status; /* Stores any error encountered while handling item. */ - /* The following variable allows us to test whether we have encountered - a problem while attempting to cancel and delete the associated - event. 
When the test program exits, 'deleted' should be exactly - one. If it is greater than one, then the failed test reflects a real - world situation where we would have tried to access the epoll item's - private data after deleting it: */ - int deleted; -}; - -struct epoll_item_private *epoll_items; - -/* - * Delete the specified item from the epoll set. In a real-world secneario this - * is where we would free the associated data structure, but in this testing - * environment we retain the structure so that we can test for double-deletion: - */ -void delete_item(int index) -{ - __sync_fetch_and_add(&epoll_items[index].deleted, 1); -} - -/* - * A pointer to a read_thread_data structure will be passed as the argument to - * each read thread: - */ -struct read_thread_data { - int stop; - int status; /* Indicates any error encountered by the read thread. */ - int epoll_set; -}; - -/* - * The function executed by the read threads: - */ -void *read_thread_function(void *function_data) -{ - struct read_thread_data *thread_data = - (struct read_thread_data *)function_data; - struct epoll_event event_data; - struct epoll_item_private *item_data; - char socket_data; - - /* Handle events until we encounter an error or this thread's 'stop' - condition is set: */ - while (1) { - int result = epoll_wait(thread_data->epoll_set, - &event_data, - 1, /* Number of desired events */ - 1000); /* Timeout in ms */ - if (result < 0) { - /* Breakpoints signal all threads. Ignore that while - debugging: */ - if (errno == EINTR) - continue; - thread_data->status = errno; - return 0; - } else if (thread_data->stop) - return 0; - else if (result == 0) /* Timeout */ - continue; - - /* We need the mutex here because checking for the stop - condition and re-enabling the epoll item need to be done - together as one atomic operation when EPOLL_CTL_DISABLE is - available: */ - item_data = (struct epoll_item_private *)event_data.data.ptr; - pthread_mutex_lock(&item_data->mutex); - - /* Remove the item from the epoll set if we want to stop - handling that event: */ - if (item_data->stop) - delete_item(item_data->index); - else { - /* Clear the data that was written to the other end of - our non-blocking socket: */ - do { - if (read(item_data->fd, &socket_data, 1) < 1) { - if ((errno == EAGAIN) || - (errno == EWOULDBLOCK)) - break; - else - goto error_unlock; - } - } while (item_data->events & EPOLLET); - - /* The item was one-shot, so re-enable it: */ - event_data.events = item_data->events; - if (epoll_ctl(thread_data->epoll_set, - EPOLL_CTL_MOD, - item_data->fd, - &event_data) < 0) - goto error_unlock; - } - - pthread_mutex_unlock(&item_data->mutex); - } - -error_unlock: - thread_data->status = item_data->status = errno; - pthread_mutex_unlock(&item_data->mutex); - return 0; -} - -/* - * A pointer to a write_thread_data structure will be passed as the argument to - * the write thread: - */ -struct write_thread_data { - int stop; - int status; /* Indicates any error encountered by the write thread. */ - int n_fds; - int *fds; -}; - -/* - * The function executed by the write thread. It writes a single byte to each - * socket in turn until the stop condition for this thread is set. If writing to - * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for - * the moment and just move on to the next socket in the list. We don't care - * about the order in which we deliver events to the epoll set. 
In fact we don't - * care about the data we're writing to the pipes at all; we just want to - * trigger epoll events: - */ -void *write_thread_function(void *function_data) -{ - const char data = 'X'; - int index; - struct write_thread_data *thread_data = - (struct write_thread_data *)function_data; - while (!thread_data->stop) - for (index = 0; - !thread_data->stop && (index < thread_data->n_fds); - ++index) - if ((write(thread_data->fds[index], &data, 1) < 1) && - (errno != EAGAIN) && - (errno != EWOULDBLOCK)) { - thread_data->status = errno; - return; - } -} - -/* - * Arguments are currently ignored: - */ -int main(int argc, char **argv) -{ - const int n_read_threads = 100; - const int n_epoll_items = 500; - int index; - int epoll_set = epoll_create1(0); - struct write_thread_data write_thread_data = { - 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) - }; - struct read_thread_data *read_thread_data = - malloc(n_read_threads * sizeof(struct read_thread_data)); - pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); - pthread_t write_thread; - - printf("-----------------\n"); - printf("Runing test_epoll\n"); - printf("-----------------\n"); - - epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); - - if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || - read_thread_data == 0 || read_threads == 0) - goto error; - - if (sysconf(_SC_NPROCESSORS_ONLN) < 2) { - printf("Error: please run this test on a multi-core system.\n"); - goto error; - } - - /* Create the socket pairs and epoll items: */ - for (index = 0; index < n_epoll_items; ++index) { - int socket_pair[2]; - struct epoll_event event_data; - if (socketpair(AF_UNIX, - SOCK_STREAM | SOCK_NONBLOCK, - 0, - socket_pair) < 0) - goto error; - write_thread_data.fds[index] = socket_pair[0]; - epoll_items[index].index = index; - epoll_items[index].fd = socket_pair[1]; - if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) - goto error; - /* We always use EPOLLONESHOT because this test is currently - structured to demonstrate the need for EPOLL_CTL_DISABLE, - which only produces useful information in the EPOLLONESHOT - case (without EPOLLONESHOT, calling epoll_ctl with - EPOLL_CTL_DISABLE will never return EBUSY). If support for - testing events without EPOLLONESHOT is desired, it should - probably be implemented in a separate unit test. 
*/ - epoll_items[index].events = EPOLLIN | EPOLLONESHOT; - if (index < n_epoll_items / 2) - epoll_items[index].events |= EPOLLET; - epoll_items[index].stop = 0; - epoll_items[index].status = 0; - epoll_items[index].deleted = 0; - event_data.events = epoll_items[index].events; - event_data.data.ptr = &epoll_items[index]; - if (epoll_ctl(epoll_set, - EPOLL_CTL_ADD, - epoll_items[index].fd, - &event_data) < 0) - goto error; - } - - /* Create and start the read threads: */ - for (index = 0; index < n_read_threads; ++index) { - read_thread_data[index].stop = 0; - read_thread_data[index].status = 0; - read_thread_data[index].epoll_set = epoll_set; - if (pthread_create(&read_threads[index], - NULL, - read_thread_function, - &read_thread_data[index]) != 0) - goto error; - } - - if (pthread_create(&write_thread, - NULL, - write_thread_function, - &write_thread_data) != 0) - goto error; - - /* Cancel all event pollers: */ -#ifdef EPOLL_CTL_DISABLE - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - if (epoll_ctl(epoll_set, - EPOLL_CTL_DISABLE, - epoll_items[index].fd, - NULL) == 0) - delete_item(index); - else if (errno != EBUSY) { - pthread_mutex_unlock(&epoll_items[index].mutex); - goto error; - } - /* EBUSY means events were being handled; allow the other thread - to delete the item. */ - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#else - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - pthread_mutex_unlock(&epoll_items[index].mutex); - /* Wait in case a thread running read_thread_function is - currently executing code between epoll_wait and - pthread_mutex_lock with this item. Note that a longer delay - would make double-deletion less likely (at the expense of - performance), but there is no guarantee that any delay would - ever be sufficient. Note also that we delete all event - pollers at once for testing purposes, but in a real-world - environment we are likely to want to be able to cancel event - pollers at arbitrary times. Therefore we can't improve this - situation by just splitting this loop into two loops - (i.e. signal 'stop' for all items, sleep, and then delete all - items). 
We also can't fix the problem via EPOLL_CTL_DEL - because that command can't prevent the case where some other - thread is executing read_thread_function within the region - mentioned above: */ - usleep(1); - pthread_mutex_lock(&epoll_items[index].mutex); - if (!epoll_items[index].deleted) - delete_item(index); - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#endif - - /* Shut down the read threads: */ - for (index = 0; index < n_read_threads; ++index) - __sync_fetch_and_add(&read_thread_data[index].stop, 1); - for (index = 0; index < n_read_threads; ++index) { - if (pthread_join(read_threads[index], NULL) != 0) - goto error; - if (read_thread_data[index].status) - goto error; - } - - /* Shut down the write thread: */ - __sync_fetch_and_add(&write_thread_data.stop, 1); - if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) - goto error; - - /* Check for final error conditions: */ - for (index = 0; index < n_epoll_items; ++index) { - if (epoll_items[index].status != 0) - goto error; - if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) - goto error; - } - for (index = 0; index < n_epoll_items; ++index) - if (epoll_items[index].deleted != 1) { - printf("Error: item data deleted %1d times.\n", - epoll_items[index].deleted); - goto error; - } - - printf("[PASS]\n"); - return 0; - - error: - printf("[FAIL]\n"); - return errno; -} -- cgit v1.2.3 From 0bce04be442cf4d6e4ba9dac2f0a4c5ee88af5c5 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Tue, 6 Nov 2012 00:12:03 +0000 Subject: of/address: sparc: Declare of_address_to_resource() as an extern function for sparc again This bug-fix makes sure that of_address_to_resource is defined extern for sparc so that the sparc-specific implementation of of_address_to_resource() is once again used when including include/linux/of_address.h in a sparc context. A number of drivers in mainline relies on this function working for sparc. The bug was introduced in a850a7554442f08d3e910c6eeb4ee216868dda1e, "of/address: add empty static inlines for !CONFIG_OF". Contrary to that commit title, the static inlines are added for !CONFIG_OF_ADDRESS, and CONFIG_OF_ADDRESS is never defined for sparc. This is good behavior for the other functions in include/linux/of_address.h, as the extern functions defined in drivers/of/address.c only gets linked when OF_ADDRESS is configured. However, for of_address_to_resource there exists a sparc-specific implementation in arch/sparc/arch/sparc/kernel/of_device_common.c Solution suggested by: Sam Ravnborg Signed-off-by: Andreas Larsson Acked-by: Rob Herring Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/prom.h | 5 +++++ include/linux/of_address.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index c28765110706..f93003123bce 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -63,5 +63,10 @@ extern char *of_console_options; extern void irq_trans_init(struct device_node *dp); extern char *build_path_component(struct device_node *dp); +/* SPARC has a local implementation */ +extern int of_address_to_resource(struct device_node *dev, int index, + struct resource *r); +#define of_address_to_resource of_address_to_resource + #endif /* __KERNEL__ */ #endif /* _SPARC_PROM_H */ diff --git a/include/linux/of_address.h b/include/linux/of_address.h index a1984dd037da..e20e3af68fb6 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -28,11 +28,13 @@ static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; } #endif #else /* CONFIG_OF_ADDRESS */ +#ifndef of_address_to_resource static inline int of_address_to_resource(struct device_node *dev, int index, struct resource *r) { return -EINVAL; } +#endif static inline struct device_node *of_find_matching_node_by_address( struct device_node *from, const struct of_device_id *matches, -- cgit v1.2.3 From 703fb94ec58e0e8769380c2877a8a34aeb5b6c97 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 13 Nov 2012 08:52:24 +0100 Subject: xfrm: Fix the gc threshold value for ipv4 The xfrm gc threshold value depends on ip_rt_max_size. This value was set to INT_MAX with the routing cache removal patch, so we start doing garbage collecting when we have INT_MAX/2 IPsec routes cached. Fix this by going back to the static threshold of 1024 routes. Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/ipv4/route.c | 2 +- net/ipv4/xfrm4_policy.c | 13 +------------ 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6f0ba01afe73..63445ede48bb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1351,7 +1351,7 @@ struct xfrm6_tunnel { }; extern void xfrm_init(void); -extern void xfrm4_init(int rt_hash_size); +extern void xfrm4_init(void); extern int xfrm_state_init(struct net *net); extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a8c651216fa6..200d287e49f5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2597,7 +2597,7 @@ int __init ip_rt_init(void) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); - xfrm4_init(ip_rt_max_size); + xfrm4_init(); #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05c5ab8d983c..3be0ac2c1920 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -279,19 +279,8 @@ static void __exit xfrm4_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } -void __init xfrm4_init(int rt_max_size) +void __init xfrm4_init(void) { - /* - * Select a default value for the gc_thresh based on the main route - * table hash size. It seems to me the worst case scenario is when - * we have ipsec operating in transport mode, in which we create a - * dst_entry per socket. 
The xfrm gc algorithm starts trying to remove - * entries at gc_thresh, and prevents new allocations as 2*gc_thresh - * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. - * That will let us store an ipsec connection per route table entry, - * and start cleaning when were 1/2 full - */ - xfrm4_dst_ops.gc_thresh = rt_max_size/2; dst_entries_init(&xfrm4_dst_ops); xfrm4_state_init(); -- cgit v1.2.3 From 3c6bdaeab4fda6c9fdd5f3f5c610dea97bddf7d6 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:30 -0400 Subject: [SCSI] Add a report opcode helper The REPORT SUPPORTED OPERATION CODES command can be used to query whether a given opcode is supported by a device. Add a helper function that allows us to look up commands. We only issue RSOC if the device reports compliance with SPC-3 or later. But to err on the side of caution we disable the command for ATA, FireWire and USB. Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Signed-off-by: James Bottomley --- drivers/ata/libata-scsi.c | 1 + drivers/firewire/sbp2.c | 1 + drivers/scsi/scsi.c | 45 ++++++++++++++++++++++++++++++++++++++++++ drivers/usb/storage/scsiglue.c | 3 +++ include/scsi/scsi_device.h | 3 +++ 5 files changed, 53 insertions(+) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e3bda074fa12..7c2dead60518 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1052,6 +1052,7 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev) { sdev->use_10_for_rw = 1; sdev->use_10_for_ms = 1; + sdev->no_report_opcodes = 1; /* Schedule policy is determined by ->qc_defer() callback and * it needs to see every deferred qc. Set dev_blocked to 1 to diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 1162d6b3bf85..f82e9d4295d0 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1546,6 +1546,7 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) struct sbp2_logical_unit *lu = sdev->hostdata; sdev->use_10_for_rw = 1; + sdev->no_report_opcodes = 1; if (sbp2_param_exclusive_login) sdev->manage_start_stop = 1; diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 2936b447cae9..2c0d0ec8150b 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -1061,6 +1062,50 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf, } EXPORT_SYMBOL_GPL(scsi_get_vpd_page); +/** + * scsi_report_opcode - Find out if a given command opcode is supported + * @sdev: scsi device to query + * @buffer: scratch buffer (must be at least 20 bytes long) + * @len: length of buffer + * @opcode: opcode for command to look up + * + * Uses the REPORT SUPPORTED OPERATION CODES to look up the given + * opcode. Returns 0 if RSOC fails or if the command opcode is + * unsupported. Returns 1 if the device claims to support the command. 
+ */ +int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, + unsigned int len, unsigned char opcode) +{ + unsigned char cmd[16]; + struct scsi_sense_hdr sshdr; + int result; + + if (sdev->no_report_opcodes || sdev->scsi_level < SCSI_SPC_3) + return 0; + + memset(cmd, 0, 16); + cmd[0] = MAINTENANCE_IN; + cmd[1] = MI_REPORT_SUPPORTED_OPERATION_CODES; + cmd[2] = 1; /* One command format */ + cmd[3] = opcode; + put_unaligned_be32(len, &cmd[6]); + memset(buffer, 0, len); + + result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len, + &sshdr, 30 * HZ, 3, NULL); + + if (result && scsi_sense_valid(&sshdr) && + sshdr.sense_key == ILLEGAL_REQUEST && + (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) + return 0; + + if ((buffer[1] & 3) == 3) /* Command supported */ + return 1; + + return 0; +} +EXPORT_SYMBOL(scsi_report_opcode); + /** * scsi_device_get - get an additional reference to a scsi_device * @sdev: device to get a reference to diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index a3d54366afcc..6ab376a7c501 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -186,6 +186,9 @@ static int slave_configure(struct scsi_device *sdev) /* Some devices don't handle VPD pages correctly */ sdev->skip_vpd_pages = 1; + /* Do not attempt to use REPORT SUPPORTED OPERATION CODES */ + sdev->no_report_opcodes = 1; + /* Some disks return the total number of blocks in response * to READ CAPACITY rather than the highest block number. * If this device makes that mistake, tell the sd driver. */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 88fae8d20154..379d465e8070 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -135,6 +135,7 @@ struct scsi_device { * because we did a bus reset. */ unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ + unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */ unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */ unsigned skip_vpd_pages:1; /* do not read VPD pages */ @@ -362,6 +363,8 @@ extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); extern int scsi_get_vpd_page(struct scsi_device *, u8 page, unsigned char *buf, int buf_len); +extern int scsi_report_opcode(struct scsi_device *sdev, unsigned char *buffer, + unsigned int len, unsigned char opcode); extern int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state); extern struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type, -- cgit v1.2.3 From 5db44863b6ebbb400c5e61d56ebe8f21ef48b1bd Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:32 -0400 Subject: [SCSI] sd: Implement support for WRITE SAME Implement support for WRITE SAME(10) and WRITE SAME(16) in the SCSI disk driver. - We set the default maximum to 0xFFFF because there are several devices out there that only support two-byte block counts even with WRITE SAME(16). We only enable transfers bigger than 0xFFFF if the device explicitly reports MAXIMUM WRITE SAME LENGTH in the BLOCK LIMITS VPD. - max_write_same_blocks can be overriden per-device basis in sysfs. - The UNMAP discovery heuristics remain unchanged but the discard limits are tweaked to match the "real" WRITE SAME commands. 
- In the error handling logic we now distinguish between WRITE SAME with and without UNMAP set. The discovery process heuristics are: - If the device reports a SCSI level of SPC-3 or greater we'll issue READ SUPPORTED OPERATION CODES to find out whether WRITE SAME(16) is supported. If that's the case we will use it. - If the device supports the block limits VPD and reports a MAXIMUM WRITE SAME LENGTH bigger than 0xFFFF we will use WRITE SAME(16). - Otherwise we will use WRITE SAME(10) unless the target LBA is beyond 0xFFFFFFFF or the block count exceeds 0xFFFF. - no_write_same is set for ATA, FireWire and USB. Signed-off-by: Martin K. Petersen Reviewed-by: Mike Snitzer Reviewed-by: Jeff Garzik Signed-off-by: James Bottomley --- drivers/ata/libata-scsi.c | 1 + drivers/firewire/sbp2.c | 1 + drivers/scsi/scsi_lib.c | 22 ++++-- drivers/scsi/sd.c | 172 ++++++++++++++++++++++++++++++++++++++--- drivers/scsi/sd.h | 7 ++ drivers/usb/storage/scsiglue.c | 3 + include/scsi/scsi_device.h | 1 + 7 files changed, 191 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7c2dead60518..a6df6a351d6e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1053,6 +1053,7 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev) sdev->use_10_for_rw = 1; sdev->use_10_for_ms = 1; sdev->no_report_opcodes = 1; + sdev->no_write_same = 1; /* Schedule policy is determined by ->qc_defer() callback and * it needs to see every deferred qc. Set dev_blocked to 1 to diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index f82e9d4295d0..bb1b392f5cda 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1547,6 +1547,7 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) sdev->use_10_for_rw = 1; sdev->no_report_opcodes = 1; + sdev->no_write_same = 1; if (sbp2_param_exclusive_login) sdev->manage_start_stop = 1; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index da36a3a81a9e..9032e910bca3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -900,11 +900,23 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) action = ACTION_FAIL; error = -EILSEQ; /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ - } else if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) && - (cmd->cmnd[0] == UNMAP || - cmd->cmnd[0] == WRITE_SAME_16 || - cmd->cmnd[0] == WRITE_SAME)) { - description = "Discard failure"; + } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { + switch (cmd->cmnd[0]) { + case UNMAP: + description = "Discard failure"; + break; + case WRITE_SAME: + case WRITE_SAME_16: + if (cmd->cmnd[1] & 0x8) + description = "Discard failure"; + else + description = + "Write same failure"; + break; + default: + description = "Invalid command failure"; + break; + } action = ACTION_FAIL; error = -EREMOTEIO; } else diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c6af4c1a9ca3..352bc77b7c88 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -99,6 +99,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); #endif static void sd_config_discard(struct scsi_disk *, unsigned int); +static void sd_config_write_same(struct scsi_disk *); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static int sd_probe(struct device *); @@ -395,6 +396,45 @@ sd_store_max_medium_access_timeouts(struct device *dev, return err ? 
err : count; } +static ssize_t +sd_show_write_same_blocks(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + + return snprintf(buf, 20, "%u\n", sdkp->max_ws_blocks); +} + +static ssize_t +sd_store_write_same_blocks(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_disk *sdkp = to_scsi_disk(dev); + struct scsi_device *sdp = sdkp->device; + unsigned long max; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (sdp->type != TYPE_DISK) + return -EINVAL; + + err = kstrtoul(buf, 10, &max); + + if (err) + return err; + + if (max == 0) + sdp->no_write_same = 1; + else if (max <= SD_MAX_WS16_BLOCKS) + sdkp->max_ws_blocks = max; + + sd_config_write_same(sdkp); + + return count; +} + static struct device_attribute sd_disk_attrs[] = { __ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type, sd_store_cache_type), @@ -410,6 +450,8 @@ static struct device_attribute sd_disk_attrs[] = { __ATTR(thin_provisioning, S_IRUGO, sd_show_thin_provisioning, NULL), __ATTR(provisioning_mode, S_IRUGO|S_IWUSR, sd_show_provisioning_mode, sd_store_provisioning_mode), + __ATTR(max_write_same_blocks, S_IRUGO|S_IWUSR, + sd_show_write_same_blocks, sd_store_write_same_blocks), __ATTR(max_medium_access_timeouts, S_IRUGO|S_IWUSR, sd_show_max_medium_access_timeouts, sd_store_max_medium_access_timeouts), @@ -561,19 +603,23 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) return; case SD_LBP_UNMAP: - max_blocks = min_not_zero(sdkp->max_unmap_blocks, 0xffffffff); + max_blocks = min_not_zero(sdkp->max_unmap_blocks, + (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS16: - max_blocks = min_not_zero(sdkp->max_ws_blocks, 0xffffffff); + max_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS10: - max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff); + max_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS10_BLOCKS); break; case SD_LBP_ZERO: - max_blocks = min_not_zero(sdkp->max_ws_blocks, (u32)0xffff); + max_blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS10_BLOCKS); q->limits.discard_zeroes_data = 1; break; } @@ -667,6 +713,83 @@ out: return ret; } +static void sd_config_write_same(struct scsi_disk *sdkp) +{ + struct request_queue *q = sdkp->disk->queue; + unsigned int logical_block_size = sdkp->device->sector_size; + unsigned int blocks = 0; + + if (sdkp->device->no_write_same) { + sdkp->max_ws_blocks = 0; + goto out; + } + + /* Some devices can not handle block counts above 0xffff despite + * supporting WRITE SAME(16). Consequently we default to 64k + * blocks per I/O unless the device explicitly advertises a + * bigger limit. + */ + if (sdkp->max_ws_blocks == 0) + sdkp->max_ws_blocks = SD_MAX_WS10_BLOCKS; + + if (sdkp->ws16 || sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS) + blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS16_BLOCKS); + else + blocks = min_not_zero(sdkp->max_ws_blocks, + (u32)SD_MAX_WS10_BLOCKS); + +out: + blk_queue_max_write_same_sectors(q, blocks * (logical_block_size >> 9)); +} + +/** + * sd_setup_write_same_cmnd - write the same data to multiple blocks + * @sdp: scsi device to operate one + * @rq: Request to prepare + * + * Will issue either WRITE SAME(10) or WRITE SAME(16) depending on + * preference indicated by target device. 
+ **/ +static int sd_setup_write_same_cmnd(struct scsi_device *sdp, struct request *rq) +{ + struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); + struct bio *bio = rq->bio; + sector_t sector = blk_rq_pos(rq); + unsigned int nr_sectors = blk_rq_sectors(rq); + unsigned int nr_bytes = blk_rq_bytes(rq); + int ret; + + if (sdkp->device->no_write_same) + return BLKPREP_KILL; + + BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size); + + sector >>= ilog2(sdp->sector_size) - 9; + nr_sectors >>= ilog2(sdp->sector_size) - 9; + + rq->__data_len = sdp->sector_size; + rq->timeout = SD_WRITE_SAME_TIMEOUT; + memset(rq->cmd, 0, rq->cmd_len); + + if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff) { + rq->cmd_len = 16; + rq->cmd[0] = WRITE_SAME_16; + put_unaligned_be64(sector, &rq->cmd[2]); + put_unaligned_be32(nr_sectors, &rq->cmd[10]); + } else { + rq->cmd_len = 10; + rq->cmd[0] = WRITE_SAME; + put_unaligned_be32(sector, &rq->cmd[2]); + put_unaligned_be16(nr_sectors, &rq->cmd[7]); + } + + ret = scsi_setup_blk_pc_cmnd(sdp, rq); + rq->__data_len = nr_bytes; + + return ret; +} + static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq) { rq->timeout = SD_FLUSH_TIMEOUT; @@ -712,6 +835,9 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) if (rq->cmd_flags & REQ_DISCARD) { ret = sd_setup_discard_cmnd(sdp, rq); goto out; + } else if (rq->cmd_flags & REQ_WRITE_SAME) { + ret = sd_setup_write_same_cmnd(sdp, rq); + goto out; } else if (rq->cmd_flags & REQ_FLUSH) { ret = scsi_setup_flush_cmnd(sdp, rq); goto out; @@ -1484,8 +1610,9 @@ static int sd_done(struct scsi_cmnd *SCpnt) int sense_valid = 0; int sense_deferred = 0; unsigned char op = SCpnt->cmnd[0]; + unsigned char unmap = SCpnt->cmnd[1] & 8; - if (req->cmd_flags & REQ_DISCARD) { + if (req->cmd_flags & REQ_DISCARD || req->cmd_flags & REQ_WRITE_SAME) { if (!result) { good_bytes = blk_rq_bytes(req); scsi_set_resid(SCpnt, 0); @@ -1542,9 +1669,25 @@ static int sd_done(struct scsi_cmnd *SCpnt) if (sshdr.asc == 0x10) /* DIX: Host detected corruption */ good_bytes = sd_completed_bytes(SCpnt); /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ - if ((sshdr.asc == 0x20 || sshdr.asc == 0x24) && - (op == UNMAP || op == WRITE_SAME_16 || op == WRITE_SAME)) - sd_config_discard(sdkp, SD_LBP_DISABLE); + if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { + switch (op) { + case UNMAP: + sd_config_discard(sdkp, SD_LBP_DISABLE); + break; + case WRITE_SAME_16: + case WRITE_SAME: + if (unmap) + sd_config_discard(sdkp, SD_LBP_DISABLE); + else { + sdkp->device->no_write_same = 1; + sd_config_write_same(sdkp); + + good_bytes = 0; + req->__data_len = blk_rq_bytes(req); + req->cmd_flags |= REQ_QUIET; + } + } + } break; default: break; @@ -2380,9 +2523,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) if (buffer[3] == 0x3c) { unsigned int lba_count, desc_count; - sdkp->max_ws_blocks = - (u32) min_not_zero(get_unaligned_be64(&buffer[36]), - (u64)0xffffffff); + sdkp->max_ws_blocks = (u32)get_unaligned_be64(&buffer[36]); if (!sdkp->lbpme) goto out; @@ -2475,6 +2616,13 @@ static void sd_read_block_provisioning(struct scsi_disk *sdkp) kfree(buffer); } +static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) +{ + if (scsi_report_opcode(sdkp->device, buffer, SD_BUF_SIZE, + WRITE_SAME_16)) + sdkp->ws16 = 1; +} + static int sd_try_extended_inquiry(struct scsi_device *sdp) { /* @@ -2534,6 +2682,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_write_protect_flag(sdkp, buffer); 
sd_read_cache_type(sdkp, buffer); sd_read_app_tag_own(sdkp, buffer); + sd_read_write_same(sdkp, buffer); } sdkp->first_scan = 0; @@ -2551,6 +2700,7 @@ static int sd_revalidate_disk(struct gendisk *disk) blk_queue_flush(sdkp->disk->queue, flush); set_capacity(disk, sdkp->capacity); + sd_config_write_same(sdkp); kfree(buffer); out: diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 47c52a6d733c..74a1e4ca5401 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -14,6 +14,7 @@ #define SD_TIMEOUT (30 * HZ) #define SD_MOD_TIMEOUT (75 * HZ) #define SD_FLUSH_TIMEOUT (60 * HZ) +#define SD_WRITE_SAME_TIMEOUT (120 * HZ) /* * Number of allowed retries @@ -38,6 +39,11 @@ enum { SD_MEMPOOL_SIZE = 2, /* CDB pool size */ }; +enum { + SD_MAX_WS10_BLOCKS = 0xffff, + SD_MAX_WS16_BLOCKS = 0x7fffff, +}; + enum { SD_LBP_FULL = 0, /* Full logical block provisioning */ SD_LBP_UNMAP, /* Use UNMAP command */ @@ -77,6 +83,7 @@ struct scsi_disk { unsigned lbpws : 1; unsigned lbpws10 : 1; unsigned lbpvpd : 1; + unsigned ws16 : 1; }; #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev) diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 6ab376a7c501..92f35abee92d 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -189,6 +189,9 @@ static int slave_configure(struct scsi_device *sdev) /* Do not attempt to use REPORT SUPPORTED OPERATION CODES */ sdev->no_report_opcodes = 1; + /* Do not attempt to use WRITE SAME */ + sdev->no_write_same = 1; + /* Some disks return the total number of blocks in response * to READ CAPACITY rather than the highest block number. * If this device makes that mistake, tell the sd driver. */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 379d465e8070..55367b04dc94 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -136,6 +136,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ + unsigned no_write_same:1; /* no WRITE SAME command */ unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */ unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */ unsigned skip_vpd_pages:1; /* do not read VPD pages */ -- cgit v1.2.3 From 9aadd70aed60b47e367e7a1a6b9068daba04fe05 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 6 Nov 2012 16:31:32 +0000 Subject: Revert "ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints" This reverts commit 3db11feffc1ad2ab9dea27789e6b5b3032827adc (ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints). 
This commit causes I2C timeouts to appear on several OMAP3430/3530-based boards: http://marc.info/?l=linux-arm-kernel&m=135071372426971&w=2 http://marc.info/?l=linux-arm-kernel&m=135067558415214&w=2 http://marc.info/?l=linux-arm-kernel&m=135216013608196&w=2 and appears to have been sent for merging before one of its prerequisites was merged: http://marc.info/?l=linux-arm-kernel&m=135219411617621&w=2 Signed-off-by: Paul Walmsley Acked-by: Jean Pihet Signed-off-by: Wolfram Sang --- arch/arm/plat-omap/i2c.c | 21 +++++++++++++++++++++ drivers/i2c/busses/i2c-omap.c | 32 ++++++++++++++------------------ include/linux/i2c-omap.h | 1 + 3 files changed, 36 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c index a5683a84c6ee..6013831a043e 100644 --- a/arch/arm/plat-omap/i2c.c +++ b/arch/arm/plat-omap/i2c.c @@ -26,12 +26,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #define OMAP_I2C_SIZE 0x3f @@ -127,6 +129,16 @@ static inline int omap1_i2c_add_bus(int bus_id) #ifdef CONFIG_ARCH_OMAP2PLUS +/* + * XXX This function is a temporary compatibility wrapper - only + * needed until the I2C driver can be converted to call + * omap_pm_set_max_dev_wakeup_lat() and handle a return code. + */ +static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t) +{ + omap_pm_set_max_mpu_wakeup_lat(dev, t); +} + static inline int omap2_i2c_add_bus(int bus_id) { int l; @@ -158,6 +170,15 @@ static inline int omap2_i2c_add_bus(int bus_id) dev_attr = (struct omap_i2c_dev_attr *)oh->dev_attr; pdata->flags = dev_attr->flags; + /* + * When waiting for completion of a i2c transfer, we need to + * set a wake up latency constraint for the MPU. This is to + * ensure quick enough wakeup from idle, when transfer + * completes. + * Only omap3 has support for constraints + */ + if (cpu_is_omap34xx()) + pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat; pdev = omap_device_build(name, bus_id, oh, pdata, sizeof(struct omap_i2c_bus_platform_data), NULL, 0, 0); diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index db31eaed6ea5..0b0254312d21 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -43,7 +43,6 @@ #include #include #include -#include /* I2C controller revisions */ #define OMAP_I2C_OMAP1_REV_2 0x20 @@ -187,8 +186,9 @@ struct omap_i2c_dev { int reg_shift; /* bit shift for I2C register addresses */ struct completion cmd_complete; struct resource *ioarea; - u32 latency; /* maximum MPU wkup latency */ - struct pm_qos_request pm_qos_request; + u32 latency; /* maximum mpu wkup latency */ + void (*set_mpu_wkup_lat)(struct device *dev, + long latency); u32 speed; /* Speed of bus in kHz */ u32 dtrev; /* extra revision from DT */ u32 flags; @@ -494,7 +494,9 @@ static void omap_i2c_resize_fifo(struct omap_i2c_dev *dev, u8 size, bool is_rx) dev->b_hw = 1; /* Enable hardware fixes */ /* calculate wakeup latency constraint for MPU */ - dev->latency = (1000000 * dev->threshold) / (1000 * dev->speed / 8); + if (dev->set_mpu_wkup_lat != NULL) + dev->latency = (1000000 * dev->threshold) / + (1000 * dev->speed / 8); } /* @@ -629,16 +631,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) if (r < 0) goto out; - /* - * When waiting for completion of a i2c transfer, we need to - * set a wake up latency constraint for the MPU. This is to - * ensure quick enough wakeup from idle, when transfer - * completes. 
- */ - if (dev->latency) - pm_qos_add_request(&dev->pm_qos_request, - PM_QOS_CPU_DMA_LATENCY, - dev->latency); + if (dev->set_mpu_wkup_lat != NULL) + dev->set_mpu_wkup_lat(dev->dev, dev->latency); for (i = 0; i < num; i++) { r = omap_i2c_xfer_msg(adap, &msgs[i], (i == (num - 1))); @@ -646,8 +640,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) break; } - if (dev->latency) - pm_qos_remove_request(&dev->pm_qos_request); + if (dev->set_mpu_wkup_lat != NULL) + dev->set_mpu_wkup_lat(dev->dev, -1); if (r == 0) r = num; @@ -1104,6 +1098,7 @@ omap_i2c_probe(struct platform_device *pdev) } else if (pdata != NULL) { dev->speed = pdata->clkrate; dev->flags = pdata->flags; + dev->set_mpu_wkup_lat = pdata->set_mpu_wkup_lat; dev->dtrev = pdata->rev; } @@ -1159,8 +1154,9 @@ omap_i2c_probe(struct platform_device *pdev) dev->b_hw = 1; /* Enable hardware fixes */ /* calculate wakeup latency constraint for MPU */ - dev->latency = (1000000 * dev->fifo_size) / - (1000 * dev->speed / 8); + if (dev->set_mpu_wkup_lat != NULL) + dev->latency = (1000000 * dev->fifo_size) / + (1000 * dev->speed / 8); } /* reset ASAP, clearing any IRQs */ diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h index df804ba73e0b..92a0dc75bc74 100644 --- a/include/linux/i2c-omap.h +++ b/include/linux/i2c-omap.h @@ -34,6 +34,7 @@ struct omap_i2c_bus_platform_data { u32 clkrate; u32 rev; u32 flags; + void (*set_mpu_wkup_lat)(struct device *dev, long set); }; #endif -- cgit v1.2.3 From 93532c8a4890871aa0d84dd91b80dad9f58542e0 Mon Sep 17 00:00:00 2001 From: Igor Mazanov Date: Thu, 15 Nov 2012 21:07:00 +0400 Subject: clk: remove inline usage from clk-provider.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users of GCC 4.7 have reported compiler errors due to having inline applied to function declarations in clk-provider.h. The definitions exist in drivers/clk/clk.c. An example error: In file included from arch/arm/mach-omap2/clockdomain.c:25:0: arch/arm/mach-omap2/clockdomain.c: In function ‘clkdm_clk_disable’: include/linux/clk-provider.h:338:12: error: inlining failed in call to always_inline ‘__clk_get_enable_count’: function body not available arch/arm/mach-omap2/clockdomain.c:1001:28: error: called from here make[1]: *** [arch/arm/mach-omap2/clockdomain.o] Error 1 make: *** [arch/arm/mach-omap2] Error 2 This patch removes the use of inline from include/linux/clk-provider.h but keeps the function definitions in drivers/clk/clk.c as inlined since they are one-liners. 
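For illustration, a minimal sketch of the failure mode using a hypothetical foo.h/foo.c pair (not part of the patch). In kernel builds "inline" can expand to __attribute__((always_inline)), so a header that declares a function inline without making its body visible gives GCC 4.7 nothing to inline at the call site, which is exactly the "function body not available" error quoted above:

	/* foo.h -- broken: promises inlining, but the body lives only in
	 * foo.c, so a caller in another file cannot satisfy always_inline.
	 */
	struct foo;
	inline int foo_get_count(struct foo *f);

	/* foo.h -- fixed: plain prototype; foo.c keeps the one-line
	 * definition and is still free to mark it inline locally.
	 */
	int foo_get_count(struct foo *f);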
Signed-off-by: Igor Mazanov Acked-by: Paul Walmsley Signed-off-by: Mike Turquette [mturquette@linaro.org: improved subject, added changelog] --- include/linux/clk-provider.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c12731582920..f9f5e9eeb9dd 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -335,8 +335,8 @@ const char *__clk_get_name(struct clk *clk); struct clk_hw *__clk_get_hw(struct clk *clk); u8 __clk_get_num_parents(struct clk *clk); struct clk *__clk_get_parent(struct clk *clk); -inline int __clk_get_enable_count(struct clk *clk); -inline int __clk_get_prepare_count(struct clk *clk); +int __clk_get_enable_count(struct clk *clk); +int __clk_get_prepare_count(struct clk *clk); unsigned long __clk_get_rate(struct clk *clk); unsigned long __clk_get_flags(struct clk *clk); int __clk_is_enabled(struct clk *clk); -- cgit v1.2.3 From fa0cbbf145aabbf29c6f28f8a11935c0b0fd86fc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 12 Nov 2012 17:53:04 -0800 Subject: mm, oom: reintroduce /proc/pid/oom_adj This is mostly a revert of 01dc52ebdf47 ("oom: remove deprecated oom_adj") from Davidlohr Bueso. It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier kernels. It simply scales the value linearly when /proc/pid/oom_score_adj is written. The major difference is that its scheduled removal is no longer included in Documentation/feature-removal-schedule.txt. We do warn users with a single printk, though, to suggest the more powerful and supported /proc/pid/oom_score_adj interface. Reported-by: Artem S. Tashkinov Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 16 ++++-- fs/proc/base.c | 109 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/oom.h | 9 +++ 3 files changed, 130 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a1793d670cd0..3844d21d6ca3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc//oom_score_adj - Adjust the oom-killer + 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer score 3.2 /proc//oom_score - Display current oom-killer score 3.3 /proc//io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc//oom_score_adj- Adjust the oom-killer score +3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -This file can be used to adjust the badness heuristic used to select which +These file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. +For backwards compatibility with previous kernels, /proc//oom_adj may also +be used to tune the badness score. 
Its acceptable values range from -16 +(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 +(OOM_DISABLE) to disable oom killing entirely for that task. Its value is +scaled linearly with /proc//oom_score_adj. + The value of /proc//oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. @@ -1375,7 +1381,9 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given . +any given . Use it together with /proc//oom_score_adj to tune which +process should be killed in an out-of-memory situation. + 3.3 /proc//io - Display the IO accounting fields ------------------------------------------------------- diff --git a/fs/proc/base.c b/fs/proc/base.c index 144a96732dd7..3c231adf8450 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + char buffer[PROC_NUMBUF]; + int oom_adj = OOM_ADJUST_MIN; + size_t len; + unsigned long flags; + + if (!task) + return -ESRCH; + if (lock_task_sighand(task, &flags)) { + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; + unlock_task_sighand(task, &flags); + } + put_task_struct(task); + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t oom_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF]; + int oom_adj; + unsigned long flags; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &oom_adj); + if (err) + goto out; + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && + oom_adj != OOM_DISABLE) { + err = -EINVAL; + goto out; + } + + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) { + err = -ESRCH; + goto out; + } + + task_lock(task); + if (!task->mm) { + err = -EINVAL; + goto err_task_lock; + } + + if (!lock_task_sighand(task, &flags)) { + err = -ESRCH; + goto err_task_lock; + } + + /* + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum + * value is always attainable. + */ + if (oom_adj == OOM_ADJUST_MAX) + oom_adj = OOM_SCORE_ADJ_MAX; + else + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_sighand; + } + + /* + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. + */ + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); + + task->signal->oom_score_adj = oom_adj; + trace_oom_score_adj_update(task); +err_sighand: + unlock_task_sighand(task, &flags); +err_task_lock: + task_unlock(task); + put_task_struct(task); +out: + return err < 0 ? 
err : count; +} + +static const struct file_operations proc_oom_adj_operations = { + .read = oom_adj_read, + .write = oom_adj_write, + .llseek = generic_file_llseek, +}; + static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/include/uapi/linux/oom.h b/include/uapi/linux/oom.h index a49c4afc7060..b29272d621ce 100644 --- a/include/uapi/linux/oom.h +++ b/include/uapi/linux/oom.h @@ -8,4 +8,13 @@ #define OOM_SCORE_ADJ_MIN (-1000) #define OOM_SCORE_ADJ_MAX 1000 +/* + * /proc//oom_adj set to -17 protects from the oom killer for legacy + * purposes. + */ +#define OOM_DISABLE (-17) +/* inclusive */ +#define OOM_ADJUST_MIN (-16) +#define OOM_ADJUST_MAX 15 + #endif /* _UAPI__INCLUDE_LINUX_OOM_H */ -- cgit v1.2.3 From bea8c150a7efbc0f204e709b7274fe273f55e0d3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 16 Nov 2012 14:14:54 -0800 Subject: memcg: fix hotplugged memory zone oops When MEMCG is configured on (even when it's disabled by boot option), when adding or removing a page to/from its lru list, the zone pointer used for stats updates is nowadays taken from the struct lruvec. (On many configurations, calculating zone from page is slower.) But we have no code to update all the lruvecs (per zone, per memcg) when a memory node is hotadded. Here's an extract from the oops which results when running numactl to bind a program to a newly onlined node: BUG: unable to handle kernel NULL pointer dereference at 0000000000000f60 IP: __mod_zone_page_state+0x9/0x60 Pid: 1219, comm: numactl Not tainted 3.6.0-rc5+ #180 Bochs Bochs Process numactl (pid: 1219, threadinfo ffff880039abc000, task ffff8800383c4ce0) Call Trace: __pagevec_lru_add_fn+0xdf/0x140 pagevec_lru_move_fn+0xb1/0x100 __pagevec_lru_add+0x1c/0x30 lru_add_drain_cpu+0xa3/0x130 lru_add_drain+0x2f/0x40 ... The natural solution might be to use a memcg callback whenever memory is hotadded; but that solution has not been scoped out, and it happens that we do have an easy location at which to update lruvec->zone. The lruvec pointer is discovered either by mem_cgroup_zone_lruvec() or by mem_cgroup_page_lruvec(), and both of those do know the right zone. So check and set lruvec->zone in those; and remove the inadequate attempt to set lruvec->zone from lruvec_init(), which is called before NODE_DATA(node) has been allocated in such cases. Ah, there was one exceptionr. For no particularly good reason, mem_cgroup_force_empty_list() has its own code for deciding lruvec. Change it to use the standard mem_cgroup_zone_lruvec() and mem_cgroup_get_lru_size() too. In fact it was already safe against such an oops (the lru lists in danger could only be empty), but we're better proofed against future changes this way. 
I've marked this for stable (3.6) since we introduced the problem in 3.5 (now closed to stable); but I have no idea if this is the only fix needed to get memory hotadd working with memcg in 3.6, and received no answer when I enquired twice before. Reported-by: Tang Chen Signed-off-by: Hugh Dickins Acked-by: Johannes Weiner Acked-by: KAMEZAWA Hiroyuki Cc: Konstantin Khlebnikov Cc: Wen Congyang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- mm/memcontrol.c | 46 +++++++++++++++++++++++++++++++++++----------- mm/mmzone.c | 6 +----- mm/page_alloc.c | 2 +- 4 files changed, 38 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 50aaca81f63d..a23923ba8263 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -752,7 +752,7 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size, enum memmap_context context); -extern void lruvec_init(struct lruvec *lruvec, struct zone *zone); +extern void lruvec_init(struct lruvec *lruvec); static inline struct zone *lruvec_zone(struct lruvec *lruvec) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 93a7e36ded89..dd39ba000b31 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1055,12 +1055,24 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, struct mem_cgroup *memcg) { struct mem_cgroup_per_zone *mz; + struct lruvec *lruvec; - if (mem_cgroup_disabled()) - return &zone->lruvec; + if (mem_cgroup_disabled()) { + lruvec = &zone->lruvec; + goto out; + } mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone)); - return &mz->lruvec; + lruvec = &mz->lruvec; +out: + /* + * Since a node can be onlined after the mem_cgroup was created, + * we have to be prepared to initialize lruvec->zone here; + * and if offlined then reonlined, we need to reinitialize it. + */ + if (unlikely(lruvec->zone != zone)) + lruvec->zone = zone; + return lruvec; } /* @@ -1087,9 +1099,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) struct mem_cgroup_per_zone *mz; struct mem_cgroup *memcg; struct page_cgroup *pc; + struct lruvec *lruvec; - if (mem_cgroup_disabled()) - return &zone->lruvec; + if (mem_cgroup_disabled()) { + lruvec = &zone->lruvec; + goto out; + } pc = lookup_page_cgroup(page); memcg = pc->mem_cgroup; @@ -1107,7 +1122,16 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) pc->mem_cgroup = memcg = root_mem_cgroup; mz = page_cgroup_zoneinfo(memcg, page); - return &mz->lruvec; + lruvec = &mz->lruvec; +out: + /* + * Since a node can be onlined after the mem_cgroup was created, + * we have to be prepared to initialize lruvec->zone here; + * and if offlined then reonlined, we need to reinitialize it. 
+ */ + if (unlikely(lruvec->zone != zone)) + lruvec->zone = zone; + return lruvec; } /** @@ -3697,17 +3721,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg, int node, int zid, enum lru_list lru) { - struct mem_cgroup_per_zone *mz; + struct lruvec *lruvec; unsigned long flags, loop; struct list_head *list; struct page *busy; struct zone *zone; zone = &NODE_DATA(node)->node_zones[zid]; - mz = mem_cgroup_zoneinfo(memcg, node, zid); - list = &mz->lruvec.lists[lru]; + lruvec = mem_cgroup_zone_lruvec(zone, memcg); + list = &lruvec->lists[lru]; - loop = mz->lru_size[lru]; + loop = mem_cgroup_get_lru_size(lruvec, lru); /* give some margin against EBUSY etc...*/ loop += 256; busy = NULL; @@ -4745,7 +4769,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; - lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]); + lruvec_init(&mz->lruvec); mz->usage_in_excess = 0; mz->on_tree = false; mz->memcg = memcg; diff --git a/mm/mmzone.c b/mm/mmzone.c index 3cef80f6ac79..4596d81b89b1 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -87,7 +87,7 @@ int memmap_valid_within(unsigned long pfn, } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ -void lruvec_init(struct lruvec *lruvec, struct zone *zone) +void lruvec_init(struct lruvec *lruvec) { enum lru_list lru; @@ -95,8 +95,4 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); - -#ifdef CONFIG_MEMCG - lruvec->zone = zone; -#endif } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5b74de6702e0..c91598b1b4c0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4505,7 +4505,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone_pcp_init(zone); - lruvec_init(&zone->lruvec, zone); + lruvec_init(&zone->lruvec); if (!size) continue; -- cgit v1.2.3 From 2ca3cb50edc351875df13d083524f524cdeb3054 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Nov 2012 14:14:56 -0800 Subject: rapidio: fix kernel-doc warnings Fix rapidio kernel-doc warnings: Warning(drivers/rapidio/rio.c:415): No description found for parameter 'local' Warning(drivers/rapidio/rio.c:415): Excess function parameter 'lstart' description in 'rio_map_inb_region' Warning(include/linux/rio.h:290): No description found for parameter 'switches' Warning(include/linux/rio.h:290): No description found for parameter 'destid_table' Signed-off-by: Randy Dunlap Cc: Matt Porter Acked-by: Alexandre Bounine Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 2 +- include/linux/rio.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index c17ae22567e0..0c6fcb461faf 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -401,7 +401,7 @@ EXPORT_SYMBOL_GPL(rio_release_inb_pwrite); /** * rio_map_inb_region -- Map inbound memory region. * @mport: Master port. - * @lstart: physical address of memory region to be mapped + * @local: physical address of memory region to be mapped * @rbase: RIO base address assigned to this window * @size: Size of the memory region * @rflags: Flags for mapping. 
diff --git a/include/linux/rio.h b/include/linux/rio.h index 4187da511006..a3e784278667 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -275,9 +275,11 @@ struct rio_id_table { * struct rio_net - RIO network info * @node: Node in global list of RIO networks * @devices: List of devices in this network + * @switches: List of switches in this netowrk * @mports: List of master ports accessing this network * @hport: Default port for accessing this network * @id: RIO network ID + * @destid_table: destID allocation table */ struct rio_net { struct list_head node; /* node in list of networks */ -- cgit v1.2.3 From 5576646f3c1abd60d72d19829de6f5d8c2ca8ecf Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 16 Nov 2012 14:15:06 -0800 Subject: revert "mm: fix-up zone present pages" Revert commit 7f1290f2f2a4 ("mm: fix-up zone present pages") That patch tried to fix a issue when calculating zone->present_pages, but it caused a regression on 32bit systems with HIGHMEM. With that change, reset_zone_present_pages() resets all zone->present_pages to zero, and fixup_zone_present_pages() is called to recalculate zone->present_pages when the boot allocator frees core memory pages into buddy allocator. Because highmem pages are not freed by bootmem allocator, all highmem zones' present_pages becomes zero. Various options for improving the situation are being discussed but for now, let's return to the 3.6 code. Cc: Jianguo Wu Cc: Jiang Liu Cc: Petr Tesarik Cc: "Luck, Tony" Cc: Mel Gorman Cc: Yinghai Lu Cc: Minchan Kim Cc: Johannes Weiner Acked-by: David Rientjes Tested-by: Chris Clayton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 1 - include/linux/mm.h | 4 ---- mm/bootmem.c | 10 +--------- mm/memory_hotplug.c | 7 ------- mm/nobootmem.c | 3 --- mm/page_alloc.c | 34 ---------------------------------- 6 files changed, 1 insertion(+), 58 deletions(-) (limited to 'include') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index acd5b68e8871..082e383c1b6f 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -637,7 +637,6 @@ mem_init (void) high_memory = __va(max_low_pfn * PAGE_SIZE); - reset_zone_present_pages(); for_each_online_pgdat(pgdat) if (pgdat->bdata->node_bootmem_map) totalram_pages += free_all_bootmem_node(pgdat); diff --git a/include/linux/mm.h b/include/linux/mm.h index fa0680402738..bcaab4e6fe91 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1684,9 +1684,5 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool page_is_guard(struct page *page) { return false; } #endif /* CONFIG_DEBUG_PAGEALLOC */ -extern void reset_zone_present_pages(void); -extern void fixup_zone_present_pages(int nid, unsigned long start_pfn, - unsigned long end_pfn); - #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/bootmem.c b/mm/bootmem.c index 434be4ae7a04..f468185b3b28 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -198,8 +198,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) int order = ilog2(BITS_PER_LONG); __free_pages_bootmem(pfn_to_page(start), order); - fixup_zone_present_pages(page_to_nid(pfn_to_page(start)), - start, start + BITS_PER_LONG); count += BITS_PER_LONG; start += BITS_PER_LONG; } else { @@ -210,9 +208,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) if (vec & 1) { page = pfn_to_page(start + off); __free_pages_bootmem(page, 0); - fixup_zone_present_pages( - page_to_nid(page), - start + off, start + off + 1); count++; 
} vec >>= 1; @@ -226,11 +221,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) pages = bdata->node_low_pfn - bdata->node_min_pfn; pages = bootmem_bootmap_pages(pages); count += pages; - while (pages--) { - fixup_zone_present_pages(page_to_nid(page), - page_to_pfn(page), page_to_pfn(page) + 1); + while (pages--) __free_pages_bootmem(page++, 0); - } bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 56b758ae57d2..e4eeacae2b91 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -106,7 +106,6 @@ static void get_page_bootmem(unsigned long info, struct page *page, void __ref put_page_bootmem(struct page *page) { unsigned long type; - struct zone *zone; type = (unsigned long) page->lru.next; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || @@ -117,12 +116,6 @@ void __ref put_page_bootmem(struct page *page) set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); __free_pages_bootmem(page, 0); - - zone = page_zone(page); - zone_span_writelock(zone); - zone->present_pages++; - zone_span_writeunlock(zone); - totalram_pages++; } } diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 714d5d650470..bd82f6b31411 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -116,8 +116,6 @@ static unsigned long __init __free_memory_core(phys_addr_t start, return 0; __free_pages_memory(start_pfn, end_pfn); - fixup_zone_present_pages(pfn_to_nid(start >> PAGE_SHIFT), - start_pfn, end_pfn); return end_pfn - start_pfn; } @@ -128,7 +126,6 @@ unsigned long __init free_low_memory_core_early(int nodeid) phys_addr_t start, end, size; u64 i; - reset_zone_present_pages(); for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) count += __free_memory_core(start, end); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c91598b1b4c0..7bb35ac0964a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6098,37 +6098,3 @@ void dump_page(struct page *page) dump_page_flags(page->flags); mem_cgroup_print_bad_page(page); } - -/* reset zone->present_pages */ -void reset_zone_present_pages(void) -{ - struct zone *z; - int i, nid; - - for_each_node_state(nid, N_HIGH_MEMORY) { - for (i = 0; i < MAX_NR_ZONES; i++) { - z = NODE_DATA(nid)->node_zones + i; - z->present_pages = 0; - } - } -} - -/* calculate zone's present pages in buddy system */ -void fixup_zone_present_pages(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - struct zone *z; - unsigned long zone_start_pfn, zone_end_pfn; - int i; - - for (i = 0; i < MAX_NR_ZONES; i++) { - z = NODE_DATA(nid)->node_zones + i; - zone_start_pfn = z->zone_start_pfn; - zone_end_pfn = zone_start_pfn + z->spanned_pages; - - /* if the two regions intersect */ - if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn)) - z->present_pages += min(end_pfn, zone_end_pfn) - - max(start_pfn, zone_start_pfn); - } -} -- cgit v1.2.3 From d2709c7ce4c513ab7f4ca9a106a930621811f2d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Nov 2012 22:21:03 +0000 Subject: perf: Make perf build for x86 with UAPI disintegration applied Make perf build for x86 once the UAPI disintegration patches for that arch have been applied by adding the appropriate -I flags - in the right order - and then converting some #includes that use ../.. notation to find main kernel headerfiles to use and instead. Note that -Iarch/foo/include/uapi is present _before_ -Iarch/foo/include. This makes sure we get the userspace version of the pt_regs struct. 
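A small illustration of why the ordering matters, assuming the x86 case (the header name below is only an example, not taken from the patch):

	/* With -I$(srctree)/arch/x86/include/uapi listed before
	 * -I$(srctree)/arch/x86/include, an include like this resolves to
	 * the exported (userspace) struct pt_regs rather than the
	 * kernel-internal layout.
	 */
	#include <asm/ptrace.h>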
Ideally, we wouldn't have the latter -I flag at all, but unfortunately we want asm/svm.h and asm/vmx.h in builtin-kvm.c and these aren't part of the UAPI - at least not for x86. I wonder if the bits outside of the __KERNEL__ guards *should* be transferred there. I note also that perf seems to do its dependency handling manually by listing all the header files it might want to use in LIB_H in the Makefile. Can this be changed to use -MD? Note that to do make this work, we need to export and UAPI disintegrate linux/hw_breakpoint.h, which I think should've been exported previously so that perf can access the bits. We have to do this in the same patch to maintain bisectability. Signed-off-by: David Howells --- include/linux/hw_breakpoint.h | 31 +------------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/hw_breakpoint.h | 30 ++++++++++++++++++++++++++++++ tools/perf/Makefile | 29 ++++++++++++++++++++++++++++- tools/perf/arch/x86/include/perf_regs.h | 2 +- tools/perf/builtin-kvm.c | 6 +++--- tools/perf/builtin-test.c | 2 +- tools/perf/perf.h | 16 +++------------- tools/perf/util/evsel.c | 4 ++-- tools/perf/util/evsel.h | 3 ++- tools/perf/util/header.h | 2 +- tools/perf/util/parse-events-test.c | 2 +- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.h | 2 +- tools/perf/util/session.h | 2 +- 16 files changed, 78 insertions(+), 58 deletions(-) create mode 100644 include/uapi/linux/hw_breakpoint.h (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 6ae9c631a1be..0464c85e63fd 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,35 +1,8 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -enum { - HW_BREAKPOINT_LEN_1 = 1, - HW_BREAKPOINT_LEN_2 = 2, - HW_BREAKPOINT_LEN_4 = 4, - HW_BREAKPOINT_LEN_8 = 8, -}; - -enum { - HW_BREAKPOINT_EMPTY = 0, - HW_BREAKPOINT_R = 1, - HW_BREAKPOINT_W = 2, - HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, - HW_BREAKPOINT_X = 4, - HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, -}; - -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; - -#ifdef __KERNEL__ - #include +#include #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -151,6 +124,4 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e194387ef784..19e765fbfef7 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -415,3 +415,4 @@ header-y += wireless.h header-y += x25.h header-y += xattr.h header-y += xfrm.h +header-y += hw_breakpoint.h diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h new file mode 100644 index 000000000000..b04000a2296a --- /dev/null +++ b/include/uapi/linux/hw_breakpoint.h @@ -0,0 +1,30 @@ +#ifndef _UAPI_LINUX_HW_BREAKPOINT_H +#define _UAPI_LINUX_HW_BREAKPOINT_H + +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, +}; + +enum { + HW_BREAKPOINT_EMPTY = 0, + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, + HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, +}; + +enum bp_type_idx { + TYPE_INST = 0, +#ifdef 
CONFIG_HAVE_MIXED_BREAKPOINTS_REGS + TYPE_DATA = 0, +#else + TYPE_DATA = 1, +#endif + TYPE_MAX +}; + +#endif /* _UAPI_LINUX_HW_BREAKPOINT_H */ diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 00deed4d6159..0a619af5be43 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -169,7 +169,34 @@ endif ### --- END CONFIGURATION SECTION --- -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +endif + +BASIC_CFLAGS = \ + -Iutil/include \ + -Iarch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/arch/$(ARCH)/include/generated/uapi) \ + -I$(srctree)/arch/$(ARCH)/include/uapi \ + -I$(srctree)/arch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/include/generated/uapi) \ + -I$(srctree)/include/uapi \ + -I$(srctree)/include \ + -I$(OUTPUT)util \ + -Iutil \ + -I. \ + -I$(TRACE_EVENT_DIR) \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 46fc9f15c6b3..7fcdcdbee917 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -3,7 +3,7 @@ #include #include "../../util/types.h" -#include "../../../../../arch/x86/include/asm/perf_regs.h" +#include #ifndef ARCH_X86_64 #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 260abc535b5b..e013bdb5e24a 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -22,9 +22,9 @@ #include #include -#include "../../arch/x86/include/asm/svm.h" -#include "../../arch/x86/include/asm/vmx.h" -#include "../../arch/x86/include/asm/kvm.h" +#include +#include +#include struct event_key { #define INVALID_KEY (~0ULL) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 484f26cc0c00..5acd6e8e658b 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -15,7 +15,7 @@ #include "util/thread_map.h" #include "util/pmu.h" #include "event-parse.h" -#include "../../include/linux/hw_breakpoint.h" +#include #include diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e2ba8f004d32..238f923f2218 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,8 +5,9 @@ struct winsize; void get_term_dimensions(struct winsize *ws); +#include + #if defined(__i386__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -16,7 +17,6 @@ void get_term_dimensions(struct winsize *ws); #endif #if defined(__x86_64__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -26,20 +26,17 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __powerpc__ -#include "../../arch/powerpc/include/uapi/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef 
__s390__ -#include "../../arch/s390/include/asm/unistd.h" #define rmb() asm volatile("bcr 15,0" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #endif #ifdef __sh__ -#include "../../arch/sh/include/asm/unistd.h" #if defined(__SH4A__) || defined(__SH5__) # define rmb() asm volatile("synco" ::: "memory") #else @@ -50,35 +47,30 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __hppa__ -#include "../../arch/parisc/include/asm/unistd.h" #define rmb() asm volatile("" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ -#include "../../arch/sparc/include/uapi/asm/unistd.h" #define rmb() asm volatile("":::"memory") #define cpu_relax() asm volatile("":::"memory") #define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ -#include "../../arch/alpha/include/asm/unistd.h" #define rmb() asm volatile("mb" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ -#include "../../arch/ia64/include/asm/unistd.h" #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC "model name" #endif #ifdef __arm__ -#include "../../arch/arm/include/asm/unistd.h" /* * Use the __kuser_memory_barrier helper in the CPU helper page. See * arch/arm/kernel/entry-armv.S in the kernel source for details. @@ -89,13 +81,11 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __aarch64__ -#include "../../arch/arm64/include/asm/unistd.h" #define rmb() asm volatile("dmb ld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ -#include "../../arch/mips/include/asm/unistd.h" #define rmb() asm volatile( \ ".set mips2\n\t" \ "sync\n\t" \ @@ -112,7 +102,7 @@ void get_term_dimensions(struct winsize *ws); #include #include -#include "../../include/uapi/linux/perf_event.h" +#include #include "util/types.h" #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 618d41140abd..d144d464ce39 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -18,8 +18,8 @@ #include "cpumap.h" #include "thread_map.h" #include "target.h" -#include "../../../include/linux/hw_breakpoint.h" -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6f94d6dea00f..d99b476ef37c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -3,7 +3,8 @@ #include #include -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "types.h" #include "xyarray.h" #include "cgroup.h" diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 879d215cdac9..9bc00783f24f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,7 +1,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H -#include "../../../include/uapi/linux/perf_event.h" +#include #include #include #include "types.h" diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 516ecd9ddd6e..6ef213b35ecd 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -3,7 +3,7 @@ #include "evsel.h" #include "evlist.h" #include "sysfs.h" -#include "../../../include/linux/hw_breakpoint.h" +#include #define TEST_ASSERT_VAL(text, cond) \ do { \ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 
75c7b0fca6d9..6b6d03e93c3d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ -#include "../../../include/linux/hw_breakpoint.h" +#include #include "util.h" #include "../perf.h" #include "evlist.h" diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 839230ceb18b..2820c407adb2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -7,7 +7,7 @@ #include #include #include "types.h" -#include "../../../include/uapi/linux/perf_event.h" +#include #include "types.h" struct list_head; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 39f3abac7744..fdeb8ac7c5d2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,7 +2,7 @@ #define __PMU_H #include -#include "../../../include/uapi/linux/perf_event.h" +#include enum { PERF_PMU_FORMAT_VALUE_CONFIG, diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dd6426163ba6..0eae00ad5fe7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -7,7 +7,7 @@ #include "symbol.h" #include "thread.h" #include -#include "../../../include/uapi/linux/perf_event.h" +#include struct sample_queue; struct ip_callchain; -- cgit v1.2.3 From 6b0d5d344a78d43957a9f49c549b6f3aa2dc2082 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 16 Oct 2012 06:40:45 -0300 Subject: [media] adv7604: Replace prim_mode by mode Changes the way the primary mode is handled: - Remove it from platform_data since it doesn't belong there. - Add a new mode enum for use with s_routing. - Collapse the two HDMI modes into one HDMI mode: when setting up the timings manually we do not need to select HDMI_COMP mode. That's only needed when selecting a preset. This patch prepares for the next step where we switch to using the presets where available. 
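A brief usage sketch of the new interface (illustrative only; "sd" is assumed to be the adv7604 subdevice registered by the bridge driver):

	/* The bridge/board code now passes the adv7604_mode enum directly
	 * as the s_routing input argument instead of a primary-mode value.
	 */
	int err = v4l2_subdev_call(sd, video, s_routing,
				   ADV7604_MODE_HDMI, 0, 0);
	if (err)
		v4l2_err(sd, "failed to select HDMI input\n");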
Signed-off-by: Mats Randgaard Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/adv7604.c | 78 +++++++++++++++++---------------------------- include/media/adv7604.h | 21 ++++++------ 2 files changed, 39 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 75a8395b8992..74a18c0fc10d 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -53,8 +53,7 @@ MODULE_LICENSE("GPL"); /* ADV7604 system clock frequency */ #define ADV7604_fsc (28636360) -#define DIGITAL_INPUT ((state->prim_mode == ADV7604_PRIM_MODE_HDMI_COMP) || \ - (state->prim_mode == ADV7604_PRIM_MODE_HDMI_GR)) +#define DIGITAL_INPUT (state->mode == ADV7604_MODE_HDMI) /* ********************************************************************** @@ -68,7 +67,7 @@ struct adv7604_state { struct v4l2_subdev sd; struct media_pad pad; struct v4l2_ctrl_handler hdl; - enum adv7604_prim_mode prim_mode; + enum adv7604_mode mode; struct v4l2_dv_timings timings; u8 edid[256]; unsigned edid_blocks; @@ -738,12 +737,7 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) switch (state->rgb_quantization_range) { case V4L2_DV_RGB_RANGE_AUTO: /* automatic */ - if ((hdmi_read(sd, 0x05) & 0x80) || - (state->prim_mode == ADV7604_PRIM_MODE_COMP) || - (state->prim_mode == ADV7604_PRIM_MODE_RGB)) { - /* receiving HDMI or analog signal */ - io_write_and_or(sd, 0x02, 0x0f, 0xf0); - } else { + if (DIGITAL_INPUT && !(hdmi_read(sd, 0x05) & 0x80)) { /* receiving DVI-D signal */ /* ADV7604 selects RGB limited range regardless of @@ -756,6 +750,9 @@ static void set_rgb_quantization_range(struct v4l2_subdev *sd) /* RGB full range (0-255) */ io_write_and_or(sd, 0x02, 0x0f, 0x10); } + } else { + /* receiving HDMI or analog signal, set automode */ + io_write_and_or(sd, 0x02, 0x0f, 0xf0); } break; case V4L2_DV_RGB_RANGE_LIMITED: @@ -1203,24 +1200,25 @@ static int adv7604_g_dv_timings(struct v4l2_subdev *sd, return 0; } -static void enable_input(struct v4l2_subdev *sd, enum adv7604_prim_mode prim_mode) +static void enable_input(struct v4l2_subdev *sd) { - switch (prim_mode) { - case ADV7604_PRIM_MODE_COMP: - case ADV7604_PRIM_MODE_RGB: + struct adv7604_state *state = to_state(sd); + + switch (state->mode) { + case ADV7604_MODE_COMP: + case ADV7604_MODE_GR: /* enable */ io_write(sd, 0x15, 0xb0); /* Disable Tristate of Pins (no audio) */ break; - case ADV7604_PRIM_MODE_HDMI_COMP: - case ADV7604_PRIM_MODE_HDMI_GR: + case ADV7604_MODE_HDMI: /* enable */ hdmi_write(sd, 0x1a, 0x0a); /* Unmute audio */ hdmi_write(sd, 0x01, 0x00); /* Enable HDMI clock terminators */ io_write(sd, 0x15, 0xa0); /* Disable Tristate of Pins */ break; default: - v4l2_err(sd, "%s: reserved primary mode 0x%0x\n", - __func__, prim_mode); + v4l2_dbg(2, debug, sd, "%s: Unknown mode %d\n", + __func__, state->mode); break; } } @@ -1233,11 +1231,13 @@ static void disable_input(struct v4l2_subdev *sd) hdmi_write(sd, 0x01, 0x78); /* Disable HDMI clock terminators */ } -static void select_input(struct v4l2_subdev *sd, enum adv7604_prim_mode prim_mode) +static void select_input(struct v4l2_subdev *sd) { - switch (prim_mode) { - case ADV7604_PRIM_MODE_COMP: - case ADV7604_PRIM_MODE_RGB: + struct adv7604_state *state = to_state(sd); + + switch (state->mode) { + case ADV7604_MODE_COMP: + case ADV7604_MODE_GR: /* set mode and select free run resolution */ io_write(sd, 0x00, 0x07); /* video std */ io_write(sd, 0x01, 0x02); /* prim mode */ @@ -1271,13 +1271,10 @@ static void 
select_input(struct v4l2_subdev *sd, enum adv7604_prim_mode prim_mod cp_write(sd, 0x40, 0x5c); /* CP core pre-gain control. Graphics mode */ break; - case ADV7604_PRIM_MODE_HDMI_COMP: - case ADV7604_PRIM_MODE_HDMI_GR: + case ADV7604_MODE_HDMI: /* set mode and select free run resolution */ - /* video std */ - io_write(sd, 0x00, - (prim_mode == ADV7604_PRIM_MODE_HDMI_GR) ? 0x02 : 0x1e); - io_write(sd, 0x01, prim_mode); /* prim mode */ + io_write(sd, 0x00, 0x02); /* video std */ + io_write(sd, 0x01, 0x06); /* prim mode */ /* disable embedded syncs for auto graphics mode */ cp_write_and_or(sd, 0x81, 0xef, 0x00); @@ -1309,7 +1306,8 @@ static void select_input(struct v4l2_subdev *sd, enum adv7604_prim_mode prim_mod break; default: - v4l2_err(sd, "%s: reserved primary mode 0x%0x\n", __func__, prim_mode); + v4l2_dbg(2, debug, sd, "%s: Unknown mode %d\n", + __func__, state->mode); break; } } @@ -1321,26 +1319,13 @@ static int adv7604_s_routing(struct v4l2_subdev *sd, v4l2_dbg(2, debug, sd, "%s: input %d", __func__, input); - switch (input) { - case 0: - /* TODO select HDMI_COMP or HDMI_GR */ - state->prim_mode = ADV7604_PRIM_MODE_HDMI_COMP; - break; - case 1: - state->prim_mode = ADV7604_PRIM_MODE_RGB; - break; - case 2: - state->prim_mode = ADV7604_PRIM_MODE_COMP; - break; - default: - return -EINVAL; - } + state->mode = input; disable_input(sd); - select_input(sd, state->prim_mode); + select_input(sd); - enable_input(sd, state->prim_mode); + enable_input(sd); return 0; } @@ -1724,11 +1709,6 @@ static int adv7604_core_init(struct v4l2_subdev *sd) afe_write(sd, 0x02, pdata->ain_sel); /* Select analog input muxing mode */ io_write_and_or(sd, 0x30, ~(1 << 4), pdata->output_bus_lsb_to_msb << 4); - state->prim_mode = pdata->prim_mode; - select_input(sd, pdata->prim_mode); - - enable_input(sd, pdata->prim_mode); - /* interrupts */ io_write(sd, 0x40, 0xc2); /* Configure INT1 */ io_write(sd, 0x41, 0xd7); /* STDI irq for any change, disable INT2 */ diff --git a/include/media/adv7604.h b/include/media/adv7604.h index 171b957db743..dc004bc926c9 100644 --- a/include/media/adv7604.h +++ b/include/media/adv7604.h @@ -40,14 +40,6 @@ enum adv7604_op_ch_sel { ADV7604_OP_CH_SEL_RBG = 5, }; -/* Primary mode (IO register 0x01, [3:0]) */ -enum adv7604_prim_mode { - ADV7604_PRIM_MODE_COMP = 1, - ADV7604_PRIM_MODE_RGB = 2, - ADV7604_PRIM_MODE_HDMI_COMP = 5, - ADV7604_PRIM_MODE_HDMI_GR = 6, -}; - /* Input Color Space (IO register 0x02, [7:4]) */ enum adv7604_inp_color_space { ADV7604_INP_COLOR_SPACE_LIM_RGB = 0, @@ -103,9 +95,6 @@ struct adv7604_platform_data { /* Bus rotation and reordering */ enum adv7604_op_ch_sel op_ch_sel; - /* Primary mode */ - enum adv7604_prim_mode prim_mode; - /* Select output format */ enum adv7604_op_format_sel op_format_sel; @@ -142,6 +131,16 @@ struct adv7604_platform_data { u8 i2c_vdp; }; +/* + * Mode of operation. + * This is used as the input argument of the s_routing video op. + */ +enum adv7604_mode { + ADV7604_MODE_COMP, + ADV7604_MODE_GR, + ADV7604_MODE_HDMI, +}; + #define V4L2_CID_ADV_RX_ANALOG_SAMPLING_PHASE (V4L2_CID_DV_CLASS_BASE + 0x1000) #define V4L2_CID_ADV_RX_FREE_RUN_COLOR_MANUAL (V4L2_CID_DV_CLASS_BASE + 0x1001) #define V4L2_CID_ADV_RX_FREE_RUN_COLOR (V4L2_CID_DV_CLASS_BASE + 0x1002) -- cgit v1.2.3 From c4f4925439f13a243aecfb36c693613603c0bfbd Mon Sep 17 00:00:00 2001 From: Igor Grinberg Date: Tue, 20 Nov 2012 23:00:10 -0800 Subject: Input: ads7846 - enable pendown GPIO debounce time setting Some platforms need the pendown GPIO debounce time setting programmed. 
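As a rough illustration of how a board would use this (the GPIO number and interval below are invented for the example, not taken from the patch), the new field simply rides along in the existing ads7846 platform data:

    #include <linux/spi/ads7846.h>

    /* hypothetical board file fragment */
    static struct ads7846_platform_data example_ts_info = {
            .model                  = 7846,
            .gpio_pendown           = 31,   /* pen-down GPIO, board specific */
            .gpio_pendown_debounce  = 100,  /* debounce time in microseconds,
                                             * passed on to gpio_set_debounce() */
    };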
Since the pendown GPIO is handled by the driver, the debounce time should also be handled along with the pendown GPIO request. Signed-off-by: Igor Grinberg Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 6 +++++- include/linux/spi/ads7846.h | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index f02028ec3db6..78e5d9ab0ba7 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -955,7 +955,8 @@ static int ads7846_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ads7846_pm, ads7846_suspend, ads7846_resume); -static int __devinit ads7846_setup_pendown(struct spi_device *spi, struct ads7846 *ts) +static int __devinit ads7846_setup_pendown(struct spi_device *spi, + struct ads7846 *ts) { struct ads7846_platform_data *pdata = spi->dev.platform_data; int err; @@ -981,6 +982,9 @@ static int __devinit ads7846_setup_pendown(struct spi_device *spi, struct ads784 ts->gpio_pendown = pdata->gpio_pendown; + if (pdata->gpio_pendown_debounce) + gpio_set_debounce(pdata->gpio_pendown, + pdata->gpio_pendown_debounce); } else { dev_err(&spi->dev, "no get_pendown_state nor gpio_pendown?\n"); return -EINVAL; diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index c64de9dd7631..2f694f3846a9 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -46,8 +46,9 @@ struct ads7846_platform_data { u16 debounce_rep; /* additional consecutive good readings * required after the first two */ int gpio_pendown; /* the GPIO used to decide the pendown - * state if get_pendown_state == NULL - */ + * state if get_pendown_state == NULL */ + int gpio_pendown_debounce; /* platform specific debounce time for + * the gpio_pendown */ int (*get_pendown_state)(void); int (*filter_init) (const struct ads7846_platform_data *pdata, void **filter_data); -- cgit v1.2.3 From 0181bd5dea2ed0696f84591a92da0b6a1f1a2e62 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Nov 2012 18:37:38 -0500 Subject: drm/radeon: add new SI pci id Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- include/drm/drm_pciids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index af1cbaf535ed..c5c35e629426 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -210,6 +210,7 @@ {0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \ {0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x679B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \ {0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \ {0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 0e622d39197f0b64b9e043fe75ac3634bf9f3a05 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Fri, 23 Nov 2012 12:24:09 +0100 Subject: of/address: sparc: Declare of_iomap as an extern function for sparc again This bug-fix makes sure that of_iomap is defined extern for sparc so that the sparc-specific implementation of_iomap is once again used when including include/linux/of_address.h in a sparc context. OF_GPIO that is now available for sparc relies on this. 
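The fix follows the same pattern already used for of_address_to_resource: the sparc header declares the real function and defines a same-named macro, and the generic header only supplies its static inline stub when that macro is absent. A condensed sketch of the pattern (the actual hunks follow below):

    /* arch/sparc/include/asm/prom.h */
    void __iomem *of_iomap(struct device_node *node, int index);
    #define of_iomap of_iomap

    /* include/linux/of_address.h (!CONFIG_OF_ADDRESS case) */
    #ifndef of_iomap
    static inline void __iomem *of_iomap(struct device_node *device, int index)
    {
            return NULL;
    }
    #endif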
The bug was inadvertently introduced in a850a75, "of/address: add empty static inlines for !CONFIG_OF", that added a static dummy inline for of_iomap when !CONFIG_OF_ADDRESS. However, CONFIG_OF_ADDRESS is never defined for sparc, but there is a sparc-specific implementation /arch/sparc/kernel/of_device_common.c. This fix takes the same approach as 0bce04b that solved the equivalent problem for of_address_to_resource. Signed-off-by: Andreas Larsson Acked-by: David Miller Signed-off-by: Grant Likely --- arch/sparc/include/asm/prom.h | 5 ++++- include/linux/of_address.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index f93003123bce..67c62578d170 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -63,10 +63,13 @@ extern char *of_console_options; extern void irq_trans_init(struct device_node *dp); extern char *build_path_component(struct device_node *dp); -/* SPARC has a local implementation */ +/* SPARC has local implementations */ extern int of_address_to_resource(struct device_node *dev, int index, struct resource *r); #define of_address_to_resource of_address_to_resource +void __iomem *of_iomap(struct device_node *node, int index); +#define of_iomap of_iomap + #endif /* __KERNEL__ */ #endif /* _SPARC_PROM_H */ diff --git a/include/linux/of_address.h b/include/linux/of_address.h index e20e3af68fb6..0506eb53519b 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -42,10 +42,12 @@ static inline struct device_node *of_find_matching_node_by_address( { return NULL; } +#ifndef of_iomap static inline void __iomem *of_iomap(struct device_node *device, int index) { return NULL; } +#endif static inline const __be32 *of_get_address(struct device_node *dev, int index, u64 *size, unsigned int *flags) { -- cgit v1.2.3 From c5782e9f5a535af09d7834693a52afdbcc6e5f3f Mon Sep 17 00:00:00 2001 From: Tushar Behera Date: Mon, 26 Nov 2012 16:29:38 -0800 Subject: include/linux/bug.h: fix sparse warning related to BUILD_BUG_ON_INVALID Commit baf05aa9271b ("bug: introduce BUILD_BUG_ON_INVALID() macro") introduces this macro only when _CHECKER_ is not defined. 
Define a silent macro in the else condition to fix the following sparse warning: mm/filemap.c:395:9: error: undefined identifier 'BUILD_BUG_ON_INVALID' mm/filemap.c:396:9: error: undefined identifier 'BUILD_BUG_ON_INVALID' mm/filemap.c:397:9: error: undefined identifier 'BUILD_BUG_ON_INVALID' include/linux/mm.h:419:9: error: undefined identifier 'BUILD_BUG_ON_INVALID' include/linux/mm.h:419:9: error: not a function Signed-off-by: Tushar Behera Acked-by: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bug.h b/include/linux/bug.h index aaac4bba6f5c..b1cf40de847e 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -15,6 +15,7 @@ struct pt_regs; #define BUILD_BUG_ON_NOT_POWER_OF_2(n) #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) +#define BUILD_BUG_ON_INVALID(e) (0) #define BUILD_BUG_ON(condition) #define BUILD_BUG() (0) #else /* __CHECKER__ */ -- cgit v1.2.3 From 82b212f40059bffd6808c07266a942d444d5558a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 26 Nov 2012 16:29:45 -0800 Subject: Revert "mm: remove __GFP_NO_KSWAPD" With "mm: vmscan: scale number of pages reclaimed by reclaim/compaction based on failures" reverted, Zdenek Kabelac reported the following: Hmm, so it's just took longer to hit the problem and observe kswapd0 spinning on my CPU again - it's not as endless like before - but still it easily eats minutes - it helps to turn off Firefox or TB (memory hungry apps) so kswapd0 stops soon - and restart those apps again. (And I still have like >1GB of cached memory) kswapd0 R running task 0 30 2 0x00000000 Call Trace: preempt_schedule+0x42/0x60 _raw_spin_unlock+0x55/0x60 put_super+0x31/0x40 drop_super+0x22/0x30 prune_super+0x149/0x1b0 shrink_slab+0xba/0x510 The sysrq+m indicates the system has no swap so it'll never reclaim anonymous pages as part of reclaim/compaction. That is one part of the problem but not the root cause as file-backed pages could also be reclaimed. The likely underlying problem is that kswapd is woken up or kept awake for each THP allocation request in the page allocator slow path. If compaction fails for the requesting process then compaction will be deferred for a time and direct reclaim is avoided. However, if there is a storm of THP requests that are simply rejected, it will still be the case that kswapd is awake for a prolonged period of time as pgdat->kswapd_max_order is updated each time. This is noticed by the main kswapd() loop and it will not call kswapd_try_to_sleep(). Instead it will loop, shrinking a small number of pages and calling shrink_slab() on each iteration. The temptation is to supply a patch that checks if kswapd was woken for THP and if so ignore pgdat->kswapd_max_order but it'll be a hack and not backed up by proper testing. As 3.7 is very close to release and this is not a bug we should release with, a safer path is to revert "mm: remove __GFP_NO_KSWAPD" for now and revisit it with the view to ironing out the balance_pgdat() logic in general. 
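Concretely, restoring the flag means GFP_TRANSHUGE carries __GFP_NO_KSWAPD again and the allocator slow path can skip the wakeup for THP requests; in condensed form (the full hunks follow below):

    #define GFP_TRANSHUGE  (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
                            __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
                            __GFP_NO_KSWAPD)

    /* __alloc_pages_slowpath(): THP allocations no longer wake kswapd */
    if (!(gfp_mask & __GFP_NO_KSWAPD))
            wake_all_kswapd(order, zonelist, high_zoneidx,
                            zone_idx(preferred_zone));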
Signed-off-by: Mel Gorman Cc: Zdenek Kabelac Cc: Seth Jennings Cc: Valdis Kletnieks Cc: Jiri Slaby Cc: Rik van Riel Cc: Robert Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mtd/mtdcore.c | 6 ++++-- include/linux/gfp.h | 5 ++++- include/trace/events/gfpflags.h | 1 + mm/page_alloc.c | 7 ++++--- 4 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 374c46dff7dd..ec794a72975d 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1077,7 +1077,8 @@ EXPORT_SYMBOL_GPL(mtd_writev); * until the request succeeds or until the allocation size falls below * the system page size. This attempts to make sure it does not adversely * impact system performance, so when allocating more than one page, we - * ask the memory allocator to avoid re-trying. + * ask the memory allocator to avoid re-trying, swapping, writing back + * or performing I/O. * * Note, this function also makes sure that the allocated buffer is aligned to * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. @@ -1091,7 +1092,8 @@ EXPORT_SYMBOL_GPL(mtd_writev); */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { - gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY; + gfp_t flags = __GFP_NOWARN | __GFP_WAIT | + __GFP_NORETRY | __GFP_NO_KSWAPD; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 02c1c9710be0..d0a79678f169 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -31,6 +31,7 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u #define ___GFP_NOTRACK 0x200000u +#define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_OTHER_NODE 0x800000u #define ___GFP_WRITE 0x1000000u @@ -85,6 +86,7 @@ struct vm_area_struct; #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ +#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ @@ -114,7 +116,8 @@ struct vm_area_struct; __GFP_MOVABLE) #define GFP_IOFS (__GFP_IO | __GFP_FS) #define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ + __GFP_NO_KSWAPD) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index 9391706e9254..d6fd8e5b14b7 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -36,6 +36,7 @@ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ + {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bcb72c6e2b2d..92871579cbee 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; restart: - wake_all_kswapd(order, zonelist, high_zoneidx, - zone_idx(preferred_zone)); + if (!(gfp_mask & __GFP_NO_KSWAPD)) + wake_all_kswapd(order, zonelist, 
high_zoneidx, + zone_idx(preferred_zone)); /* * OK, we're below the kswapd watermark and have kicked background @@ -2494,7 +2495,7 @@ rebalance: * system then fail the allocation instead of entering direct reclaim. */ if ((deferred_compaction || contended_compaction) && - (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) + (gfp_mask & __GFP_NO_KSWAPD)) goto nopage; /* Try direct reclaim and then allocating */ -- cgit v1.2.3 From 4b05a1c74d1cfae37cf6ff293ee928350f031418 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 27 Nov 2012 22:59:52 -0500 Subject: percpu-rwsem: use synchronize_sched_expedited Use synchronize_sched_expedited() instead of synchronize_sched() to improve mount speed. This patch improves mount time from 0.500s to 0.013s for Jeff's test-case. Signed-off-by: Mikulas Patocka Reported-and-tested-by: Jeff Chua Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 250a4acddb2b..bd1e86071e57 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -13,7 +13,7 @@ struct percpu_rw_semaphore { }; #define light_mb() barrier() -#define heavy_mb() synchronize_sched() +#define heavy_mb() synchronize_sched_expedited() static inline void percpu_down_read(struct percpu_rw_semaphore *p) { @@ -51,7 +51,7 @@ static inline void percpu_down_write(struct percpu_rw_semaphore *p) { mutex_lock(&p->mtx); p->locked = true; - synchronize_sched(); /* make sure that all readers exit the rcu_read_lock_sched region */ + synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */ while (__percpu_count(p->counters)) msleep(1); heavy_mb(); /* C, between read of p->counter and write to data, paired with B */ -- cgit v1.2.3 From 1e8b33328a5407b447ff80953655a47014a6dcb9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Nov 2012 10:49:50 -0800 Subject: blockdev: remove bd_block_size_semaphore again This reverts the block-device direct access code to the previous unlocked code, now that fs/buffer.c no longer needs external locking. With this, fs/block_dev.c is back to the original version, apart from a whitespace cleanup that I didn't want to revert. 
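The visible effect is that def_blk_fops points back at the generic helpers instead of the wrappers that took bd_block_size_semaphore; abridged from the hunks below:

    /* fs/block_dev.c, def_blk_fops after the revert (abridged) */
    const struct file_operations def_blk_fops = {
            .aio_read       = generic_file_aio_read,        /* was blkdev_aio_read */
            .mmap           = generic_file_mmap,            /* was blkdev_mmap */
            .splice_read    = generic_file_splice_read,     /* was blkdev_splice_read */
            .splice_write   = generic_file_splice_write,    /* was blkdev_splice_write */
            /* .aio_write stays blkdev_aio_write, now without the semaphore */
    };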
Signed-off-by: Linus Torvalds --- drivers/char/raw.c | 2 +- fs/block_dev.c | 105 ++--------------------------------------------------- include/linux/fs.h | 4 -- 3 files changed, 5 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 0bb207eaef2f..54a3a6d09819 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations raw_fops = { .read = do_sync_read, - .aio_read = blkdev_aio_read, + .aio_read = generic_file_aio_read, .write = do_sync_write, .aio_write = blkdev_aio_write, .fsync = blkdev_fsync, diff --git a/fs/block_dev.c b/fs/block_dev.c index 1a1e5e3b1eaf..47a949d8a07e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -116,8 +116,6 @@ EXPORT_SYMBOL(invalidate_bdev); int set_blocksize(struct block_device *bdev, int size) { - struct address_space *mapping; - /* Size must be a power of two, and between 512 and PAGE_SIZE */ if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; @@ -126,19 +124,6 @@ int set_blocksize(struct block_device *bdev, int size) if (size < bdev_logical_block_size(bdev)) return -EINVAL; - /* Prevent starting I/O or mapping the device */ - percpu_down_write(&bdev->bd_block_size_semaphore); - - /* Check that the block device is not memory mapped */ - mapping = bdev->bd_inode->i_mapping; - mutex_lock(&mapping->i_mmap_mutex); - if (mapping_mapped(mapping)) { - mutex_unlock(&mapping->i_mmap_mutex); - percpu_up_write(&bdev->bd_block_size_semaphore); - return -EBUSY; - } - mutex_unlock(&mapping->i_mmap_mutex); - /* Don't change the size if it is same as current */ if (bdev->bd_block_size != size) { sync_blockdev(bdev); @@ -146,9 +131,6 @@ int set_blocksize(struct block_device *bdev, int size) bdev->bd_inode->i_blkbits = blksize_bits(size); kill_bdev(bdev); } - - percpu_up_write(&bdev->bd_block_size_semaphore); - return 0; } @@ -459,12 +441,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; - - if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) { - kmem_cache_free(bdev_cachep, ei); - return NULL; - } - return &ei->vfs_inode; } @@ -473,8 +449,6 @@ static void bdev_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); - percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore); - kmem_cache_free(bdev_cachep, bdi); } @@ -1593,22 +1567,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) return blkdev_ioctl(bdev, mode, cmd, arg); } -ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_aio_read(iocb, iov, nr_segs, pos); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} -EXPORT_SYMBOL_GPL(blkdev_aio_read); - /* * Write data to the block device. Only intended for the block device itself * and the raw driver which basically is a fake block device. 
@@ -1620,16 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; - struct block_device *bdev = I_BDEV(file->f_mapping->host); struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); blk_start_plug(&plug); - - percpu_down_read(&bdev->bd_block_size_semaphore); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1638,62 +1592,11 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } - - percpu_up_read(&bdev->bd_block_size_semaphore); - blk_finish_plug(&plug); - return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); -static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) -{ - int ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_mmap(file, vma); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} - -static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_splice_read(file, ppos, pipe, len, flags); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} - -static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe, - struct file *file, loff_t *ppos, size_t len, - unsigned int flags) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_splice_write(pipe, file, ppos, len, flags); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} - - /* * Try to release a page associated with block device when the system * is under memory pressure. 
@@ -1724,16 +1627,16 @@ const struct file_operations def_blk_fops = { .llseek = block_llseek, .read = do_sync_read, .write = do_sync_write, - .aio_read = blkdev_aio_read, + .aio_read = generic_file_aio_read, .aio_write = blkdev_aio_write, - .mmap = blkdev_mmap, + .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .splice_read = blkdev_splice_read, - .splice_write = blkdev_splice_write, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/include/linux/fs.h b/include/linux/fs.h index b33cfc97b9ca..44f288e9726d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -462,8 +462,6 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; - /* A semaphore that prevents I/O while block size is being changed */ - struct percpu_rw_semaphore bd_block_size_semaphore; }; /* @@ -2379,8 +2377,6 @@ extern int generic_segment_checks(const struct iovec *iov, unsigned long *nr_segs, size_t *count, int access_flags); /* fs/block_dev.c */ -extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, -- cgit v1.2.3 From bbec0270bdd887f96377065ee38b8848b5afa395 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Nov 2012 12:31:52 -0800 Subject: blkdev_max_block: make private to fs/buffer.c We really don't want to look at the block size for the raw block device accesses in fs/block-dev.c, because it may be changing from under us. So get rid of the max_block logic entirely, since the caller should already have done it anyway. That leaves the only user of this function in fs/buffer.c, so move the whole function there and make it static. Signed-off-by: Linus Torvalds --- fs/block_dev.c | 55 +----------------------------------------------------- fs/buffer.c | 14 +++++++++++++- include/linux/fs.h | 1 - 3 files changed, 14 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 47a949d8a07e..a1e09b4fe1ba 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode, spin_unlock(&dst->wb.list_lock); } -sector_t blkdev_max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - /* Kill _all_ buffers and pagecache , dirty or not.. */ void kill_bdev(struct block_device *bdev) { @@ -163,52 +150,12 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= blkdev_max_block(I_BDEV(inode))) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. 
- * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } bh->b_bdev = I_BDEV(inode); bh->b_blocknr = iblock; set_buffer_mapped(bh); return 0; } -static int -blkdev_get_blocks(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) -{ - sector_t end_block = blkdev_max_block(I_BDEV(inode)); - unsigned long max_blocks = bh->b_size >> inode->i_blkbits; - - if ((iblock + max_blocks) > end_block) { - max_blocks = end_block - iblock; - if ((long)max_blocks <= 0) { - if (create) - return -EIO; /* write fully beyond EOF */ - /* - * It is a read which is fully beyond EOF. We return - * a !buffer_mapped buffer - */ - max_blocks = 0; - } - } - - bh->b_bdev = I_BDEV(inode); - bh->b_blocknr = iblock; - bh->b_size = max_blocks << inode->i_blkbits; - if (max_blocks) - set_buffer_mapped(bh); - return 0; -} - static ssize_t blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -217,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct inode *inode = file->f_mapping->host; return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_blocks, NULL, NULL, 0); + nr_segs, blkdev_get_block, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) diff --git a/fs/buffer.c b/fs/buffer.c index 28a74ff5324b..3586fb05c8ce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head) attach_page_buffers(page, head); } +static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + /* * Initialise the state of a blockdev page's buffers. */ @@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, struct buffer_head *head = page_buffers(page); struct buffer_head *bh = head; int uptodate = PageUptodate(page); - sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); + sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size); do { if (!buffer_mapped(bh)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 44f288e9726d..75fe9a134803 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2047,7 +2047,6 @@ extern void unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); -extern sector_t blkdev_max_block(struct block_device *bdev); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); -- cgit v1.2.3 From a50915394f1fc02c2861d3b7ce7014788aa5066e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 29 Nov 2012 13:54:27 -0800 Subject: revert "Revert "mm: remove __GFP_NO_KSWAPD"" It appears that this patch was innocent, and we hope that "mm: avoid waking kswapd for THP allocations when compaction is deferred or contended" will fix the final kswapd-spinning cause. 
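With the flag gone again the slow path returns to the unconditional wakeup, and the deferred-compaction bail-out keys off __GFP_MOVABLE/__GFP_REPEAT instead; condensed from the page_alloc.c hunks below:

    /* __alloc_pages_slowpath() after this revert (abridged) */
    wake_all_kswapd(order, zonelist, high_zoneidx,
                    zone_idx(preferred_zone));

    if ((deferred_compaction || contended_compaction) &&
        (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
            goto nopage;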
Cc: Zdenek Kabelac Cc: Seth Jennings Cc: Valdis Kletnieks Cc: Jiri Slaby Cc: Rik van Riel Cc: Robert Jennings Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mtd/mtdcore.c | 6 ++---- include/linux/gfp.h | 13 +++++-------- include/trace/events/gfpflags.h | 1 - mm/page_alloc.c | 7 +++---- 4 files changed, 10 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index ec794a72975d..374c46dff7dd 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1077,8 +1077,7 @@ EXPORT_SYMBOL_GPL(mtd_writev); * until the request succeeds or until the allocation size falls below * the system page size. This attempts to make sure it does not adversely * impact system performance, so when allocating more than one page, we - * ask the memory allocator to avoid re-trying, swapping, writing back - * or performing I/O. + * ask the memory allocator to avoid re-trying. * * Note, this function also makes sure that the allocated buffer is aligned to * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. @@ -1092,8 +1091,7 @@ EXPORT_SYMBOL_GPL(mtd_writev); */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { - gfp_t flags = __GFP_NOWARN | __GFP_WAIT | - __GFP_NORETRY | __GFP_NO_KSWAPD; + gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index d0a79678f169..76e1aa206f57 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,10 +30,9 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u -#define ___GFP_NOTRACK 0x200000u -#define ___GFP_NO_KSWAPD 0x400000u -#define ___GFP_OTHER_NODE 0x800000u -#define ___GFP_WRITE 0x1000000u +#define ___GFP_NOTRACK 0x100000u +#define ___GFP_OTHER_NODE 0x200000u +#define ___GFP_WRITE 0x400000u /* * GFP bitmasks.. 
@@ -86,7 +85,6 @@ struct vm_area_struct; #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ -#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ @@ -96,7 +94,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 23 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ @@ -116,8 +114,7 @@ struct vm_area_struct; __GFP_MOVABLE) #define GFP_IOFS (__GFP_IO | __GFP_FS) #define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ - __GFP_NO_KSWAPD) + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index d6fd8e5b14b7..9391706e9254 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -36,7 +36,6 @@ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ - {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e208f0ad68c..8193809f3de0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2416,9 +2416,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; restart: - if (!(gfp_mask & __GFP_NO_KSWAPD)) - wake_all_kswapd(order, zonelist, high_zoneidx, - zone_idx(preferred_zone)); + wake_all_kswapd(order, zonelist, high_zoneidx, + zone_idx(preferred_zone)); /* * OK, we're below the kswapd watermark and have kicked background @@ -2495,7 +2494,7 @@ rebalance: * system then fail the allocation instead of entering direct reclaim. */ if ((deferred_compaction || contended_compaction) && - (gfp_mask & __GFP_NO_KSWAPD)) + (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) goto nopage; /* Try direct reclaim and then allocating */ -- cgit v1.2.3
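One side effect visible in the gfp.h hunk above: with ___GFP_NO_KSWAPD dropped, the remaining flag bits are repacked and the bit count shrinks, so the derived mask stays consistent (values as in the patch):

    #define ___GFP_NOTRACK          0x100000u
    #define ___GFP_OTHER_NODE       0x200000u
    #define ___GFP_WRITE            0x400000u

    #define __GFP_BITS_SHIFT        23      /* room for N __GFP_FOO bits */
    #define __GFP_BITS_MASK         ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))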