7 files changed, 495 insertions, 123 deletions
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 365795447804..4a70be485ff8 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -1,17 +1,16 @@
 config ARCH_VEXPRESS
 	bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7
 	select ARCH_REQUIRE_GPIOLIB
+	select ARCH_SUPPORTS_BIG_ENDIAN
 	select ARM_AMBA
 	select ARM_GIC
 	select ARM_TIMER_SP804
-	select CLKDEV_LOOKUP
 	select COMMON_CLK
 	select COMMON_CLK_VERSATILE
 	select CPU_V7
 	select GENERIC_CLOCKEVENTS
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
-	select HAVE_CLK
 	select HAVE_PATA_PLATFORM
 	select HAVE_SMP
 	select ICST
@@ -66,10 +65,22 @@ config ARCH_VEXPRESS_DCSCB
 	  This is needed to provide CPU and cluster power management
 	  on RTSM implementing big.LITTLE.
 
+config ARCH_VEXPRESS_SPC
+	bool "Versatile Express Serial Power Controller (SPC)"
+	select ARCH_HAS_CPUFREQ
+	select ARCH_HAS_OPP
+	select PM_OPP
+	help
+	  The TC2 (A15x2 A7x3) versatile express core tile integrates a logic
+	  block called Serial Power Controller (SPC) that provides the interface
+	  between the dual cluster test-chip and the M3 microcontroller that
+	  carries out power management.
+
 config ARCH_VEXPRESS_TC2_PM
 	bool "Versatile Express TC2 power management"
 	depends on MCPM
 	select ARM_CCI
+	select ARCH_VEXPRESS_SPC
 	help
 	  Support for CPU and cluster power management on Versatile Express
 	  with a TC2 (A15x2 A7x3) big.LITTLE core tile.
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 505e64ab3eae..0997e0b7494c 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -8,7 +8,8 @@ obj-y					:= v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)	+= ct-ca9x4.o
 obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)	+= dcscb.o	dcscb_setup.o
 CFLAGS_dcscb.o				+= -march=armv7-a
-obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM)	+= tc2_pm.o spc.o
+obj-$(CONFIG_ARCH_VEXPRESS_SPC)		+= spc.o
+obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM)	+= tc2_pm.o
 CFLAGS_tc2_pm.o				+= -march=armv7-a
 obj-$(CONFIG_SMP)			+= platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)		+= hotplug.o
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index 3a6384c6c435..14d499688736 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -133,38 +133,8 @@ static void dcscb_power_down(void)
 	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
 		arch_spin_unlock(&dcscb_lock);
 
-		/*
-		 * Flush all cache levels for this cluster.
-		 *
-		 * To do so we do:
-		 * - Clear the SCTLR.C bit to prevent further cache allocations
-		 * - Flush the whole cache
-		 * - Clear the ACTLR "SMP" bit to disable local coherency
-		 *
-		 * Let's do it in the safest possible way i.e. with
-		 * no memory access within the following sequence
-		 * including to the stack.
-		 *
-		 * Note: fp is preserved to the stack explicitly prior doing
-		 * this since adding it to the clobber list is incompatible
-		 * with having CONFIG_FRAME_POINTER=y.
-		 */
-		asm volatile(
-		"str	fp, [sp, #-4]! \n\t"
-		"mrc	p15, 0, r0, c1, c0, 0	@ get CR \n\t"
-		"bic	r0, r0, #"__stringify(CR_C)" \n\t"
-		"mcr	p15, 0, r0, c1, c0, 0	@ set CR \n\t"
-		"isb	\n\t"
-		"bl	v7_flush_dcache_all \n\t"
-		"clrex	\n\t"
-		"mrc	p15, 0, r0, c1, c0, 1	@ get AUXCR \n\t"
-		"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t"
-		"mcr	p15, 0, r0, c1, c0, 1	@ set AUXCR \n\t"
-		"isb	\n\t"
-		"dsb	\n\t"
-		"ldr	fp, [sp], #4"
-		: : : "r0","r1","r2","r3","r4","r5","r6","r7",
-		      "r9","r10","lr","memory");
+		/* Flush all cache levels for this cluster. */
+		v7_exit_coherency_flush(all);
 
 		/*
 		 * This is a harmless no-op.  On platforms with a real
@@ -183,26 +153,8 @@ static void dcscb_power_down(void)
 	} else {
 		arch_spin_unlock(&dcscb_lock);
 
-		/*
-		 * Flush the local CPU cache.
-		 * Let's do it in the safest possible way as above.
-		 */
-		asm volatile(
-		"str	fp, [sp, #-4]! \n\t"
-		"mrc	p15, 0, r0, c1, c0, 0	@ get CR \n\t"
-		"bic	r0, r0, #"__stringify(CR_C)" \n\t"
-		"mcr	p15, 0, r0, c1, c0, 0	@ set CR \n\t"
-		"isb	\n\t"
-		"bl	v7_flush_dcache_louis \n\t"
-		"clrex	\n\t"
-		"mrc	p15, 0, r0, c1, c0, 1	@ get AUXCR \n\t"
-		"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t"
-		"mcr	p15, 0, r0, c1, c0, 1	@ set AUXCR \n\t"
-		"isb	\n\t"
-		"dsb	\n\t"
-		"ldr	fp, [sp], #4"
-		: : : "r0","r1","r2","r3","r4","r5","r6","r7",
-		      "r9","r10","lr","memory");
+		/* Disable and flush the local CPU cache. */
+		v7_exit_coherency_flush(louis);
 	}
 
 	__mcpm_cpu_down(cpu, cluster);
diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index eefb029197ca..c26ef5b92ca7 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -17,14 +17,31 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
 #include <linux/slab.h>
+#include <linux/semaphore.h>
 
 #include <asm/cacheflush.h>
 
 #define SPCLOG "vexpress-spc: "
 
+#define PERF_LVL_A15		0x00
+#define PERF_REQ_A15		0x04
+#define PERF_LVL_A7		0x08
+#define PERF_REQ_A7		0x0c
+#define COMMS			0x10
+#define COMMS_REQ		0x14
+#define PWC_STATUS		0x18
+#define PWC_FLAG		0x1c
+
 /* SPC wake-up IRQs status and mask */
 #define WAKE_INT_MASK		0x24
 #define WAKE_INT_RAW		0x28
@@ -36,12 +53,50 @@
 #define A15_BX_ADDR0		0x68
 #define A7_BX_ADDR0		0x78
 
+/* SPC CPU/cluster reset statue */
+#define STANDBYWFI_STAT		0x3c
+#define STANDBYWFI_STAT_A15_CPU_MASK(cpu)	(1 << (cpu))
+#define STANDBYWFI_STAT_A7_CPU_MASK(cpu)	(1 << (3 + (cpu)))
+
+/* SPC system config interface registers */
+#define SYSCFG_WDATA		0x70
+#define SYSCFG_RDATA		0x74
+
+/* A15/A7 OPP virtual register base */
+#define A15_PERFVAL_BASE	0xC10
+#define A7_PERFVAL_BASE		0xC30
+
+/* Config interface control bits */
+#define SYSCFG_START		(1 << 31)
+#define SYSCFG_SCC		(6 << 20)
+#define SYSCFG_STAT		(14 << 20)
+
 /* wake-up interrupt masks */
 #define GBL_WAKEUP_INT_MSK	(0x3 << 10)
 
 /* TC2 static dual-cluster configuration */
 #define MAX_CLUSTERS		2
 
+/*
+ * Even though the SPC takes max 3-5 ms to complete any OPP/COMMS
+ * operation, the operation could start just before jiffie is about
+ * to be incremented. So setting timeout value of 20ms = 2jiffies@100Hz
+ */
+#define TIMEOUT_US	20000
+
+#define MAX_OPPS	8
+#define CA15_DVFS	0
+#define CA7_DVFS	1
+#define SPC_SYS_CFG	2
+#define STAT_COMPLETE(type)	((1 << 0) << (type << 2))
+#define STAT_ERR(type)		((1 << 1) << (type << 2))
+#define RESPONSE_MASK(type)	(STAT_COMPLETE(type) | STAT_ERR(type))
+
+struct ve_spc_opp {
+	unsigned long freq;
+	unsigned long u_volt;
+};
+
 struct ve_spc_drvdata {
 	void __iomem *baseaddr;
 	/*
@@ -49,6 +104,12 @@ struct ve_spc_drvdata {
 	 * It corresponds to A15 processors MPIDR[15:8] bitfield
 	 */
 	u32 a15_clusid;
+	uint32_t cur_rsp_mask;
+	uint32_t cur_rsp_stat;
+	struct semaphore sem;
+	struct completion done;
+	struct ve_spc_opp *opps[MAX_CLUSTERS];
+	int num_opps[MAX_CLUSTERS];
 };
 
 static struct ve_spc_drvdata *info;
@@ -157,8 +218,232 @@ void ve_spc_powerdown(u32 cluster, bool enable)
 	writel_relaxed(enable, info->baseaddr + pwdrn_reg);
 }
 
-int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid)
+static u32 standbywfi_cpu_mask(u32 cpu, u32 cluster)
+{
+	return cluster_is_a15(cluster) ?
+		  STANDBYWFI_STAT_A15_CPU_MASK(cpu)
+		: STANDBYWFI_STAT_A7_CPU_MASK(cpu);
+}
+
+/**
+ * ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
+ *
+ * @cpu: mpidr[7:0] bitfield describing CPU affinity level within cluster
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ *
+ * @return: non-zero if and only if the specified CPU is in WFI
+ *
+ * Take care when interpreting the result of this function: a CPU might
+ * be in WFI temporarily due to idle, and is not necessarily safely
+ * parked.
+ */
+int ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
+{
+	int ret;
+	u32 mask = standbywfi_cpu_mask(cpu, cluster);
+
+	if (cluster >= MAX_CLUSTERS)
+		return 1;
+
+	ret = readl_relaxed(info->baseaddr + STANDBYWFI_STAT);
+
+	pr_debug("%s: PCFGREG[0x%X] = 0x%08X, mask = 0x%X\n",
+		 __func__, STANDBYWFI_STAT, ret, mask);
+
+	return ret & mask;
+}
+
+static int ve_spc_get_performance(int cluster, u32 *freq)
+{
+	struct ve_spc_opp *opps = info->opps[cluster];
+	u32 perf_cfg_reg = 0;
+	u32 perf;
+
+	perf_cfg_reg = cluster_is_a15(cluster) ? PERF_LVL_A15 : PERF_LVL_A7;
+
+	perf = readl_relaxed(info->baseaddr + perf_cfg_reg);
+	if (perf >= info->num_opps[cluster])
+		return -EINVAL;
+
+	opps += perf;
+	*freq = opps->freq;
+
+	return 0;
+}
+
+/* find closest match to given frequency in OPP table */
+static int ve_spc_round_performance(int cluster, u32 freq)
+{
+	int idx, max_opp = info->num_opps[cluster];
+	struct ve_spc_opp *opps = info->opps[cluster];
+	u32 fmin = 0, fmax = ~0, ftmp;
+
+	freq /= 1000; /* OPP entries in kHz */
+	for (idx = 0; idx < max_opp; idx++, opps++) {
+		ftmp = opps->freq;
+		if (ftmp >= freq) {
+			if (ftmp <= fmax)
+				fmax = ftmp;
+		} else {
+			if (ftmp >= fmin)
+				fmin = ftmp;
+		}
+	}
+	if (fmax != ~0)
+		return fmax * 1000;
+	else
+		return fmin * 1000;
+}
+
+static int ve_spc_find_performance_index(int cluster, u32 freq)
+{
+	int idx, max_opp = info->num_opps[cluster];
+	struct ve_spc_opp *opps = info->opps[cluster];
+
+	for (idx = 0; idx < max_opp; idx++, opps++)
+		if (opps->freq == freq)
+			break;
+	return (idx == max_opp) ? -EINVAL : idx;
+}
+
+static int ve_spc_waitforcompletion(int req_type)
+{
+	int ret = wait_for_completion_interruptible_timeout(
+			&info->done, usecs_to_jiffies(TIMEOUT_US));
+	if (ret == 0)
+		ret = -ETIMEDOUT;
+	else if (ret > 0)
+		ret = info->cur_rsp_stat & STAT_COMPLETE(req_type) ? 0 : -EIO;
+	return ret;
+}
+
+static int ve_spc_set_performance(int cluster, u32 freq)
+{
+	u32 perf_cfg_reg, perf_stat_reg;
+	int ret, perf, req_type;
+
+	if (cluster_is_a15(cluster)) {
+		req_type = CA15_DVFS;
+		perf_cfg_reg = PERF_LVL_A15;
+		perf_stat_reg = PERF_REQ_A15;
+	} else {
+		req_type = CA7_DVFS;
+		perf_cfg_reg = PERF_LVL_A7;
+		perf_stat_reg = PERF_REQ_A7;
+	}
+
+	perf = ve_spc_find_performance_index(cluster, freq);
+
+	if (perf < 0)
+		return perf;
+
+	if (down_timeout(&info->sem, usecs_to_jiffies(TIMEOUT_US)))
+		return -ETIME;
+
+	init_completion(&info->done);
+	info->cur_rsp_mask = RESPONSE_MASK(req_type);
+
+	writel(perf, info->baseaddr + perf_cfg_reg);
+	ret = ve_spc_waitforcompletion(req_type);
+
+	info->cur_rsp_mask = 0;
+	up(&info->sem);
+
+	return ret;
+}
+
+static int ve_spc_read_sys_cfg(int func, int offset, uint32_t *data)
+{
+	int ret;
+
+	if (down_timeout(&info->sem, usecs_to_jiffies(TIMEOUT_US)))
+		return -ETIME;
+
+	init_completion(&info->done);
+	info->cur_rsp_mask = RESPONSE_MASK(SPC_SYS_CFG);
+
+	/* Set the control value */
+	writel(SYSCFG_START | func | offset >> 2, info->baseaddr + COMMS);
+	ret = ve_spc_waitforcompletion(SPC_SYS_CFG);
+
+	if (ret == 0)
+		*data = readl(info->baseaddr + SYSCFG_RDATA);
+
+	info->cur_rsp_mask = 0;
+	up(&info->sem);
+
+	return ret;
+}
+
+static irqreturn_t ve_spc_irq_handler(int irq, void *data)
+{
+	struct ve_spc_drvdata *drv_data = data;
+	uint32_t status = readl_relaxed(drv_data->baseaddr + PWC_STATUS);
+
+	if (info->cur_rsp_mask & status) {
+		info->cur_rsp_stat = status;
+		complete(&drv_data->done);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ *  +--------------------------+
+ *  | 31      20 | 19        0 |
+ *  +--------------------------+
+ *  |   u_volt   |  freq(kHz)  |
+ *  +--------------------------+
+ */
+#define MULT_FACTOR	20
+#define VOLT_SHIFT	20
+#define FREQ_MASK	(0xFFFFF)
+static int ve_spc_populate_opps(uint32_t cluster)
+{
+	uint32_t data = 0, off, ret, idx;
+	struct ve_spc_opp *opps;
+
+	opps = kzalloc(sizeof(*opps) * MAX_OPPS, GFP_KERNEL);
+	if (!opps)
+		return -ENOMEM;
+
+	info->opps[cluster] = opps;
+
+	off = cluster_is_a15(cluster) ? A15_PERFVAL_BASE : A7_PERFVAL_BASE;
+	for (idx = 0; idx < MAX_OPPS; idx++, off += 4, opps++) {
+		ret = ve_spc_read_sys_cfg(SYSCFG_SCC, off, &data);
+		if (!ret) {
+			opps->freq = (data & FREQ_MASK) * MULT_FACTOR;
+			opps->u_volt = data >> VOLT_SHIFT;
+		} else {
+			break;
+		}
+	}
+	info->num_opps[cluster] = idx;
+
+	return ret;
+}
+
+static int ve_init_opp_table(struct device *cpu_dev)
 {
+	int cluster = topology_physical_package_id(cpu_dev->id);
+	int idx, ret = 0, max_opp = info->num_opps[cluster];
+	struct ve_spc_opp *opps = info->opps[cluster];
+
+	for (idx = 0; idx < max_opp; idx++, opps++) {
+		ret = dev_pm_opp_add(cpu_dev, opps->freq * 1000, opps->u_volt);
+		if (ret) {
+			dev_warn(cpu_dev, "failed to add opp %lu %lu\n",
+				 opps->freq, opps->u_volt);
+			return ret;
+		}
+	}
+	return ret;
+}
+
+int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid, int irq)
+{
+	int ret;
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		pr_err(SPCLOG "unable to allocate mem\n");
@@ -168,6 +453,25 @@ int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid)
 	info->baseaddr = baseaddr;
 	info->a15_clusid = a15_clusid;
 
+	if (irq <= 0) {
+		pr_err(SPCLOG "Invalid IRQ %d\n", irq);
+		kfree(info);
+		return -EINVAL;
+	}
+
+	init_completion(&info->done);
+
+	readl_relaxed(info->baseaddr + PWC_STATUS);
+
+	ret = request_irq(irq, ve_spc_irq_handler, IRQF_TRIGGER_HIGH
+				| IRQF_ONESHOT, "vexpress-spc", info);
+	if (ret) {
+		pr_err(SPCLOG "IRQ %d request failed\n", irq);
+		kfree(info);
+		return -ENODEV;
+	}
+
+	sema_init(&info->sem, 1);
 	/*
 	 * Multi-cluster systems may need this data when non-coherent, during
 	 * cluster power-up/power-down. Make sure driver info reaches main
@@ -178,3 +482,103 @@ int __init ve_spc_init(void __iomem *baseaddr, u32 a15_clusid)
 
 	return 0;
 }
+
+struct clk_spc {
+	struct clk_hw hw;
+	int cluster;
+};
+
+#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw)
+static unsigned long spc_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_spc *spc = to_clk_spc(hw);
+	u32 freq;
+
+	if (ve_spc_get_performance(spc->cluster, &freq))
+		return -EIO;
+
+	return freq * 1000;
+}
+
+static long spc_round_rate(struct clk_hw *hw, unsigned long drate,
+		unsigned long *parent_rate)
+{
+	struct clk_spc *spc = to_clk_spc(hw);
+
+	return ve_spc_round_performance(spc->cluster, drate);
+}
+
+static int spc_set_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long parent_rate)
+{
+	struct clk_spc *spc = to_clk_spc(hw);
+
+	return ve_spc_set_performance(spc->cluster, rate / 1000);
+}
+
+static struct clk_ops clk_spc_ops = {
+	.recalc_rate = spc_recalc_rate,
+	.round_rate = spc_round_rate,
+	.set_rate = spc_set_rate,
+};
+
+static struct clk *ve_spc_clk_register(struct device *cpu_dev)
+{
+	struct clk_init_data init;
+	struct clk_spc *spc;
+
+	spc = kzalloc(sizeof(*spc), GFP_KERNEL);
+	if (!spc) {
+		pr_err("could not allocate spc clk\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spc->hw.init = &init;
+	spc->cluster = topology_physical_package_id(cpu_dev->id);
+
+	init.name = dev_name(cpu_dev);
+	init.ops = &clk_spc_ops;
+	init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE;
+	init.num_parents = 0;
+
+	return devm_clk_register(cpu_dev, &spc->hw);
+}
+
+static int __init ve_spc_clk_init(void)
+{
+	int cpu;
+	struct clk *clk;
+
+	if (!info)
+		return 0; /* Continue only if SPC is initialised */
+
+	if (ve_spc_populate_opps(0) || ve_spc_populate_opps(1)) {
+		pr_err("failed to build OPP table\n");
+		return -ENODEV;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct device *cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_warn("failed to get cpu%d device\n", cpu);
+			continue;
+		}
+		clk = ve_spc_clk_register(cpu_dev);
+		if (IS_ERR(clk)) {
+			pr_warn("failed to register cpu%d clock\n", cpu);
+			continue;
+		}
+		if (clk_register_clkdev(clk, NULL, dev_name(cpu_dev))) {
+			pr_warn("failed to register cpu%d clock lookup\n", cpu);
+			continue;
+		}
+
+		if (ve_init_opp_table(cpu_dev))
+			pr_warn("failed to initialise cpu%d opp table\n", cpu);
+	}
+
+	platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0);
+	return 0;
+}
+module_init(ve_spc_clk_init);
diff --git a/arch/arm/mach-vexpress/spc.h b/arch/arm/mach-vexpress/spc.h
index 5f7e4a446a17..793d065243b9 100644
--- a/arch/arm/mach-vexpress/spc.h
+++ b/arch/arm/mach-vexpress/spc.h
@@ -15,10 +15,11 @@
 #ifndef __SPC_H_
 #define __SPC_H_
 
-int __init ve_spc_init(void __iomem *base, u32 a15_clusid);
+int __init ve_spc_init(void __iomem *base, u32 a15_clusid, int irq);
 void ve_spc_global_wakeup_irq(bool set);
 void ve_spc_cpu_wakeup_irq(u32 cluster, u32 cpu, bool set);
 void ve_spc_set_resume_addr(u32 cluster, u32 cpu, u32 addr);
 void ve_spc_powerdown(u32 cluster, bool enable);
+int ve_spc_cpu_in_wfi(u32 cpu, u32 cluster);
 
 #endif
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index e6eb48192912..29e7785a54bc 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -12,10 +12,12 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/irqchip/arm-gic.h>
@@ -31,11 +33,17 @@
 #include "spc.h"
 
 /* SCC conf registers */
+#define RESET_CTRL		0x018
+#define RESET_A15_NCORERESET(cpu)	(1 << (2 + (cpu)))
+#define RESET_A7_NCORERESET(cpu)	(1 << (16 + (cpu)))
+
 #define A15_CONF		0x400
 #define A7_CONF			0x500
 #define SYS_INFO		0x700
 #define SPC_BASE		0xb00
 
+static void __iomem *scc;
+
 /*
  * We can't use regular spinlocks. In the switcher case, it is possible
  * for an outbound CPU to call power_down() after its inbound counterpart
@@ -156,32 +164,7 @@ static void tc2_pm_down(u64 residency)
 			: : "r" (0x400) );
 		}
 
-		/*
-		 * We need to disable and flush the whole (L1 and L2) cache.
-		 * Let's do it in the safest possible way i.e. with
-		 * no memory access within the following sequence
-		 * including the stack.
-		 *
-		 * Note: fp is preserved to the stack explicitly prior doing
-		 * this since adding it to the clobber list is incompatible
-		 * with having CONFIG_FRAME_POINTER=y.
-		 */
-		asm volatile(
-		"str	fp, [sp, #-4]! \n\t"
-		"mrc	p15, 0, r0, c1, c0, 0	@ get CR \n\t"
-		"bic	r0, r0, #"__stringify(CR_C)" \n\t"
-		"mcr	p15, 0, r0, c1, c0, 0	@ set CR \n\t"
-		"isb	\n\t"
-		"bl	v7_flush_dcache_all \n\t"
-		"clrex	\n\t"
-		"mrc	p15, 0, r0, c1, c0, 1	@ get AUXCR \n\t"
-		"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t"
-		"mcr	p15, 0, r0, c1, c0, 1	@ set AUXCR \n\t"
-		"isb	\n\t"
-		"dsb	\n\t"
-		"ldr	fp, [sp], #4"
-		: : : "r0","r1","r2","r3","r4","r5","r6","r7",
-		      "r9","r10","lr","memory");
+		v7_exit_coherency_flush(all);
 
 		cci_disable_port_by_cpu(mpidr);
 
@@ -197,26 +180,7 @@ static void tc2_pm_down(u64 residency)
 
 		arch_spin_unlock(&tc2_pm_lock);
 
-		/*
-		 * We need to disable and flush only the L1 cache.
-		 * Let's do it in the safest possible way as above.
-		 */
-		asm volatile(
-		"str	fp, [sp, #-4]! \n\t"
-		"mrc	p15, 0, r0, c1, c0, 0	@ get CR \n\t"
-		"bic	r0, r0, #"__stringify(CR_C)" \n\t"
-		"mcr	p15, 0, r0, c1, c0, 0	@ set CR \n\t"
-		"isb	\n\t"
-		"bl	v7_flush_dcache_louis \n\t"
-		"clrex	\n\t"
-		"mrc	p15, 0, r0, c1, c0, 1	@ get AUXCR \n\t"
-		"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t"
-		"mcr	p15, 0, r0, c1, c0, 1	@ set AUXCR \n\t"
-		"isb	\n\t"
-		"dsb	\n\t"
-		"ldr	fp, [sp], #4"
-		: : : "r0","r1","r2","r3","r4","r5","r6","r7",
-		      "r9","r10","lr","memory");
+		v7_exit_coherency_flush(louis);
 	}
 
 	__mcpm_cpu_down(cpu, cluster);
@@ -233,6 +197,55 @@ static void tc2_pm_power_down(void)
 	tc2_pm_down(0);
 }
 
+static int tc2_core_in_reset(unsigned int cpu, unsigned int cluster)
+{
+	u32 mask = cluster ?
+		  RESET_A7_NCORERESET(cpu)
+		: RESET_A15_NCORERESET(cpu);
+
+	return !(readl_relaxed(scc + RESET_CTRL) & mask);
+}
+
+#define POLL_MSEC 10
+#define TIMEOUT_MSEC 1000
+
+static int tc2_pm_power_down_finish(unsigned int cpu, unsigned int cluster)
+{
+	unsigned tries;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_CLUSTERS || cpu >= TC2_MAX_CPUS_PER_CLUSTER);
+
+	for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; ++tries) {
+		/*
+		 * Only examine the hardware state if the target CPU has
+		 * caught up at least as far as tc2_pm_down():
+		 */
+		if (ACCESS_ONCE(tc2_pm_use_count[cpu][cluster]) == 0) {
+			pr_debug("%s(cpu=%u, cluster=%u): RESET_CTRL = 0x%08X\n",
+				 __func__, cpu, cluster,
+				 readl_relaxed(scc + RESET_CTRL));
+
+			/*
+			 * We need the CPU to reach WFI, but the power
+			 * controller may put the cluster in reset and
+			 * power it off as soon as that happens, before
+			 * we have a chance to see STANDBYWFI.
+			 *
+			 * So we need to check for both conditions:
+			 */
+			if (tc2_core_in_reset(cpu, cluster) ||
+			    ve_spc_cpu_in_wfi(cpu, cluster))
+				return 0; /* success: the CPU is halted */
+		}
+
+		/* Otherwise, wait and retry: */
+		msleep(POLL_MSEC);
+	}
+
+	return -ETIMEDOUT; /* timeout */
+}
+
 static void tc2_pm_suspend(u64 residency)
 {
 	unsigned int mpidr, cpu, cluster;
@@ -275,10 +288,11 @@ static void tc2_pm_powered_up(void)
 }
 
 static const struct mcpm_platform_ops tc2_pm_power_ops = {
-	.power_up	= tc2_pm_power_up,
-	.power_down	= tc2_pm_power_down,
-	.suspend	= tc2_pm_suspend,
-	.powered_up	= tc2_pm_powered_up,
+	.power_up		= tc2_pm_power_up,
+	.power_down		= tc2_pm_power_down,
+	.power_down_finish	= tc2_pm_power_down_finish,
+	.suspend		= tc2_pm_suspend,
+	.powered_up		= tc2_pm_powered_up,
 };
 
 static bool __init tc2_pm_usage_count_init(void)
@@ -311,8 +325,7 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
 
 static int __init tc2_pm_init(void)
 {
-	int ret;
-	void __iomem *scc;
+	int ret, irq;
 	u32 a15_cluster_id, a7_cluster_id, sys_info;
 	struct device_node *np;
 
@@ -336,13 +349,15 @@ static int __init tc2_pm_init(void)
 	tc2_nr_cpus[a15_cluster_id] = (sys_info >> 16) & 0xf;
 	tc2_nr_cpus[a7_cluster_id] = (sys_info >> 20) & 0xf;
 
+	irq = irq_of_parse_and_map(np, 0);
+
 	/*
 	 * A subset of the SCC registers is also used to communicate
 	 * with the SPC (power controller). We need to be able to
 	 * drive it very early in the boot process to power up
 	 * processors, so we initialize the SPC driver here.
 	 */
-	ret = ve_spc_init(scc + SPC_BASE, a15_cluster_id);
+	ret = ve_spc_init(scc + SPC_BASE, a15_cluster_id, irq);
 	if (ret)
 		return ret;
 
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 95a469e23e37..4f8b8cb17ff5 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -1,12 +1,10 @@
 /*
  * Versatile Express V2M Motherboard Support
  */
-#include <linux/clocksource.h>
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/mmci.h>
 #include <linux/io.h>
-#include <linux/clocksource.h>
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/of_address.h>
@@ -22,7 +20,6 @@
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
 #include <linux/vexpress.h>
-#include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 
 #include <asm/mach-types.h>
@@ -422,16 +419,8 @@ void __init v2m_dt_init_early(void)
 			pr_warning("vexpress: DT HBI (%x) is not matching "
 					"hardware (%x)!\n", dt_hbi, hbi);
 	}
-}
-
-static void __init v2m_dt_timer_init(void)
-{
-	of_clk_init(NULL);
 
-	clocksource_of_init();
-
-	versatile_sched_clock_init(vexpress_get_24mhz_clock_base(),
-				24000000);
+	versatile_sched_clock_init(vexpress_get_24mhz_clock_base(), 24000000);
 }
 
 static const struct of_device_id v2m_dt_bus_match[] __initconst = {
@@ -458,6 +447,5 @@ DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.smp_init	= smp_init_ops(vexpress_smp_init_ops),
 	.map_io		= v2m_dt_map_io,
 	.init_early	= v2m_dt_init_early,
-	.init_time	= v2m_dt_timer_init,
 	.init_machine	= v2m_dt_init,
 MACHINE_END