87 files changed, 10090 insertions, 2332 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b493663c7ba7..785127cb281b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -158,3 +158,7 @@ config DRM_SAVAGE
 	help
 	  Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
 	  chipset. If M is selected the module will be called savage.
+
+source "drivers/gpu/drm/exynos/Kconfig"
+
+source "drivers/gpu/drm/vmwgfx/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 89cf05a72d1c..c0496f660707 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -35,4 +35,5 @@ obj-$(CONFIG_DRM_SAVAGE)+= savage/
 obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
 obj-$(CONFIG_DRM_VIA)	+=via/
 obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
+obj-$(CONFIG_DRM_EXYNOS) +=exynos/
 obj-y			+= i2c/
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index f88a9b2c977b..f2366440b738 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -372,11 +372,13 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 		encoder_funcs = encoder->helper_private;
 		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
 						      adjusted_mode))) {
+			DRM_DEBUG_KMS("Encoder fixup failed\n");
 			goto done;
 		}
 	}
 
 	if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) {
+		DRM_DEBUG_KMS("CRTC fixup failed\n");
 		goto done;
 	}
 	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 9d2668a50872..b9dc2629ea9a 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -107,11 +107,8 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
 		ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
 					  root, tmp, &drm_debugfs_fops);
 		if (!ent) {
-			char name[64];
-			strncpy(name, root->d_name.name,
-						min(root->d_name.len, 64U));
 			DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n",
-				  name, files[i].name);
+				  root->d_name.name, files[i].name);
 			kfree(tmp);
 			ret = -1;
 			goto fail;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 93a112d45c1a..7a87e0878f30 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -438,6 +438,8 @@ long drm_ioctl(struct file *filp,
 					goto err_i1;
 				}
 			}
+			if (asize > usize)
+				memset(kdata + usize, 0, asize - usize);
 		}
 
 		if (cmd & IOC_IN) {
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index 9e5b07efebb7..0f3c4e3cafc3 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -95,7 +95,6 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 	struct drm_device *dev = minor->dev;
 	struct proc_dir_entry *ent;
 	struct drm_info_node *tmp;
-	char name[64];
 	int i, ret;
 
 	for (i = 0; i < count; i++) {
@@ -118,7 +117,7 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 				       &drm_proc_fops, tmp);
 		if (!ent) {
 			DRM_ERROR("Cannot create /proc/dri/%s/%s\n",
-				  name, files[i].name);
+				  root->name, files[i].name);
 			list_del(&tmp->list);
 			kfree(tmp);
 			ret = -1;
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
new file mode 100644
index 000000000000..847466aab435
--- /dev/null
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -0,0 +1,20 @@
+config DRM_EXYNOS
+	tristate "DRM Support for Samsung SoC EXYNOS Series"
+	depends on DRM && PLAT_SAMSUNG
+	default	n
+	select DRM_KMS_HELPER
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option if you have a Samsung SoC EXYNOS chipset.
+	  If M is selected the module will be called exynosdrm.
+
+config DRM_EXYNOS_FIMD
+	tristate "Exynos DRM FIMD"
+	depends on DRM_EXYNOS
+	default n
+	help
+	  Choose this option if you want to use Exynos FIMD for DRM.
+	  If M is selected, the module will be called exynos_drm_fimd
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
new file mode 100644
index 000000000000..0496d3ff2683
--- /dev/null
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/exynos
+exynosdrm-y := exynos_drm_drv.o exynos_drm_encoder.o exynos_drm_connector.o \
+		exynos_drm_crtc.o exynos_drm_fbdev.o exynos_drm_fb.o \
+		exynos_drm_buf.o exynos_drm_gem.o exynos_drm_core.o
+
+obj-$(CONFIG_DRM_EXYNOS) += exynosdrm.o
+obj-$(CONFIG_DRM_EXYNOS_FIMD) += exynos_drm_fimd.o
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.c b/drivers/gpu/drm/exynos/exynos_drm_buf.c
new file mode 100644
index 000000000000..6f8afea94fc9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.c
@@ -0,0 +1,110 @@
+/* exynos_drm_buf.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_buf.h"
+
+static DEFINE_MUTEX(exynos_drm_buf_lock);
+
+static int lowlevel_buffer_allocate(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	entry->vaddr = dma_alloc_writecombine(dev->dev, entry->size,
+			(dma_addr_t *)&entry->paddr, GFP_KERNEL);
+	if (!entry->paddr) {
+		DRM_ERROR("failed to allocate buffer.\n");
+		return -ENOMEM;
+	}
+
+	DRM_DEBUG_KMS("allocated : vaddr(0x%x), paddr(0x%x), size(0x%x)\n",
+			(unsigned int)entry->vaddr, entry->paddr, entry->size);
+
+	return 0;
+}
+
+static void lowlevel_buffer_deallocate(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	if (entry->paddr && entry->vaddr && entry->size)
+		dma_free_writecombine(dev->dev, entry->size, entry->vaddr,
+				entry->paddr);
+	else
+		DRM_DEBUG_KMS("entry data is null.\n");
+}
+
+struct exynos_drm_buf_entry *exynos_drm_buf_create(struct drm_device *dev,
+		unsigned int size)
+{
+	struct exynos_drm_buf_entry *entry;
+
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		DRM_ERROR("failed to allocate exynos_drm_buf_entry.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	entry->size = size;
+
+	/*
+	 * allocate memory region with size and set the memory information
+	 * to vaddr and paddr of a entry object.
+	 */
+	if (lowlevel_buffer_allocate(dev, entry) < 0) {
+		kfree(entry);
+		entry = NULL;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return entry;
+}
+
+void exynos_drm_buf_destroy(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	if (!entry) {
+		DRM_DEBUG_KMS("entry is null.\n");
+		return;
+	}
+
+	lowlevel_buffer_deallocate(dev, entry);
+
+	kfree(entry);
+	entry = NULL;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Buffer Management Module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.h b/drivers/gpu/drm/exynos/exynos_drm_buf.h
new file mode 100644
index 000000000000..045d59eab01a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.h
@@ -0,0 +1,53 @@
+/* exynos_drm_buf.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_BUF_H_
+#define _EXYNOS_DRM_BUF_H_
+
+/*
+ * exynos drm buffer entry structure.
+ *
+ * @paddr: physical address of allocated memory.
+ * @vaddr: kernel virtual address of allocated memory.
+ * @size: size of allocated memory.
+ */
+struct exynos_drm_buf_entry {
+	dma_addr_t paddr;
+	void __iomem *vaddr;
+	unsigned int size;
+};
+
+/* allocate physical memory. */
+struct exynos_drm_buf_entry *exynos_drm_buf_create(struct drm_device *dev,
+		unsigned int size);
+
+/* get physical memory information of a drm framebuffer. */
+struct exynos_drm_buf_entry *exynos_drm_fb_get_buf(struct drm_framebuffer *fb);
+
+/* remove allocated physical memory. */
+void exynos_drm_buf_destroy(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c
new file mode 100644
index 000000000000..985d9e768728
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_encoder.h"
+
+#define MAX_EDID 256
+#define to_exynos_connector(x)	container_of(x, struct exynos_drm_connector,\
+				drm_connector)
+
+struct exynos_drm_connector {
+	struct drm_connector	drm_connector;
+};
+
+/* convert exynos_video_timings to drm_display_mode */
+static inline void
+convert_to_display_mode(struct drm_display_mode *mode,
+			struct fb_videomode *timing)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode->clock = timing->pixclock / 1000;
+
+	mode->hdisplay = timing->xres;
+	mode->hsync_start = mode->hdisplay + timing->left_margin;
+	mode->hsync_end = mode->hsync_start + timing->hsync_len;
+	mode->htotal = mode->hsync_end + timing->right_margin;
+
+	mode->vdisplay = timing->yres;
+	mode->vsync_start = mode->vdisplay + timing->upper_margin;
+	mode->vsync_end = mode->vsync_start + timing->vsync_len;
+	mode->vtotal = mode->vsync_end + timing->lower_margin;
+}
+
+/* convert drm_display_mode to exynos_video_timings */
+static inline void
+convert_to_video_timing(struct fb_videomode *timing,
+			struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	memset(timing, 0, sizeof(*timing));
+
+	timing->pixclock = mode->clock * 1000;
+	timing->refresh = mode->vrefresh;
+
+	timing->xres = mode->hdisplay;
+	timing->left_margin = mode->hsync_start - mode->hdisplay;
+	timing->hsync_len = mode->hsync_end - mode->hsync_start;
+	timing->right_margin = mode->htotal - mode->hsync_end;
+
+	timing->yres = mode->vdisplay;
+	timing->upper_margin = mode->vsync_start - mode->vdisplay;
+	timing->vsync_len = mode->vsync_end - mode->vsync_start;
+	timing->lower_margin = mode->vtotal - mode->vsync_end;
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		timing->vmode = FB_VMODE_INTERLACED;
+	else
+		timing->vmode = FB_VMODE_NONINTERLACED;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		timing->vmode |= FB_VMODE_DOUBLE;
+}
+
+static int exynos_drm_connector_get_modes(struct drm_connector *connector)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	unsigned int count;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!display) {
+		DRM_DEBUG_KMS("display is null.\n");
+		return 0;
+	}
+
+	/*
+	 * if get_edid() exists then get_edid() callback of hdmi side
+	 * is called to get edid data through i2c interface else
+	 * get timing from the FIMD driver(display controller).
+	 *
+	 * P.S. in case of lcd panel, count is always 1 if success
+	 * because lcd panel has only one mode.
+	 */
+	if (display->get_edid) {
+		int ret;
+		void *edid;
+
+		edid = kzalloc(MAX_EDID, GFP_KERNEL);
+		if (!edid) {
+			DRM_ERROR("failed to allocate edid\n");
+			return 0;
+		}
+
+		ret = display->get_edid(manager->dev, connector,
+						edid, MAX_EDID);
+		if (ret < 0) {
+			DRM_ERROR("failed to get edid data.\n");
+			kfree(edid);
+			edid = NULL;
+			return 0;
+		}
+
+		drm_mode_connector_update_edid_property(connector, edid);
+		count = drm_add_edid_modes(connector, edid);
+
+		kfree(connector->display_info.raw_edid);
+		connector->display_info.raw_edid = edid;
+	} else {
+		struct drm_display_mode *mode = drm_mode_create(connector->dev);
+		struct fb_videomode *timing;
+
+		if (display->get_timing)
+			timing = display->get_timing(manager->dev);
+		else {
+			drm_mode_destroy(connector->dev, mode);
+			return 0;
+		}
+
+		convert_to_display_mode(mode, timing);
+
+		mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+		drm_mode_set_name(mode);
+		drm_mode_probed_add(connector, mode);
+
+		count = 1;
+	}
+
+	return count;
+}
+
+static int exynos_drm_connector_mode_valid(struct drm_connector *connector,
+					    struct drm_display_mode *mode)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	struct fb_videomode timing;
+	int ret = MODE_BAD;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	convert_to_video_timing(&timing, mode);
+
+	if (display && display->check_timing)
+		if (!display->check_timing(manager->dev, (void *)&timing))
+			ret = MODE_OK;
+
+	return ret;
+}
+
+struct drm_encoder *exynos_drm_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return connector->encoder;
+}
+
+static struct drm_connector_helper_funcs exynos_connector_helper_funcs = {
+	.get_modes	= exynos_drm_connector_get_modes,
+	.mode_valid	= exynos_drm_connector_mode_valid,
+	.best_encoder	= exynos_drm_best_encoder,
+};
+
+/* get detection status of display device. */
+static enum drm_connector_status
+exynos_drm_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	enum drm_connector_status status = connector_status_disconnected;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (display && display->is_connected) {
+		if (display->is_connected(manager->dev))
+			status = connector_status_connected;
+		else
+			status = connector_status_disconnected;
+	}
+
+	return status;
+}
+
+static void exynos_drm_connector_destroy(struct drm_connector *connector)
+{
+	struct exynos_drm_connector *exynos_connector =
+		to_exynos_connector(connector);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(exynos_connector);
+}
+
+static struct drm_connector_funcs exynos_connector_funcs = {
+	.dpms		= drm_helper_connector_dpms,
+	.fill_modes	= drm_helper_probe_single_connector_modes,
+	.detect		= exynos_drm_connector_detect,
+	.destroy	= exynos_drm_connector_destroy,
+};
+
+struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
+						   struct drm_encoder *encoder)
+{
+	struct exynos_drm_connector *exynos_connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct drm_connector *connector;
+	int type;
+	int err;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_connector = kzalloc(sizeof(*exynos_connector), GFP_KERNEL);
+	if (!exynos_connector) {
+		DRM_ERROR("failed to allocate connector\n");
+		return NULL;
+	}
+
+	connector = &exynos_connector->drm_connector;
+
+	switch (manager->display->type) {
+	case EXYNOS_DISPLAY_TYPE_HDMI:
+		type = DRM_MODE_CONNECTOR_HDMIA;
+		break;
+	default:
+		type = DRM_MODE_CONNECTOR_Unknown;
+		break;
+	}
+
+	drm_connector_init(dev, connector, &exynos_connector_funcs, type);
+	drm_connector_helper_add(connector, &exynos_connector_helper_funcs);
+
+	err = drm_sysfs_connector_add(connector);
+	if (err)
+		goto err_connector;
+
+	connector->encoder = encoder;
+	err = drm_mode_connector_attach_encoder(connector, encoder);
+	if (err) {
+		DRM_ERROR("failed to attach a connector to a encoder\n");
+		goto err_sysfs;
+	}
+
+	DRM_DEBUG_KMS("connector has been created\n");
+
+	return connector;
+
+err_sysfs:
+	drm_sysfs_connector_remove(connector);
+err_connector:
+	drm_connector_cleanup(connector);
+	kfree(exynos_connector);
+	return NULL;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Connector Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.h b/drivers/gpu/drm/exynos/exynos_drm_connector.h
new file mode 100644
index 000000000000..1c7b2b5b579c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_CONNECTOR_H_
+#define _EXYNOS_DRM_CONNECTOR_H_
+
+struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
+						   struct drm_encoder *encoder);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
new file mode 100644
index 000000000000..661a03571d0c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -0,0 +1,272 @@
+/* exynos_drm_core.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_encoder.h"
+#include "exynos_drm_connector.h"
+#include "exynos_drm_fbdev.h"
+
+static DEFINE_MUTEX(exynos_drm_mutex);
+static LIST_HEAD(exynos_drm_subdrv_list);
+static struct drm_device *drm_dev;
+
+static int exynos_drm_subdrv_probe(struct drm_device *dev,
+					struct exynos_drm_subdrv *subdrv)
+{
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (subdrv->probe) {
+		int ret;
+
+		/*
+		 * this probe callback would be called by sub driver
+		 * after setting of all resources to this sub driver,
+		 * such as clock, irq and register map are done or by load()
+		 * of exynos drm driver.
+		 *
+		 * P.S. note that this driver is considered for modularization.
+		 */
+		ret = subdrv->probe(dev, subdrv->manager.dev);
+		if (ret)
+			return ret;
+	}
+
+	/* create and initialize a encoder for this sub driver. */
+	encoder = exynos_drm_encoder_create(dev, &subdrv->manager,
+			(1 << MAX_CRTC) - 1);
+	if (!encoder) {
+		DRM_ERROR("failed to create encoder\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * create and initialize a connector for this sub driver and
+	 * attach the encoder created above to the connector.
+	 */
+	connector = exynos_drm_connector_create(dev, encoder);
+	if (!connector) {
+		DRM_ERROR("failed to create connector\n");
+		encoder->funcs->destroy(encoder);
+		return -EFAULT;
+	}
+
+	subdrv->encoder = encoder;
+	subdrv->connector = connector;
+
+	return 0;
+}
+
+static void exynos_drm_subdrv_remove(struct drm_device *dev,
+				      struct exynos_drm_subdrv *subdrv)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (subdrv->remove)
+		subdrv->remove(dev);
+
+	if (subdrv->encoder) {
+		struct drm_encoder *encoder = subdrv->encoder;
+		encoder->funcs->destroy(encoder);
+		subdrv->encoder = NULL;
+	}
+
+	if (subdrv->connector) {
+		struct drm_connector *connector = subdrv->connector;
+		connector->funcs->destroy(connector);
+		subdrv->connector = NULL;
+	}
+}
+
+int exynos_drm_device_register(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv, *n;
+	int err;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!dev)
+		return -EINVAL;
+
+	if (drm_dev) {
+		DRM_ERROR("Already drm device were registered\n");
+		return -EBUSY;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	list_for_each_entry_safe(subdrv, n, &exynos_drm_subdrv_list, list) {
+		err = exynos_drm_subdrv_probe(dev, subdrv);
+		if (err) {
+			DRM_DEBUG("exynos drm subdrv probe failed.\n");
+			list_del(&subdrv->list);
+		}
+	}
+
+	drm_dev = dev;
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_register);
+
+int exynos_drm_device_unregister(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!dev || dev != drm_dev) {
+		WARN(1, "Unexpected drm device unregister!\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	list_for_each_entry(subdrv, &exynos_drm_subdrv_list, list)
+		exynos_drm_subdrv_remove(dev, subdrv);
+
+	drm_dev = NULL;
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_unregister);
+
+static int exynos_drm_mode_group_reinit(struct drm_device *dev)
+{
+	struct drm_mode_group *group = &dev->primary->mode_group;
+	uint32_t *id_list = group->id_list;
+	int ret;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	ret = drm_mode_group_init_legacy_group(dev, group);
+	if (ret < 0)
+		return ret;
+
+	kfree(id_list);
+	return 0;
+}
+
+int exynos_drm_subdrv_register(struct exynos_drm_subdrv *subdrv)
+{
+	int err;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!subdrv)
+		return -EINVAL;
+
+	mutex_lock(&exynos_drm_mutex);
+	if (drm_dev) {
+		err = exynos_drm_subdrv_probe(drm_dev, subdrv);
+		if (err) {
+			DRM_ERROR("failed to probe exynos drm subdrv\n");
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+
+		/*
+		 * if any specific driver such as fimd or hdmi driver called
+		 * exynos_drm_subdrv_register() later than drm_load(),
+		 * the fb helper should be re-initialized and re-configured.
+		 */
+		err = exynos_drm_fbdev_reinit(drm_dev);
+		if (err) {
+			DRM_ERROR("failed to reinitialize exynos drm fbdev\n");
+			exynos_drm_subdrv_remove(drm_dev, subdrv);
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+
+		err = exynos_drm_mode_group_reinit(drm_dev);
+		if (err) {
+			DRM_ERROR("failed to reinitialize mode group\n");
+			exynos_drm_fbdev_fini(drm_dev);
+			exynos_drm_subdrv_remove(drm_dev, subdrv);
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+	}
+
+	subdrv->drm_dev = drm_dev;
+
+	list_add_tail(&subdrv->list, &exynos_drm_subdrv_list);
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_subdrv_register);
+
+int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *subdrv)
+{
+	int ret = -EFAULT;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!subdrv) {
+		DRM_DEBUG("Unexpected exynos drm subdrv unregister!\n");
+		return ret;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	if (drm_dev) {
+		exynos_drm_subdrv_remove(drm_dev, subdrv);
+		list_del(&subdrv->list);
+
+		/*
+		 * fb helper should be updated once a sub driver is released
+		 * to re-configure crtc and connector and also to re-setup
+		 * drm framebuffer.
+		 */
+		ret = exynos_drm_fbdev_reinit(drm_dev);
+		if (ret < 0) {
+			DRM_ERROR("failed fb helper reinit.\n");
+			goto fail;
+		}
+
+		ret = exynos_drm_mode_group_reinit(drm_dev);
+		if (ret < 0) {
+			DRM_ERROR("failed drm mode group reinit.\n");
+			goto fail;
+		}
+	}
+
+fail:
+	mutex_unlock(&exynos_drm_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_subdrv_unregister);
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Core Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
new file mode 100644
index 000000000000..9337e5e2dbb6
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -0,0 +1,381 @@
+/* exynos_drm_crtc.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_encoder.h"
+#include "exynos_drm_buf.h"
+
+#define to_exynos_crtc(x)	container_of(x, struct exynos_drm_crtc,\
+				drm_crtc)
+
+/*
+ * Exynos specific crtc postion structure.
+ *
+ * @fb_x: offset x on a framebuffer to be displyed
+ *	- the unit is screen coordinates.
+ * @fb_y: offset y on a framebuffer to be displayed
+ *	- the unit is screen coordinates.
+ * @crtc_x: offset x on hardware screen.
+ * @crtc_y: offset y on hardware screen.
+ * @crtc_w: width of hardware screen.
+ * @crtc_h: height of hardware screen.
+ */
+struct exynos_drm_crtc_pos {
+	unsigned int fb_x;
+	unsigned int fb_y;
+	unsigned int crtc_x;
+	unsigned int crtc_y;
+	unsigned int crtc_w;
+	unsigned int crtc_h;
+};
+
+/*
+ * Exynos specific crtc structure.
+ *
+ * @drm_crtc: crtc object.
+ * @overlay: contain information common to display controller and hdmi and
+ *	contents of this overlay object would be copied to sub driver size.
+ * @pipe: a crtc index created at load() with a new crtc object creation
+ *	and the crtc object would be set to private->crtc array
+ *	to get a crtc object corresponding to this pipe from private->crtc
+ *	array when irq interrupt occured. the reason of using this pipe is that
+ *	drm framework doesn't support multiple irq yet.
+ *	we can refer to the crtc to current hardware interrupt occured through
+ *	this pipe value.
+ */
+struct exynos_drm_crtc {
+	struct drm_crtc			drm_crtc;
+	struct exynos_drm_overlay	overlay;
+	unsigned int			pipe;
+};
+
+static void exynos_drm_crtc_apply(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct exynos_drm_overlay *overlay = &exynos_crtc->overlay;
+
+	exynos_drm_fn_encoder(crtc, overlay,
+			exynos_drm_encoder_crtc_mode_set);
+	exynos_drm_fn_encoder(crtc, NULL, exynos_drm_encoder_crtc_commit);
+}
+
+static int exynos_drm_overlay_update(struct exynos_drm_overlay *overlay,
+				       struct drm_framebuffer *fb,
+				       struct drm_display_mode *mode,
+				       struct exynos_drm_crtc_pos *pos)
+{
+	struct exynos_drm_buf_entry *entry;
+	unsigned int actual_w;
+	unsigned int actual_h;
+
+	entry = exynos_drm_fb_get_buf(fb);
+	if (!entry) {
+		DRM_LOG_KMS("entry is null.\n");
+		return -EFAULT;
+	}
+
+	overlay->paddr = entry->paddr;
+	overlay->vaddr = entry->vaddr;
+
+	DRM_DEBUG_KMS("vaddr = 0x%lx, paddr = 0x%lx\n",
+			(unsigned long)overlay->vaddr,
+			(unsigned long)overlay->paddr);
+
+	actual_w = min((mode->hdisplay - pos->crtc_x), pos->crtc_w);
+	actual_h = min((mode->vdisplay - pos->crtc_y), pos->crtc_h);
+
+	/* set drm framebuffer data. */
+	overlay->fb_x = pos->fb_x;
+	overlay->fb_y = pos->fb_y;
+	overlay->fb_width = fb->width;
+	overlay->fb_height = fb->height;
+	overlay->bpp = fb->bits_per_pixel;
+	overlay->pitch = fb->pitch;
+
+	/* set overlay range to be displayed. */
+	overlay->crtc_x = pos->crtc_x;
+	overlay->crtc_y = pos->crtc_y;
+	overlay->crtc_width = actual_w;
+	overlay->crtc_height = actual_h;
+
+	/* set drm mode data. */
+	overlay->mode_width = mode->hdisplay;
+	overlay->mode_height = mode->vdisplay;
+	overlay->refresh = mode->vrefresh;
+	overlay->scan_flag = mode->flags;
+
+	DRM_DEBUG_KMS("overlay : offset_x/y(%d,%d), width/height(%d,%d)",
+			overlay->crtc_x, overlay->crtc_y,
+			overlay->crtc_width, overlay->crtc_height);
+
+	return 0;
+}
+
+static int exynos_drm_crtc_update(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc;
+	struct exynos_drm_overlay *overlay;
+	struct exynos_drm_crtc_pos pos;
+	struct drm_display_mode *mode = &crtc->mode;
+	struct drm_framebuffer *fb = crtc->fb;
+
+	if (!mode || !fb)
+		return -EINVAL;
+
+	exynos_crtc = to_exynos_crtc(crtc);
+	overlay = &exynos_crtc->overlay;
+
+	memset(&pos, 0, sizeof(struct exynos_drm_crtc_pos));
+
+	/* it means the offset of framebuffer to be displayed. */
+	pos.fb_x = crtc->x;
+	pos.fb_y = crtc->y;
+
+	/* OSD position to be displayed. */
+	pos.crtc_x = 0;
+	pos.crtc_y = 0;
+	pos.crtc_w = fb->width - crtc->x;
+	pos.crtc_h = fb->height - crtc->y;
+
+	return exynos_drm_overlay_update(overlay, crtc->fb, mode, &pos);
+}
+
+static void exynos_drm_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO */
+}
+
+static void exynos_drm_crtc_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static void exynos_drm_crtc_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static bool
+exynos_drm_crtc_mode_fixup(struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL */
+	return true;
+}
+
+static int
+exynos_drm_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			  struct drm_display_mode *adjusted_mode, int x, int y,
+			  struct drm_framebuffer *old_fb)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode = adjusted_mode;
+
+	return exynos_drm_crtc_update(crtc);
+}
+
+static int exynos_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+					  struct drm_framebuffer *old_fb)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	ret = exynos_drm_crtc_update(crtc);
+	if (ret)
+		return ret;
+
+	exynos_drm_crtc_apply(crtc);
+
+	return ret;
+}
+
+static void exynos_drm_crtc_load_lut(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+	/* drm framework doesn't check NULL */
+}
+
+static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
+	.dpms		= exynos_drm_crtc_dpms,
+	.prepare	= exynos_drm_crtc_prepare,
+	.commit		= exynos_drm_crtc_commit,
+	.mode_fixup	= exynos_drm_crtc_mode_fixup,
+	.mode_set	= exynos_drm_crtc_mode_set,
+	.mode_set_base	= exynos_drm_crtc_mode_set_base,
+	.load_lut	= exynos_drm_crtc_load_lut,
+};
+
+static int exynos_drm_crtc_page_flip(struct drm_crtc *crtc,
+				      struct drm_framebuffer *fb,
+				      struct drm_pending_vblank_event *event)
+{
+	struct drm_device *dev = crtc->dev;
+	struct exynos_drm_private *dev_priv = dev->dev_private;
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct drm_framebuffer *old_fb = crtc->fb;
+	int ret = -EINVAL;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (event) {
+		/*
+		 * the pipe from user always is 0 so we can set pipe number
+		 * of current owner to event.
+		 */
+		event->pipe = exynos_crtc->pipe;
+
+		list_add_tail(&event->base.link,
+				&dev_priv->pageflip_event_list);
+
+		ret = drm_vblank_get(dev, exynos_crtc->pipe);
+		if (ret) {
+			DRM_DEBUG("failed to acquire vblank counter\n");
+			list_del(&event->base.link);
+
+			goto out;
+		}
+
+		crtc->fb = fb;
+		ret = exynos_drm_crtc_update(crtc);
+		if (ret) {
+			crtc->fb = old_fb;
+			drm_vblank_put(dev, exynos_crtc->pipe);
+			list_del(&event->base.link);
+
+			goto out;
+		}
+
+		/*
+		 * the values related to a buffer of the drm framebuffer
+		 * to be applied should be set at here. because these values
+		 * first, are set to shadow registers and then to
+		 * real registers at vsync front porch period.
+		 */
+		exynos_drm_crtc_apply(crtc);
+	}
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static void exynos_drm_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct exynos_drm_private *private = crtc->dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	private->crtc[exynos_crtc->pipe] = NULL;
+
+	drm_crtc_cleanup(crtc);
+	kfree(exynos_crtc);
+}
+
+static struct drm_crtc_funcs exynos_crtc_funcs = {
+	.set_config	= drm_crtc_helper_set_config,
+	.page_flip	= exynos_drm_crtc_page_flip,
+	.destroy	= exynos_drm_crtc_destroy,
+};
+
+struct exynos_drm_overlay *get_exynos_drm_overlay(struct drm_device *dev,
+		struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+
+	return &exynos_crtc->overlay;
+}
+
+int exynos_drm_crtc_create(struct drm_device *dev, unsigned int nr)
+{
+	struct exynos_drm_crtc *exynos_crtc;
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_crtc *crtc;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_crtc = kzalloc(sizeof(*exynos_crtc), GFP_KERNEL);
+	if (!exynos_crtc) {
+		DRM_ERROR("failed to allocate exynos crtc\n");
+		return -ENOMEM;
+	}
+
+	exynos_crtc->pipe = nr;
+	crtc = &exynos_crtc->drm_crtc;
+
+	private->crtc[nr] = crtc;
+
+	drm_crtc_init(dev, crtc, &exynos_crtc_funcs);
+	drm_crtc_helper_add(crtc, &exynos_crtc_helper_funcs);
+
+	return 0;
+}
+
+int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int crtc)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_fn_encoder(private->crtc[crtc], &crtc,
+			exynos_drm_enable_vblank);
+
+	return 0;
+}
+
+void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int crtc)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_fn_encoder(private->crtc[crtc], &crtc,
+			exynos_drm_disable_vblank);
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM CRTC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
new file mode 100644
index 000000000000..c584042d6d2c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
@@ -0,0 +1,38 @@
+/* exynos_drm_crtc.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_CRTC_H_
+#define _EXYNOS_DRM_CRTC_H_
+
+struct exynos_drm_overlay *get_exynos_drm_overlay(struct drm_device *dev,
+		struct drm_crtc *crtc);
+int exynos_drm_crtc_create(struct drm_device *dev, unsigned int nr);
+int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int crtc);
+void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int crtc);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
new file mode 100644
index 000000000000..83810cbe3c17
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include <drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
+#include "exynos_drm_fbdev.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_gem.h"
+
+#define DRIVER_NAME	"exynos-drm"
+#define DRIVER_DESC	"Samsung SoC DRM"
+#define DRIVER_DATE	"20110530"
+#define DRIVER_MAJOR	1
+#define DRIVER_MINOR	0
+
+static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
+{
+	struct exynos_drm_private *private;
+	int ret;
+	int nr;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	private = kzalloc(sizeof(struct exynos_drm_private), GFP_KERNEL);
+	if (!private) {
+		DRM_ERROR("failed to allocate private\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&private->pageflip_event_list);
+	dev->dev_private = (void *)private;
+
+	drm_mode_config_init(dev);
+
+	exynos_drm_mode_config_init(dev);
+
+	/*
+	 * EXYNOS4 is enough to have two CRTCs and each crtc would be used
+	 * without dependency of hardware.
+	 */
+	for (nr = 0; nr < MAX_CRTC; nr++) {
+		ret = exynos_drm_crtc_create(dev, nr);
+		if (ret)
+			goto err_crtc;
+	}
+
+	ret = drm_vblank_init(dev, MAX_CRTC);
+	if (ret)
+		goto err_crtc;
+
+	/*
+	 * probe sub drivers such as display controller and hdmi driver,
+	 * that were registered at probe() of platform driver
+	 * to the sub driver and create encoder and connector for them.
+	 */
+	ret = exynos_drm_device_register(dev);
+	if (ret)
+		goto err_vblank;
+
+	/*
+	 * create and configure fb helper and also exynos specific
+	 * fbdev object.
+	 */
+	ret = exynos_drm_fbdev_init(dev);
+	if (ret) {
+		DRM_ERROR("failed to initialize drm fbdev\n");
+		goto err_drm_device;
+	}
+
+	return 0;
+
+err_drm_device:
+	exynos_drm_device_unregister(dev);
+err_vblank:
+	drm_vblank_cleanup(dev);
+err_crtc:
+	drm_mode_config_cleanup(dev);
+	kfree(private);
+
+	return ret;
+}
+
+static int exynos_drm_unload(struct drm_device *dev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_fbdev_fini(dev);
+	exynos_drm_device_unregister(dev);
+	drm_vblank_cleanup(dev);
+	drm_mode_config_cleanup(dev);
+	kfree(dev->dev_private);
+
+	dev->dev_private = NULL;
+
+	return 0;
+}
+
+static void exynos_drm_preclose(struct drm_device *dev,
+					struct drm_file *file_priv)
+{
+	struct exynos_drm_private *dev_priv = dev->dev_private;
+
+	/*
+	 * drm framework frees all events at release time,
+	 * so private event list should be cleared.
+	 */
+	if (!list_empty(&dev_priv->pageflip_event_list))
+		INIT_LIST_HEAD(&dev_priv->pageflip_event_list);
+}
+
+static void exynos_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_fbdev_restore_mode(dev);
+}
+
+static struct vm_operations_struct exynos_drm_gem_vm_ops = {
+	.fault = exynos_drm_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static struct drm_ioctl_desc exynos_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
+			DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MAP_OFFSET,
+			exynos_drm_gem_map_offset_ioctl, DRM_UNLOCKED |
+			DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MMAP,
+			exynos_drm_gem_mmap_ioctl, DRM_UNLOCKED | DRM_AUTH),
+};
+
+static struct drm_driver exynos_drm_driver = {
+	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM |
+				  DRIVER_MODESET | DRIVER_GEM,
+	.load			= exynos_drm_load,
+	.unload			= exynos_drm_unload,
+	.preclose		= exynos_drm_preclose,
+	.lastclose		= exynos_drm_lastclose,
+	.get_vblank_counter	= drm_vblank_count,
+	.enable_vblank		= exynos_drm_crtc_enable_vblank,
+	.disable_vblank		= exynos_drm_crtc_disable_vblank,
+	.gem_init_object	= exynos_drm_gem_init_object,
+	.gem_free_object	= exynos_drm_gem_free_object,
+	.gem_vm_ops		= &exynos_drm_gem_vm_ops,
+	.dumb_create		= exynos_drm_gem_dumb_create,
+	.dumb_map_offset	= exynos_drm_gem_dumb_map_offset,
+	.dumb_destroy		= exynos_drm_gem_dumb_destroy,
+	.ioctls			= exynos_ioctls,
+	.fops = {
+		.owner		= THIS_MODULE,
+		.open		= drm_open,
+		.mmap		= exynos_drm_gem_mmap,
+		.poll		= drm_poll,
+		.read		= drm_read,
+		.unlocked_ioctl	= drm_ioctl,
+		.release	= drm_release,
+	},
+	.name	= DRIVER_NAME,
+	.desc	= DRIVER_DESC,
+	.date	= DRIVER_DATE,
+	.major	= DRIVER_MAJOR,
+	.minor	= DRIVER_MINOR,
+};
+
+static int exynos_drm_platform_probe(struct platform_device *pdev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_driver.num_ioctls = DRM_ARRAY_SIZE(exynos_ioctls);
+
+	return drm_platform_init(&exynos_drm_driver, pdev);
+}
+
+static int exynos_drm_platform_remove(struct platform_device *pdev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	drm_platform_exit(&exynos_drm_driver, pdev);
+
+	return 0;
+}
+
+static struct platform_driver exynos_drm_platform_driver = {
+	.probe		= exynos_drm_platform_probe,
+	.remove		= __devexit_p(exynos_drm_platform_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= DRIVER_NAME,
+	},
+};
+
+static int __init exynos_drm_init(void)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	return platform_driver_register(&exynos_drm_platform_driver);
+}
+
+static void __exit exynos_drm_exit(void)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	platform_driver_unregister(&exynos_drm_platform_driver);
+}
+
+module_init(exynos_drm_init);
+module_exit(exynos_drm_exit);
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
new file mode 100644
index 000000000000..c03683f2ae72
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -0,0 +1,254 @@
+/* exynos_drm_drv.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_DRV_H_
+#define _EXYNOS_DRM_DRV_H_
+
+#include "drm.h"
+
+#define MAX_CRTC	2
+
+struct drm_device;
+struct exynos_drm_overlay;
+struct drm_connector;
+
+/* this enumerates display type. */
+enum exynos_drm_output_type {
+	EXYNOS_DISPLAY_TYPE_NONE,
+	/* RGB or CPU Interface. */
+	EXYNOS_DISPLAY_TYPE_LCD,
+	/* HDMI Interface. */
+	EXYNOS_DISPLAY_TYPE_HDMI,
+};
+
+/*
+ * Exynos drm overlay ops structure.
+ *
+ * @mode_set: copy drm overlay info to hw specific overlay info.
+ * @commit: apply hardware specific overlay data to registers.
+ * @disable: disable hardware specific overlay.
+ */
+struct exynos_drm_overlay_ops {
+	void (*mode_set)(struct device *subdrv_dev,
+			 struct exynos_drm_overlay *overlay);
+	void (*commit)(struct device *subdrv_dev);
+	void (*disable)(struct device *subdrv_dev);
+};
+
+/*
+ * Exynos drm common overlay structure.
+ *
+ * @fb_x: offset x on a framebuffer to be displayed.
+ *	- the unit is screen coordinates.
+ * @fb_y: offset y on a framebuffer to be displayed.
+ *	- the unit is screen coordinates.
+ * @fb_width: width of a framebuffer.
+ * @fb_height: height of a framebuffer.
+ * @crtc_x: offset x on hardware screen.
+ * @crtc_y: offset y on hardware screen.
+ * @crtc_width: window width to be displayed (hardware screen).
+ * @crtc_height: window height to be displayed (hardware screen).
+ * @mode_width: width of screen mode.
+ * @mode_height: height of screen mode.
+ * @refresh: refresh rate.
+ * @scan_flag: interlace or progressive way.
+ *	(it could be DRM_MODE_FLAG_*)
+ * @bpp: pixel size.(in bit)
+ * @paddr: bus(accessed by dma) physical memory address to this overlay
+ *		and this is physically continuous.
+ * @vaddr: virtual memory addresss to this overlay.
+ * @default_win: a window to be enabled.
+ * @color_key: color key on or off.
+ * @index_color: if using color key feature then this value would be used
+ *			as index color.
+ * @local_path: in case of lcd type, local path mode on or off.
+ * @transparency: transparency on or off.
+ * @activated: activated or not.
+ *
+ * this structure is common to exynos SoC and its contents would be copied
+ * to hardware specific overlay info.
+ */
+struct exynos_drm_overlay {
+	unsigned int fb_x;
+	unsigned int fb_y;
+	unsigned int fb_width;
+	unsigned int fb_height;
+	unsigned int crtc_x;
+	unsigned int crtc_y;
+	unsigned int crtc_width;
+	unsigned int crtc_height;
+	unsigned int mode_width;
+	unsigned int mode_height;
+	unsigned int refresh;
+	unsigned int scan_flag;
+	unsigned int bpp;
+	unsigned int pitch;
+	dma_addr_t paddr;
+	void __iomem *vaddr;
+
+	bool default_win;
+	bool color_key;
+	unsigned int index_color;
+	bool local_path;
+	bool transparency;
+	bool activated;
+};
+
+/*
+ * Exynos DRM Display Structure.
+ *	- this structure is common to analog tv, digital tv and lcd panel.
+ *
+ * @type: one of EXYNOS_DISPLAY_TYPE_LCD and HDMI.
+ * @is_connected: check for that display is connected or not.
+ * @get_edid: get edid modes from display driver.
+ * @get_timing: get timing object from display driver.
+ * @check_timing: check if timing is valid or not.
+ * @power_on: display device on or off.
+ */
+struct exynos_drm_display {
+	enum exynos_drm_output_type type;
+	bool (*is_connected)(struct device *dev);
+	int (*get_edid)(struct device *dev, struct drm_connector *connector,
+				u8 *edid, int len);
+	void *(*get_timing)(struct device *dev);
+	int (*check_timing)(struct device *dev, void *timing);
+	int (*power_on)(struct device *dev, int mode);
+};
+
+/*
+ * Exynos drm manager ops
+ *
+ * @mode_set: convert drm_display_mode to hw specific display mode and
+ *	      would be called by encoder->mode_set().
+ * @commit: set current hw specific display mode to hw.
+ * @enable_vblank: specific driver callback for enabling vblank interrupt.
+ * @disable_vblank: specific driver callback for disabling vblank interrupt.
+ */
+struct exynos_drm_manager_ops {
+	void (*mode_set)(struct device *subdrv_dev, void *mode);
+	void (*commit)(struct device *subdrv_dev);
+	int (*enable_vblank)(struct device *subdrv_dev);
+	void (*disable_vblank)(struct device *subdrv_dev);
+};
+
+/*
+ * Exynos drm common manager structure.
+ *
+ * @dev: pointer to device object for subdrv device driver.
+ *	sub drivers such as display controller or hdmi driver,
+ *	have their own device object.
+ * @ops: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control hardware global registers.
+ * @overlay_ops: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control hardware overlay reigsters.
+ * @display: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control display devices such as
+ *	analog tv, digital tv and lcd panel and also get timing data for them.
+ */
+struct exynos_drm_manager {
+	struct device *dev;
+	int pipe;
+	struct exynos_drm_manager_ops *ops;
+	struct exynos_drm_overlay_ops *overlay_ops;
+	struct exynos_drm_display *display;
+};
+
+/*
+ * Exynos drm private structure.
+ */
+struct exynos_drm_private {
+	struct drm_fb_helper *fb_helper;
+
+	/* list head for new event to be added. */
+	struct list_head pageflip_event_list;
+
+	/*
+	 * created crtc object would be contained at this array and
+	 * this array is used to be aware of which crtc did it request vblank.
+	 */
+	struct drm_crtc *crtc[MAX_CRTC];
+};
+
+/*
+ * Exynos drm sub driver structure.
+ *
+ * @list: sub driver has its own list object to register to exynos drm driver.
+ * @drm_dev: pointer to drm_device and this pointer would be set
+ *	when sub driver calls exynos_drm_subdrv_register().
+ * @probe: this callback would be called by exynos drm driver after
+ *	subdrv is registered to it.
+ * @remove: this callback is used to release resources created
+ *	by probe callback.
+ * @manager: subdrv has its own manager to control a hardware appropriately
+ *	and we can access a hardware drawing on this manager.
+ * @encoder: encoder object owned by this sub driver.
+ * @connector: connector object owned by this sub driver.
+ */
+struct exynos_drm_subdrv {
+	struct list_head list;
+	struct drm_device *drm_dev;
+
+	int (*probe)(struct drm_device *drm_dev, struct device *dev);
+	void (*remove)(struct drm_device *dev);
+
+	struct exynos_drm_manager manager;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+};
+
+/*
+ * this function calls a probe callback registered to sub driver list and
+ * create its own encoder and connector and then set drm_device object
+ * to global one.
+ */
+int exynos_drm_device_register(struct drm_device *dev);
+/*
+ * this function calls a remove callback registered to sub driver list and
+ * destroy its own encoder and connetor.
+ */
+int exynos_drm_device_unregister(struct drm_device *dev);
+
+/*
+ * this function would be called by sub drivers such as display controller
+ * or hdmi driver to register this sub driver object to exynos drm driver
+ * and when a sub driver is registered to exynos drm driver a probe callback
+ * of the sub driver is called and creates its own encoder and connector
+ * and then fb helper and drm mode group would be re-initialized.
+ */
+int exynos_drm_subdrv_register(struct exynos_drm_subdrv *drm_subdrv);
+
+/*
+ * this function removes subdrv list from exynos drm driver and fb helper
+ * and drm mode group would be re-initialized.
+ */
+int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *drm_subdrv);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
new file mode 100644
index 000000000000..7cf6fa86a67e
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
@@ -0,0 +1,271 @@
+/* exynos_drm_encoder.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
+#include "exynos_drm_encoder.h"
+
+#define to_exynos_encoder(x)	container_of(x, struct exynos_drm_encoder,\
+				drm_encoder)
+
+/*
+ * exynos specific encoder structure.
+ *
+ * @drm_encoder: encoder object.
+ * @manager: specific encoder has its own manager to control a hardware
+ *	appropriately and we can access a hardware drawing on this manager.
+ */
+struct exynos_drm_encoder {
+	struct drm_encoder		drm_encoder;
+	struct exynos_drm_manager	*manager;
+};
+
+static void exynos_drm_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+
+	DRM_DEBUG_KMS("%s, encoder dpms: %d\n", __FILE__, mode);
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			struct exynos_drm_display *display = manager->display;
+
+			if (display && display->power_on)
+				display->power_on(manager->dev, mode);
+		}
+	}
+}
+
+static bool
+exynos_drm_encoder_mode_fixup(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+
+	return true;
+}
+
+static void exynos_drm_encoder_mode_set(struct drm_encoder *encoder,
+					 struct drm_display_mode *mode,
+					 struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+	struct exynos_drm_overlay *overlay = get_exynos_drm_overlay(dev,
+						encoder->crtc);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode = adjusted_mode;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			if (manager_ops && manager_ops->mode_set)
+				manager_ops->mode_set(manager->dev, mode);
+
+			if (overlay_ops && overlay_ops->mode_set)
+				overlay_ops->mode_set(manager->dev, overlay);
+		}
+	}
+}
+
+static void exynos_drm_encoder_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static void exynos_drm_encoder_commit(struct drm_encoder *encoder)
+{
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (manager_ops && manager_ops->commit)
+		manager_ops->commit(manager->dev);
+
+	if (overlay_ops && overlay_ops->commit)
+		overlay_ops->commit(manager->dev);
+}
+
+static struct drm_crtc *
+exynos_drm_encoder_get_crtc(struct drm_encoder *encoder)
+{
+	return encoder->crtc;
+}
+
+static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = {
+	.dpms		= exynos_drm_encoder_dpms,
+	.mode_fixup	= exynos_drm_encoder_mode_fixup,
+	.mode_set	= exynos_drm_encoder_mode_set,
+	.prepare	= exynos_drm_encoder_prepare,
+	.commit		= exynos_drm_encoder_commit,
+	.get_crtc	= exynos_drm_encoder_get_crtc,
+};
+
+static void exynos_drm_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct exynos_drm_encoder *exynos_encoder =
+		to_exynos_encoder(encoder);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_encoder->manager->pipe = -1;
+
+	drm_encoder_cleanup(encoder);
+	encoder->dev->mode_config.num_encoder--;
+	kfree(exynos_encoder);
+}
+
+static struct drm_encoder_funcs exynos_encoder_funcs = {
+	.destroy = exynos_drm_encoder_destroy,
+};
+
+struct drm_encoder *
+exynos_drm_encoder_create(struct drm_device *dev,
+			   struct exynos_drm_manager *manager,
+			   unsigned int possible_crtcs)
+{
+	struct drm_encoder *encoder;
+	struct exynos_drm_encoder *exynos_encoder;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!manager || !possible_crtcs)
+		return NULL;
+
+	if (!manager->dev)
+		return NULL;
+
+	exynos_encoder = kzalloc(sizeof(*exynos_encoder), GFP_KERNEL);
+	if (!exynos_encoder) {
+		DRM_ERROR("failed to allocate encoder\n");
+		return NULL;
+	}
+
+	exynos_encoder->manager = manager;
+	encoder = &exynos_encoder->drm_encoder;
+	encoder->possible_crtcs = possible_crtcs;
+
+	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
+
+	drm_encoder_init(dev, encoder, &exynos_encoder_funcs,
+			DRM_MODE_ENCODER_TMDS);
+
+	drm_encoder_helper_add(encoder, &exynos_encoder_helper_funcs);
+
+	DRM_DEBUG_KMS("encoder has been created\n");
+
+	return encoder;
+}
+
+struct exynos_drm_manager *exynos_drm_get_manager(struct drm_encoder *encoder)
+{
+	return to_exynos_encoder(encoder)->manager;
+}
+
+void exynos_drm_fn_encoder(struct drm_crtc *crtc, void *data,
+			    void (*fn)(struct drm_encoder *, void *))
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_encoder *encoder;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->crtc != crtc)
+			continue;
+
+		fn(encoder, data);
+	}
+}
+
+void exynos_drm_enable_vblank(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	int crtc = *(int *)data;
+
+	if (manager->pipe == -1)
+		manager->pipe = crtc;
+
+	if (manager_ops->enable_vblank)
+		manager_ops->enable_vblank(manager->dev);
+}
+
+void exynos_drm_disable_vblank(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	int crtc = *(int *)data;
+
+	if (manager->pipe == -1)
+		manager->pipe = crtc;
+
+	if (manager_ops->disable_vblank)
+		manager_ops->disable_vblank(manager->dev);
+}
+
+void exynos_drm_encoder_crtc_commit(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+
+	overlay_ops->commit(manager->dev);
+}
+
+void exynos_drm_encoder_crtc_mode_set(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+	struct exynos_drm_overlay *overlay = data;
+
+	overlay_ops->mode_set(manager->dev, overlay);
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Encoder Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.h b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
new file mode 100644
index 000000000000..5ecd645d06a9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_ENCODER_H_
+#define _EXYNOS_DRM_ENCODER_H_
+
+struct exynos_drm_manager;
+
+struct drm_encoder *exynos_drm_encoder_create(struct drm_device *dev,
+					       struct exynos_drm_manager *mgr,
+					       unsigned int possible_crtcs);
+struct exynos_drm_manager *
+exynos_drm_get_manager(struct drm_encoder *encoder);
+void exynos_drm_fn_encoder(struct drm_crtc *crtc, void *data,
+			    void (*fn)(struct drm_encoder *, void *));
+void exynos_drm_enable_vblank(struct drm_encoder *encoder, void *data);
+void exynos_drm_disable_vblank(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_crtc_commit(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_crtc_mode_set(struct drm_encoder *encoder, void *data);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
new file mode 100644
index 000000000000..48d29cfd5240
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -0,0 +1,265 @@
+/* exynos_drm_fb.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_fb.h"
+#include "exynos_drm_buf.h"
+#include "exynos_drm_gem.h"
+
+#define to_exynos_fb(x)	container_of(x, struct exynos_drm_fb, fb)
+
+/*
+ * exynos specific framebuffer structure.
+ *
+ * @fb: drm framebuffer obejct.
+ * @exynos_gem_obj: exynos specific gem object containing a gem object.
+ * @entry: pointer to exynos drm buffer entry object.
+ *	- containing only the information to physically continuous memory
+ *	region allocated at default framebuffer creation.
+ */
+struct exynos_drm_fb {
+	struct drm_framebuffer		fb;
+	struct exynos_drm_gem_obj	*exynos_gem_obj;
+	struct exynos_drm_buf_entry	*entry;
+};
+
+static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	drm_framebuffer_cleanup(fb);
+
+	/*
+	 * default framebuffer has no gem object so
+	 * a buffer of the default framebuffer should be released at here.
+	 */
+	if (!exynos_fb->exynos_gem_obj && exynos_fb->entry)
+		exynos_drm_buf_destroy(fb->dev, exynos_fb->entry);
+
+	kfree(exynos_fb);
+	exynos_fb = NULL;
+}
+
+static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb,
+					struct drm_file *file_priv,
+					unsigned int *handle)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return drm_gem_handle_create(file_priv,
+			&exynos_fb->exynos_gem_obj->base, handle);
+}
+
+static int exynos_drm_fb_dirty(struct drm_framebuffer *fb,
+				struct drm_file *file_priv, unsigned flags,
+				unsigned color, struct drm_clip_rect *clips,
+				unsigned num_clips)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO */
+
+	return 0;
+}
+
+static struct drm_framebuffer_funcs exynos_drm_fb_funcs = {
+	.destroy	= exynos_drm_fb_destroy,
+	.create_handle	= exynos_drm_fb_create_handle,
+	.dirty		= exynos_drm_fb_dirty,
+};
+
+static struct drm_framebuffer *
+exynos_drm_fb_init(struct drm_file *file_priv, struct drm_device *dev,
+		    struct drm_mode_fb_cmd *mode_cmd)
+{
+	struct exynos_drm_fb *exynos_fb;
+	struct drm_framebuffer *fb;
+	struct exynos_drm_gem_obj *exynos_gem_obj = NULL;
+	struct drm_gem_object *obj;
+	unsigned int size;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode_cmd->pitch = max(mode_cmd->pitch,
+			mode_cmd->width * (mode_cmd->bpp >> 3));
+
+	DRM_LOG_KMS("drm fb create(%dx%d)\n",
+			mode_cmd->width, mode_cmd->height);
+
+	exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL);
+	if (!exynos_fb) {
+		DRM_ERROR("failed to allocate exynos drm framebuffer.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fb = &exynos_fb->fb;
+	ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs);
+	if (ret) {
+		DRM_ERROR("failed to initialize framebuffer.\n");
+		goto err_init;
+	}
+
+	DRM_LOG_KMS("create: fb id: %d\n", fb->base.id);
+
+	size = mode_cmd->pitch * mode_cmd->height;
+
+	/*
+	 * mode_cmd->handle could be NULL at booting time or
+	 * with user request. if NULL, a new buffer or a gem object
+	 * would be allocated.
+	 */
+	if (!mode_cmd->handle) {
+		if (!file_priv) {
+			struct exynos_drm_buf_entry *entry;
+
+			/*
+			 * in case that file_priv is NULL, it allocates
+			 * only buffer and this buffer would be used
+			 * for default framebuffer.
+			 */
+			entry = exynos_drm_buf_create(dev, size);
+			if (IS_ERR(entry)) {
+				ret = PTR_ERR(entry);
+				goto err_buffer;
+			}
+
+			exynos_fb->entry = entry;
+
+			DRM_LOG_KMS("default fb: paddr = 0x%lx, size = 0x%x\n",
+					(unsigned long)entry->paddr, size);
+
+			goto out;
+		} else {
+			exynos_gem_obj = exynos_drm_gem_create(file_priv, dev,
+							size,
+							&mode_cmd->handle);
+			if (IS_ERR(exynos_gem_obj)) {
+				ret = PTR_ERR(exynos_gem_obj);
+				goto err_buffer;
+			}
+		}
+	} else {
+		obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
+		if (!obj) {
+			DRM_ERROR("failed to lookup gem object.\n");
+			goto err_buffer;
+		}
+
+		exynos_gem_obj = to_exynos_gem_obj(obj);
+
+		drm_gem_object_unreference_unlocked(obj);
+	}
+
+	/*
+	 * if got a exynos_gem_obj from either a handle or
+	 * a new creation then exynos_fb->exynos_gem_obj is NULL
+	 * so that default framebuffer has no its own gem object,
+	 * only its own buffer object.
+	 */
+	exynos_fb->entry = exynos_gem_obj->entry;
+
+	DRM_LOG_KMS("paddr = 0x%lx, size = 0x%x, gem object = 0x%x\n",
+			(unsigned long)exynos_fb->entry->paddr, size,
+			(unsigned int)&exynos_gem_obj->base);
+
+out:
+	exynos_fb->exynos_gem_obj = exynos_gem_obj;
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	return fb;
+
+err_buffer:
+	drm_framebuffer_cleanup(fb);
+
+err_init:
+	kfree(exynos_fb);
+
+	return ERR_PTR(ret);
+}
+
+struct drm_framebuffer *exynos_drm_fb_create(struct drm_device *dev,
+					      struct drm_file *file_priv,
+					      struct drm_mode_fb_cmd *mode_cmd)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return exynos_drm_fb_init(file_priv, dev, mode_cmd);
+}
+
+struct exynos_drm_buf_entry *exynos_drm_fb_get_buf(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+	struct exynos_drm_buf_entry *entry;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	entry = exynos_fb->entry;
+	if (!entry)
+		return NULL;
+
+	DRM_DEBUG_KMS("vaddr = 0x%lx, paddr = 0x%lx\n",
+			(unsigned long)entry->vaddr,
+			(unsigned long)entry->paddr);
+
+	return entry;
+}
+
+static struct drm_mode_config_funcs exynos_drm_mode_config_funcs = {
+	.fb_create = exynos_drm_fb_create,
+};
+
+void exynos_drm_mode_config_init(struct drm_device *dev)
+{
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+
+	/*
+	 * set max width and height as default value(4096x4096).
+	 * this value would be used to check framebuffer size limitation
+	 * at drm_mode_addfb().
+	 */
+	dev->mode_config.max_width = 4096;
+	dev->mode_config.max_height = 4096;
+
+	dev->mode_config.funcs = &exynos_drm_mode_config_funcs;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM FB Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h
new file mode 100644
index 000000000000..eb35931d302c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FB_H_
+#define _EXYNOS_DRM_FB_H
+
+struct drm_framebuffer *exynos_drm_fb_create(struct drm_device *dev,
+					      struct drm_file *filp,
+					      struct drm_mode_fb_cmd *mode_cmd);
+
+void exynos_drm_mode_config_init(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
new file mode 100644
index 000000000000..1f4b3d1a7713
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -0,0 +1,456 @@
+/* exynos_drm_fbdev.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_fb_helper.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_buf.h"
+
+#define MAX_CONNECTOR		4
+#define PREFERRED_BPP		32
+
+#define to_exynos_fbdev(x)	container_of(x, struct exynos_drm_fbdev,\
+				drm_fb_helper)
+
+struct exynos_drm_fbdev {
+	struct drm_fb_helper	drm_fb_helper;
+	struct drm_framebuffer	*fb;
+};
+
+static int exynos_drm_fbdev_set_par(struct fb_info *info)
+{
+	struct fb_var_screeninfo *var = &info->var;
+
+	switch (var->bits_per_pixel) {
+	case 32:
+	case 24:
+	case 18:
+	case 16:
+	case 12:
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+		break;
+	case 1:
+		info->fix.visual = FB_VISUAL_MONO01;
+		break;
+	default:
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+		break;
+	}
+
+	info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8;
+
+	return drm_fb_helper_set_par(info);
+}
+
+
+static struct fb_ops exynos_drm_fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_check_var	= drm_fb_helper_check_var,
+	.fb_set_par	= exynos_drm_fbdev_set_par,
+	.fb_blank	= drm_fb_helper_blank,
+	.fb_pan_display	= drm_fb_helper_pan_display,
+	.fb_setcmap	= drm_fb_helper_setcmap,
+};
+
+static int exynos_drm_fbdev_update(struct drm_fb_helper *helper,
+				     struct drm_framebuffer *fb,
+				     unsigned int fb_width,
+				     unsigned int fb_height)
+{
+	struct fb_info *fbi = helper->fbdev;
+	struct drm_device *dev = helper->dev;
+	struct exynos_drm_fbdev *exynos_fb = to_exynos_fbdev(helper);
+	struct exynos_drm_buf_entry *entry;
+	unsigned int size = fb_width * fb_height * (fb->bits_per_pixel >> 3);
+	unsigned long offset;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_fb->fb = fb;
+
+	drm_fb_helper_fill_fix(fbi, fb->pitch, fb->depth);
+	drm_fb_helper_fill_var(fbi, helper, fb_width, fb_height);
+
+	entry = exynos_drm_fb_get_buf(fb);
+	if (!entry) {
+		DRM_LOG_KMS("entry is null.\n");
+		return -EFAULT;
+	}
+
+	offset = fbi->var.xoffset * (fb->bits_per_pixel >> 3);
+	offset += fbi->var.yoffset * fb->pitch;
+
+	dev->mode_config.fb_base = entry->paddr;
+	fbi->screen_base = entry->vaddr + offset;
+	fbi->fix.smem_start = entry->paddr + offset;
+	fbi->screen_size = size;
+	fbi->fix.smem_len = size;
+
+	return 0;
+}
+
+static int exynos_drm_fbdev_create(struct drm_fb_helper *helper,
+				    struct drm_fb_helper_surface_size *sizes)
+{
+	struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper);
+	struct drm_device *dev = helper->dev;
+	struct fb_info *fbi;
+	struct drm_mode_fb_cmd mode_cmd = { 0 };
+	struct platform_device *pdev = dev->platformdev;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("surface width(%d), height(%d) and bpp(%d\n",
+			sizes->surface_width, sizes->surface_height,
+			sizes->surface_bpp);
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.bpp = sizes->surface_bpp;
+	mode_cmd.depth = sizes->surface_depth;
+
+	mutex_lock(&dev->struct_mutex);
+
+	fbi = framebuffer_alloc(0, &pdev->dev);
+	if (!fbi) {
+		DRM_ERROR("failed to allocate fb info.\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	exynos_fbdev->fb = exynos_drm_fb_create(dev, NULL, &mode_cmd);
+	if (IS_ERR_OR_NULL(exynos_fbdev->fb)) {
+		DRM_ERROR("failed to create drm framebuffer.\n");
+		ret = PTR_ERR(exynos_fbdev->fb);
+		goto out;
+	}
+
+	helper->fb = exynos_fbdev->fb;
+	helper->fbdev = fbi;
+
+	fbi->par = helper;
+	fbi->flags = FBINFO_FLAG_DEFAULT;
+	fbi->fbops = &exynos_drm_fb_ops;
+
+	ret = fb_alloc_cmap(&fbi->cmap, 256, 0);
+	if (ret) {
+		DRM_ERROR("failed to allocate cmap.\n");
+		goto out;
+	}
+
+	ret = exynos_drm_fbdev_update(helper, helper->fb, sizes->fb_width,
+			sizes->fb_height);
+	if (ret < 0)
+		fb_dealloc_cmap(&fbi->cmap);
+
+/*
+ * if failed, all resources allocated above would be released by
+ * drm_mode_config_cleanup() when drm_load() had been called prior
+ * to any specific driver such as fimd or hdmi driver.
+ */
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static bool
+exynos_drm_fbdev_is_samefb(struct drm_framebuffer *fb,
+			    struct drm_fb_helper_surface_size *sizes)
+{
+	if (fb->width != sizes->surface_width)
+		return false;
+	if (fb->height != sizes->surface_height)
+		return false;
+	if (fb->bits_per_pixel != sizes->surface_bpp)
+		return false;
+	if (fb->depth != sizes->surface_depth)
+		return false;
+
+	return true;
+}
+
+static int exynos_drm_fbdev_recreate(struct drm_fb_helper *helper,
+				      struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_device *dev = helper->dev;
+	struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper);
+	struct drm_framebuffer *fb = exynos_fbdev->fb;
+	struct drm_mode_fb_cmd mode_cmd = { 0 };
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (helper->fb != fb) {
+		DRM_ERROR("drm framebuffer is different\n");
+		return -EINVAL;
+	}
+
+	if (exynos_drm_fbdev_is_samefb(fb, sizes))
+		return 0;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.bpp = sizes->surface_bpp;
+	mode_cmd.depth = sizes->surface_depth;
+
+	if (fb->funcs->destroy)
+		fb->funcs->destroy(fb);
+
+	exynos_fbdev->fb = exynos_drm_fb_create(dev, NULL, &mode_cmd);
+	if (IS_ERR(exynos_fbdev->fb)) {
+		DRM_ERROR("failed to allocate fb.\n");
+		return PTR_ERR(exynos_fbdev->fb);
+	}
+
+	helper->fb = exynos_fbdev->fb;
+	return exynos_drm_fbdev_update(helper, helper->fb, sizes->fb_width,
+			sizes->fb_height);
+}
+
+static int exynos_drm_fbdev_probe(struct drm_fb_helper *helper,
+				   struct drm_fb_helper_surface_size *sizes)
+{
+	int ret = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!helper->fb) {
+		ret = exynos_drm_fbdev_create(helper, sizes);
+		if (ret < 0) {
+			DRM_ERROR("failed to create fbdev.\n");
+			return ret;
+		}
+
+		/*
+		 * fb_helper expects a value more than 1 if succeed
+		 * because register_framebuffer() should be called.
+		 */
+		ret = 1;
+	} else {
+		ret = exynos_drm_fbdev_recreate(helper, sizes);
+		if (ret < 0) {
+			DRM_ERROR("failed to reconfigure fbdev\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static struct drm_fb_helper_funcs exynos_drm_fb_helper_funcs = {
+	.fb_probe =	exynos_drm_fbdev_probe,
+};
+
+int exynos_drm_fbdev_init(struct drm_device *dev)
+{
+	struct exynos_drm_fbdev *fbdev;
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_fb_helper *helper;
+	unsigned int num_crtc;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
+		return 0;
+
+	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev) {
+		DRM_ERROR("failed to allocate drm fbdev.\n");
+		return -ENOMEM;
+	}
+
+	private->fb_helper = helper = &fbdev->drm_fb_helper;
+	helper->funcs = &exynos_drm_fb_helper_funcs;
+
+	num_crtc = dev->mode_config.num_crtc;
+
+	ret = drm_fb_helper_init(dev, helper, num_crtc, MAX_CONNECTOR);
+	if (ret < 0) {
+		DRM_ERROR("failed to initialize drm fb helper.\n");
+		goto err_init;
+	}
+
+	ret = drm_fb_helper_single_add_all_connectors(helper);
+	if (ret < 0) {
+		DRM_ERROR("failed to register drm_fb_helper_connector.\n");
+		goto err_setup;
+
+	}
+
+	ret = drm_fb_helper_initial_config(helper, PREFERRED_BPP);
+	if (ret < 0) {
+		DRM_ERROR("failed to set up hw configuration.\n");
+		goto err_setup;
+	}
+
+	return 0;
+
+err_setup:
+	drm_fb_helper_fini(helper);
+
+err_init:
+	private->fb_helper = NULL;
+	kfree(fbdev);
+
+	return ret;
+}
+
+static void exynos_drm_fbdev_destroy(struct drm_device *dev,
+				      struct drm_fb_helper *fb_helper)
+{
+	struct drm_framebuffer *fb;
+
+	/* release drm framebuffer and real buffer */
+	if (fb_helper->fb && fb_helper->fb->funcs) {
+		fb = fb_helper->fb;
+		if (fb && fb->funcs->destroy)
+			fb->funcs->destroy(fb);
+	}
+
+	/* release linux framebuffer */
+	if (fb_helper->fbdev) {
+		struct fb_info *info;
+		int ret;
+
+		info = fb_helper->fbdev;
+		ret = unregister_framebuffer(info);
+		if (ret < 0)
+			DRM_DEBUG_KMS("failed unregister_framebuffer()\n");
+
+		if (info->cmap.len)
+			fb_dealloc_cmap(&info->cmap);
+
+		framebuffer_release(info);
+	}
+
+	drm_fb_helper_fini(fb_helper);
+}
+
+void exynos_drm_fbdev_fini(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+	struct exynos_drm_fbdev *fbdev;
+
+	if (!private || !private->fb_helper)
+		return;
+
+	fbdev = to_exynos_fbdev(private->fb_helper);
+
+	exynos_drm_fbdev_destroy(dev, private->fb_helper);
+	kfree(fbdev);
+	private->fb_helper = NULL;
+}
+
+void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	if (!private || !private->fb_helper)
+		return;
+
+	drm_fb_helper_restore_fbdev_mode(private->fb_helper);
+}
+
+int exynos_drm_fbdev_reinit(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_fb_helper *fb_helper;
+	int ret;
+
+	if (!private)
+		return -EINVAL;
+
+	/*
+	 * if all sub drivers were unloaded then num_connector is 0
+	 * so at this time, the framebuffers also should be destroyed.
+	 */
+	if (!dev->mode_config.num_connector) {
+		exynos_drm_fbdev_fini(dev);
+		return 0;
+	}
+
+	fb_helper = private->fb_helper;
+
+	if (fb_helper) {
+		drm_fb_helper_fini(fb_helper);
+
+		ret = drm_fb_helper_init(dev, fb_helper,
+				dev->mode_config.num_crtc, MAX_CONNECTOR);
+		if (ret < 0) {
+			DRM_ERROR("failed to initialize drm fb helper\n");
+			return ret;
+		}
+
+		ret = drm_fb_helper_single_add_all_connectors(fb_helper);
+		if (ret < 0) {
+			DRM_ERROR("failed to add fb helper to connectors\n");
+			goto err;
+		}
+
+		ret = drm_fb_helper_initial_config(fb_helper, PREFERRED_BPP);
+		if (ret < 0) {
+			DRM_ERROR("failed to set up hw configuration.\n");
+			goto err;
+		}
+	} else {
+		/*
+		 * if drm_load() failed whem drm load() was called prior
+		 * to specific drivers, fb_helper must be NULL and so
+		 * this fuction should be called again to re-initialize and
+		 * re-configure the fb helper. it means that this function
+		 * has been called by the specific drivers.
+		 */
+		ret = exynos_drm_fbdev_init(dev);
+	}
+
+	return ret;
+
+err:
+	/*
+	 * if drm_load() failed when drm load() was called prior
+	 * to specific drivers, the fb_helper must be NULL and so check it.
+	 */
+	if (fb_helper)
+		drm_fb_helper_fini(fb_helper);
+
+	return ret;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM FBDEV Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
new file mode 100644
index 000000000000..ccfce8a1a451
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FBDEV_H_
+#define _EXYNOS_DRM_FBDEV_H_
+
+int exynos_drm_fbdev_init(struct drm_device *dev);
+int exynos_drm_fbdev_reinit(struct drm_device *dev);
+void exynos_drm_fbdev_fini(struct drm_device *dev);
+void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
new file mode 100644
index 000000000000..4659c88cdd9b
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -0,0 +1,811 @@
+/* exynos_drm_fimd.c
+ *
+ * Copyright (C) 2011 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Inki Dae <inki.dae@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include "drmP.h"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <drm/exynos_drm.h>
+#include <plat/regs-fb-v4.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fbdev.h"
+#include "exynos_drm_crtc.h"
+
+/*
+ * FIMD is stand for Fully Interactive Mobile Display and
+ * as a display controller, it transfers contents drawn on memory
+ * to a LCD Panel through Display Interfaces such as RGB or
+ * CPU Interface.
+ */
+
+/* position control register for hardware window 0, 2 ~ 4.*/
+#define VIDOSD_A(win)		(VIDOSD_BASE + 0x00 + (win) * 16)
+#define VIDOSD_B(win)		(VIDOSD_BASE + 0x04 + (win) * 16)
+/* size control register for hardware window 0. */
+#define VIDOSD_C_SIZE_W0	(VIDOSD_BASE + 0x08)
+/* alpha control register for hardware window 1 ~ 4. */
+#define VIDOSD_C(win)		(VIDOSD_BASE + 0x18 + (win) * 16)
+/* size control register for hardware window 1 ~ 4. */
+#define VIDOSD_D(win)		(VIDOSD_BASE + 0x0C + (win) * 16)
+
+#define VIDWx_BUF_START(win, buf)	(VIDW_BUF_START(buf) + (win) * 8)
+#define VIDWx_BUF_END(win, buf)		(VIDW_BUF_END(buf) + (win) * 8)
+#define VIDWx_BUF_SIZE(win, buf)	(VIDW_BUF_SIZE(buf) + (win) * 4)
+
+/* color key control register for hardware window 1 ~ 4. */
+#define WKEYCON0_BASE(x)		((WKEYCON0 + 0x140) + (x * 8))
+/* color key value register for hardware window 1 ~ 4. */
+#define WKEYCON1_BASE(x)		((WKEYCON1 + 0x140) + (x * 8))
+
+/* FIMD has totally five hardware windows. */
+#define WINDOWS_NR	5
+
+#define get_fimd_context(dev)	platform_get_drvdata(to_platform_device(dev))
+
+struct fimd_win_data {
+	unsigned int		offset_x;
+	unsigned int		offset_y;
+	unsigned int		ovl_width;
+	unsigned int		ovl_height;
+	unsigned int		fb_width;
+	unsigned int		fb_height;
+	unsigned int		bpp;
+	dma_addr_t		paddr;
+	void __iomem		*vaddr;
+	unsigned int		buf_offsize;
+	unsigned int		line_size;	/* bytes */
+};
+
+struct fimd_context {
+	struct exynos_drm_subdrv	subdrv;
+	int				irq;
+	struct drm_crtc			*crtc;
+	struct clk			*bus_clk;
+	struct clk			*lcd_clk;
+	struct resource			*regs_res;
+	void __iomem			*regs;
+	struct fimd_win_data		win_data[WINDOWS_NR];
+	unsigned int			clkdiv;
+	unsigned int			default_win;
+	unsigned long			irq_flags;
+	u32				vidcon0;
+	u32				vidcon1;
+
+	struct fb_videomode		*timing;
+};
+
+static bool fimd_display_is_connected(struct device *dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return true;
+}
+
+static void *fimd_get_timing(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return ctx->timing;
+}
+
+static int fimd_check_timing(struct device *dev, void *timing)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return 0;
+}
+
+static int fimd_display_power_on(struct device *dev, int mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return 0;
+}
+
+static struct exynos_drm_display fimd_display = {
+	.type = EXYNOS_DISPLAY_TYPE_LCD,
+	.is_connected = fimd_display_is_connected,
+	.get_timing = fimd_get_timing,
+	.check_timing = fimd_check_timing,
+	.power_on = fimd_display_power_on,
+};
+
+static void fimd_commit(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fb_videomode *timing = ctx->timing;
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* setup polarity values from machine code. */
+	writel(ctx->vidcon1, ctx->regs + VIDCON1);
+
+	/* setup vertical timing values. */
+	val = VIDTCON0_VBPD(timing->upper_margin - 1) |
+	       VIDTCON0_VFPD(timing->lower_margin - 1) |
+	       VIDTCON0_VSPW(timing->vsync_len - 1);
+	writel(val, ctx->regs + VIDTCON0);
+
+	/* setup horizontal timing values.  */
+	val = VIDTCON1_HBPD(timing->left_margin - 1) |
+	       VIDTCON1_HFPD(timing->right_margin - 1) |
+	       VIDTCON1_HSPW(timing->hsync_len - 1);
+	writel(val, ctx->regs + VIDTCON1);
+
+	/* setup horizontal and vertical display size. */
+	val = VIDTCON2_LINEVAL(timing->yres - 1) |
+	       VIDTCON2_HOZVAL(timing->xres - 1);
+	writel(val, ctx->regs + VIDTCON2);
+
+	/* setup clock source, clock divider, enable dma. */
+	val = ctx->vidcon0;
+	val &= ~(VIDCON0_CLKVAL_F_MASK | VIDCON0_CLKDIR);
+
+	if (ctx->clkdiv > 1)
+		val |= VIDCON0_CLKVAL_F(ctx->clkdiv - 1) | VIDCON0_CLKDIR;
+	else
+		val &= ~VIDCON0_CLKDIR;	/* 1:1 clock */
+
+	/*
+	 * fields of register with prefix '_F' would be updated
+	 * at vsync(same as dma start)
+	 */
+	val |= VIDCON0_ENVID | VIDCON0_ENVID_F;
+	writel(val, ctx->regs + VIDCON0);
+}
+
+static int fimd_enable_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!test_and_set_bit(0, &ctx->irq_flags)) {
+		val = readl(ctx->regs + VIDINTCON0);
+
+		val |= VIDINTCON0_INT_ENABLE;
+		val |= VIDINTCON0_INT_FRAME;
+
+		val &= ~VIDINTCON0_FRAMESEL0_MASK;
+		val |= VIDINTCON0_FRAMESEL0_VSYNC;
+		val &= ~VIDINTCON0_FRAMESEL1_MASK;
+		val |= VIDINTCON0_FRAMESEL1_NONE;
+
+		writel(val, ctx->regs + VIDINTCON0);
+	}
+
+	return 0;
+}
+
+static void fimd_disable_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (test_and_clear_bit(0, &ctx->irq_flags)) {
+		val = readl(ctx->regs + VIDINTCON0);
+
+		val &= ~VIDINTCON0_INT_FRAME;
+		val &= ~VIDINTCON0_INT_ENABLE;
+
+		writel(val, ctx->regs + VIDINTCON0);
+	}
+}
+
+static struct exynos_drm_manager_ops fimd_manager_ops = {
+	.commit = fimd_commit,
+	.enable_vblank = fimd_enable_vblank,
+	.disable_vblank = fimd_disable_vblank,
+};
+
+static void fimd_win_mode_set(struct device *dev,
+			      struct exynos_drm_overlay *overlay)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	unsigned long offset;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!overlay) {
+		dev_err(dev, "overlay is NULL\n");
+		return;
+	}
+
+	offset = overlay->fb_x * (overlay->bpp >> 3);
+	offset += overlay->fb_y * overlay->pitch;
+
+	DRM_DEBUG_KMS("offset = 0x%lx, pitch = %x\n", offset, overlay->pitch);
+
+	win_data = &ctx->win_data[ctx->default_win];
+
+	win_data->offset_x = overlay->crtc_x;
+	win_data->offset_y = overlay->crtc_y;
+	win_data->ovl_width = overlay->crtc_width;
+	win_data->ovl_height = overlay->crtc_height;
+	win_data->fb_width = overlay->fb_width;
+	win_data->fb_height = overlay->fb_height;
+	win_data->paddr = overlay->paddr + offset;
+	win_data->vaddr = overlay->vaddr + offset;
+	win_data->bpp = overlay->bpp;
+	win_data->buf_offsize = (overlay->fb_width - overlay->crtc_width) *
+				(overlay->bpp >> 3);
+	win_data->line_size = overlay->crtc_width * (overlay->bpp >> 3);
+
+	DRM_DEBUG_KMS("offset_x = %d, offset_y = %d\n",
+			win_data->offset_x, win_data->offset_y);
+	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
+			win_data->ovl_width, win_data->ovl_height);
+	DRM_DEBUG_KMS("paddr = 0x%lx, vaddr = 0x%lx\n",
+			(unsigned long)win_data->paddr,
+			(unsigned long)win_data->vaddr);
+	DRM_DEBUG_KMS("fb_width = %d, crtc_width = %d\n",
+			overlay->fb_width, overlay->crtc_width);
+}
+
+static void fimd_win_set_pixfmt(struct device *dev, unsigned int win)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data = &ctx->win_data[win];
+	unsigned long val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	val = WINCONx_ENWIN;
+
+	switch (win_data->bpp) {
+	case 1:
+		val |= WINCON0_BPPMODE_1BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_4WORD;
+		break;
+	case 2:
+		val |= WINCON0_BPPMODE_2BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_8WORD;
+		break;
+	case 4:
+		val |= WINCON0_BPPMODE_4BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_8WORD;
+		break;
+	case 8:
+		val |= WINCON0_BPPMODE_8BPP_PALETTE;
+		val |= WINCONx_BURSTLEN_8WORD;
+		val |= WINCONx_BYTSWP;
+		break;
+	case 16:
+		val |= WINCON0_BPPMODE_16BPP_565;
+		val |= WINCONx_HAWSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	case 24:
+		val |= WINCON0_BPPMODE_24BPP_888;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	case 32:
+		val |= WINCON1_BPPMODE_28BPP_A4888
+			| WINCON1_BLD_PIX | WINCON1_ALPHA_SEL;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	default:
+		DRM_DEBUG_KMS("invalid pixel size so using unpacked 24bpp.\n");
+
+		val |= WINCON0_BPPMODE_24BPP_888;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	}
+
+	DRM_DEBUG_KMS("bpp = %d\n", win_data->bpp);
+
+	writel(val, ctx->regs + WINCON(win));
+}
+
+static void fimd_win_set_colkey(struct device *dev, unsigned int win)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	unsigned int keycon0 = 0, keycon1 = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	keycon0 = ~(WxKEYCON0_KEYBL_EN | WxKEYCON0_KEYEN_F |
+			WxKEYCON0_DIRCON) | WxKEYCON0_COMPKEY(0);
+
+	keycon1 = WxKEYCON1_COLVAL(0xffffffff);
+
+	writel(keycon0, ctx->regs + WKEYCON0_BASE(win));
+	writel(keycon1, ctx->regs + WKEYCON1_BASE(win));
+}
+
+static void fimd_win_commit(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int win = ctx->default_win;
+	unsigned long val, alpha, size;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (win < 0 || win > WINDOWS_NR)
+		return;
+
+	win_data = &ctx->win_data[win];
+
+	/*
+	 * SHADOWCON register is used for enabling timing.
+	 *
+	 * for example, once only width value of a register is set,
+	 * if the dma is started then fimd hardware could malfunction so
+	 * with protect window setting, the register fields with prefix '_F'
+	 * wouldn't be updated at vsync also but updated once unprotect window
+	 * is set.
+	 */
+
+	/* protect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+
+	/* buffer start address */
+	val = win_data->paddr;
+	writel(val, ctx->regs + VIDWx_BUF_START(win, 0));
+
+	/* buffer end address */
+	size = win_data->fb_width * win_data->ovl_height * (win_data->bpp >> 3);
+	val = win_data->paddr + size;
+	writel(val, ctx->regs + VIDWx_BUF_END(win, 0));
+
+	DRM_DEBUG_KMS("start addr = 0x%lx, end addr = 0x%lx, size = 0x%lx\n",
+			(unsigned long)win_data->paddr, val, size);
+	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
+			win_data->ovl_width, win_data->ovl_height);
+
+	/* buffer size */
+	val = VIDW_BUF_SIZE_OFFSET(win_data->buf_offsize) |
+		VIDW_BUF_SIZE_PAGEWIDTH(win_data->line_size);
+	writel(val, ctx->regs + VIDWx_BUF_SIZE(win, 0));
+
+	/* OSD position */
+	val = VIDOSDxA_TOPLEFT_X(win_data->offset_x) |
+		VIDOSDxA_TOPLEFT_Y(win_data->offset_y);
+	writel(val, ctx->regs + VIDOSD_A(win));
+
+	val = VIDOSDxB_BOTRIGHT_X(win_data->offset_x +
+					win_data->ovl_width - 1) |
+		VIDOSDxB_BOTRIGHT_Y(win_data->offset_y +
+					win_data->ovl_height - 1);
+	writel(val, ctx->regs + VIDOSD_B(win));
+
+	DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n",
+			win_data->offset_x, win_data->offset_y,
+			win_data->offset_x + win_data->ovl_width - 1,
+			win_data->offset_y + win_data->ovl_height - 1);
+
+	/* hardware window 0 doesn't support alpha channel. */
+	if (win != 0) {
+		/* OSD alpha */
+		alpha = VIDISD14C_ALPHA1_R(0xf) |
+			VIDISD14C_ALPHA1_G(0xf) |
+			VIDISD14C_ALPHA1_B(0xf);
+
+		writel(alpha, ctx->regs + VIDOSD_C(win));
+	}
+
+	/* OSD size */
+	if (win != 3 && win != 4) {
+		u32 offset = VIDOSD_D(win);
+		if (win == 0)
+			offset = VIDOSD_C_SIZE_W0;
+		val = win_data->ovl_width * win_data->ovl_height;
+		writel(val, ctx->regs + offset);
+
+		DRM_DEBUG_KMS("osd size = 0x%x\n", (unsigned int)val);
+	}
+
+	fimd_win_set_pixfmt(dev, win);
+
+	/* hardware window 0 doesn't support color key. */
+	if (win != 0)
+		fimd_win_set_colkey(dev, win);
+
+	/* Enable DMA channel and unprotect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_CHx_ENABLE(win);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static void fimd_win_disable(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int win = ctx->default_win;
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (win < 0 || win > WINDOWS_NR)
+		return;
+
+	win_data = &ctx->win_data[win];
+
+	/* protect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+
+	/* wincon */
+	val = readl(ctx->regs + WINCON(win));
+	val &= ~WINCONx_ENWIN;
+	writel(val, ctx->regs + WINCON(win));
+
+	/* unprotect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val &= ~SHADOWCON_CHx_ENABLE(win);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static struct exynos_drm_overlay_ops fimd_overlay_ops = {
+	.mode_set = fimd_win_mode_set,
+	.commit = fimd_win_commit,
+	.disable = fimd_win_disable,
+};
+
+static void fimd_finish_pageflip(struct drm_device *drm_dev, int crtc)
+{
+	struct exynos_drm_private *dev_priv = drm_dev->dev_private;
+	struct drm_pending_vblank_event *e, *t;
+	struct timeval now;
+	unsigned long flags;
+	bool is_checked = false;
+
+	spin_lock_irqsave(&drm_dev->event_lock, flags);
+
+	list_for_each_entry_safe(e, t, &dev_priv->pageflip_event_list,
+			base.link) {
+		/* if event's pipe isn't same as crtc then ignore it. */
+		if (crtc != e->pipe)
+			continue;
+
+		is_checked = true;
+
+		do_gettimeofday(&now);
+		e->event.sequence = 0;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+	}
+
+	if (is_checked)
+		drm_vblank_put(drm_dev, crtc);
+
+	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+}
+
+static irqreturn_t fimd_irq_handler(int irq, void *dev_id)
+{
+	struct fimd_context *ctx = (struct fimd_context *)dev_id;
+	struct exynos_drm_subdrv *subdrv = &ctx->subdrv;
+	struct drm_device *drm_dev = subdrv->drm_dev;
+	struct exynos_drm_manager *manager = &subdrv->manager;
+	u32 val;
+
+	val = readl(ctx->regs + VIDINTCON1);
+
+	if (val & VIDINTCON1_INT_FRAME)
+		/* VSYNC interrupt */
+		writel(VIDINTCON1_INT_FRAME, ctx->regs + VIDINTCON1);
+
+	drm_handle_vblank(drm_dev, manager->pipe);
+	fimd_finish_pageflip(drm_dev, manager->pipe);
+
+	return IRQ_HANDLED;
+}
+
+static int fimd_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * enable drm irq mode.
+	 * - with irq_enabled = 1, we can use the vblank feature.
+	 *
+	 * P.S. note that we wouldn't use drm irq handler but
+	 *	just specific driver own one instead because
+	 *	drm framework supports only one irq handler.
+	 */
+	drm_dev->irq_enabled = 1;
+
+	/*
+	 * with vblank_disable_allowed = 1, vblank interrupt will be disabled
+	 * by drm timer once a current process gives up ownership of
+	 * vblank event.(drm_vblank_put function was called)
+	 */
+	drm_dev->vblank_disable_allowed = 1;
+
+	return 0;
+}
+
+static void fimd_subdrv_remove(struct drm_device *drm_dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+}
+
+static int fimd_calc_clkdiv(struct fimd_context *ctx,
+			    struct fb_videomode *timing)
+{
+	unsigned long clk = clk_get_rate(ctx->lcd_clk);
+	u32 retrace;
+	u32 clkdiv;
+	u32 best_framerate = 0;
+	u32 framerate;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	retrace = timing->left_margin + timing->hsync_len +
+				timing->right_margin + timing->xres;
+	retrace *= timing->upper_margin + timing->vsync_len +
+				timing->lower_margin + timing->yres;
+
+	/* default framerate is 60Hz */
+	if (!timing->refresh)
+		timing->refresh = 60;
+
+	clk /= retrace;
+
+	for (clkdiv = 1; clkdiv < 0x100; clkdiv++) {
+		int tmp;
+
+		/* get best framerate */
+		framerate = clk / clkdiv;
+		tmp = timing->refresh - framerate;
+		if (tmp < 0) {
+			best_framerate = framerate;
+			continue;
+		} else {
+			if (!best_framerate)
+				best_framerate = framerate;
+			else if (tmp < (best_framerate - framerate))
+				best_framerate = framerate;
+			break;
+		}
+	}
+
+	return clkdiv;
+}
+
+static void fimd_clear_win(struct fimd_context *ctx, int win)
+{
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	writel(0, ctx->regs + WINCON(win));
+	writel(0, ctx->regs + VIDOSD_A(win));
+	writel(0, ctx->regs + VIDOSD_B(win));
+	writel(0, ctx->regs + VIDOSD_C(win));
+
+	if (win == 1 || win == 2)
+		writel(0, ctx->regs + VIDOSD_D(win));
+
+	val = readl(ctx->regs + SHADOWCON);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static int __devinit fimd_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct fimd_context *ctx;
+	struct exynos_drm_subdrv *subdrv;
+	struct exynos_drm_fimd_pdata *pdata;
+	struct fb_videomode *timing;
+	struct resource *res;
+	int win;
+	int ret = -EINVAL;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(dev, "no platform data specified\n");
+		return -EINVAL;
+	}
+
+	timing = &pdata->timing;
+	if (!timing) {
+		dev_err(dev, "timing is null.\n");
+		return -EINVAL;
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->bus_clk = clk_get(dev, "fimd");
+	if (IS_ERR(ctx->bus_clk)) {
+		dev_err(dev, "failed to get bus clock\n");
+		ret = PTR_ERR(ctx->bus_clk);
+		goto err_clk_get;
+	}
+
+	clk_enable(ctx->bus_clk);
+
+	ctx->lcd_clk = clk_get(dev, "sclk_fimd");
+	if (IS_ERR(ctx->lcd_clk)) {
+		dev_err(dev, "failed to get lcd clock\n");
+		ret = PTR_ERR(ctx->lcd_clk);
+		goto err_bus_clk;
+	}
+
+	clk_enable(ctx->lcd_clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "failed to find registers\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs_res = request_mem_region(res->start, resource_size(res),
+					   dev_name(dev));
+	if (!ctx->regs_res) {
+		dev_err(dev, "failed to claim register region\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs = ioremap(res->start, resource_size(res));
+	if (!ctx->regs) {
+		dev_err(dev, "failed to map registers\n");
+		ret = -ENXIO;
+		goto err_req_region_io;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "irq request failed.\n");
+		goto err_req_region_irq;
+	}
+
+	ctx->irq = res->start;
+
+	for (win = 0; win < WINDOWS_NR; win++)
+		fimd_clear_win(ctx, win);
+
+	ret = request_irq(ctx->irq, fimd_irq_handler, 0, "drm_fimd", ctx);
+	if (ret < 0) {
+		dev_err(dev, "irq request failed.\n");
+		goto err_req_irq;
+	}
+
+	ctx->clkdiv = fimd_calc_clkdiv(ctx, timing);
+	ctx->vidcon0 = pdata->vidcon0;
+	ctx->vidcon1 = pdata->vidcon1;
+	ctx->default_win = pdata->default_win;
+	ctx->timing = timing;
+
+	timing->pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
+
+	DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n",
+			timing->pixclock, ctx->clkdiv);
+
+	subdrv = &ctx->subdrv;
+
+	subdrv->probe = fimd_subdrv_probe;
+	subdrv->remove = fimd_subdrv_remove;
+	subdrv->manager.pipe = -1;
+	subdrv->manager.ops = &fimd_manager_ops;
+	subdrv->manager.overlay_ops = &fimd_overlay_ops;
+	subdrv->manager.display = &fimd_display;
+	subdrv->manager.dev = dev;
+
+	platform_set_drvdata(pdev, ctx);
+	exynos_drm_subdrv_register(subdrv);
+
+	return 0;
+
+err_req_irq:
+err_req_region_irq:
+	iounmap(ctx->regs);
+
+err_req_region_io:
+	release_resource(ctx->regs_res);
+	kfree(ctx->regs_res);
+
+err_clk:
+	clk_disable(ctx->lcd_clk);
+	clk_put(ctx->lcd_clk);
+
+err_bus_clk:
+	clk_disable(ctx->bus_clk);
+	clk_put(ctx->bus_clk);
+
+err_clk_get:
+	kfree(ctx);
+	return ret;
+}
+
+static int __devexit fimd_remove(struct platform_device *pdev)
+{
+	struct fimd_context *ctx = platform_get_drvdata(pdev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_subdrv_unregister(&ctx->subdrv);
+
+	clk_disable(ctx->lcd_clk);
+	clk_disable(ctx->bus_clk);
+	clk_put(ctx->lcd_clk);
+	clk_put(ctx->bus_clk);
+
+	iounmap(ctx->regs);
+	release_resource(ctx->regs_res);
+	kfree(ctx->regs_res);
+	free_irq(ctx->irq, ctx);
+
+	kfree(ctx);
+
+	return 0;
+}
+
+static struct platform_driver fimd_driver = {
+	.probe		= fimd_probe,
+	.remove		= __devexit_p(fimd_remove),
+	.driver		= {
+		.name	= "exynos4-fb",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init fimd_init(void)
+{
+	return platform_driver_register(&fimd_driver);
+}
+
+static void __exit fimd_exit(void)
+{
+	platform_driver_unregister(&fimd_driver);
+}
+
+module_init(fimd_init);
+module_exit(fimd_exit);
+
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung DRM FIMD Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
new file mode 100644
index 000000000000..a8e7a88906ed
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -0,0 +1,415 @@
+/* exynos_drm_gem.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include <drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_buf.h"
+
+static unsigned int convert_to_vm_err_msg(int msg)
+{
+	unsigned int out_msg;
+
+	switch (msg) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+		out_msg = VM_FAULT_NOPAGE;
+		break;
+
+	case -ENOMEM:
+		out_msg = VM_FAULT_OOM;
+		break;
+
+	default:
+		out_msg = VM_FAULT_SIGBUS;
+		break;
+	}
+
+	return out_msg;
+}
+
+static unsigned int get_gem_mmap_offset(struct drm_gem_object *obj)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return (unsigned int)obj->map_list.hash.key << PAGE_SHIFT;
+}
+
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int size,
+		unsigned int *handle)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct exynos_drm_buf_entry *entry;
+	struct drm_gem_object *obj;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	size = roundup(size, PAGE_SIZE);
+
+	exynos_gem_obj = kzalloc(sizeof(*exynos_gem_obj), GFP_KERNEL);
+	if (!exynos_gem_obj) {
+		DRM_ERROR("failed to allocate exynos gem object.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* allocate the new buffer object and memory region. */
+	entry = exynos_drm_buf_create(dev, size);
+	if (!entry) {
+		kfree(exynos_gem_obj);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	exynos_gem_obj->entry = entry;
+
+	obj = &exynos_gem_obj->base;
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret < 0) {
+		DRM_ERROR("failed to initailize gem object.\n");
+		goto err_obj_init;
+	}
+
+	DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp);
+
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret < 0) {
+		DRM_ERROR("failed to allocate mmap offset.\n");
+		goto err_create_mmap_offset;
+	}
+
+	/*
+	 * allocate a id of idr table where the obj is registered
+	 * and handle has the id what user can see.
+	 */
+	ret = drm_gem_handle_create(file_priv, obj, handle);
+	if (ret)
+		goto err_handle_create;
+
+	DRM_DEBUG_KMS("gem handle = 0x%x\n", *handle);
+
+	/* drop reference from allocate - handle holds it now. */
+	drm_gem_object_unreference_unlocked(obj);
+
+	return exynos_gem_obj;
+
+err_handle_create:
+	drm_gem_free_mmap_offset(obj);
+
+err_create_mmap_offset:
+	drm_gem_object_release(obj);
+
+err_obj_init:
+	exynos_drm_buf_destroy(dev, exynos_gem_obj->entry);
+
+	kfree(exynos_gem_obj);
+
+	return ERR_PTR(ret);
+}
+
+int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_create *args = data;
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s : size = 0x%x\n", __FILE__, args->size);
+
+	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
+			&args->handle);
+	if (IS_ERR(exynos_gem_obj))
+		return PTR_ERR(exynos_gem_obj);
+
+	return 0;
+}
+
+int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_map_off *args = data;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("handle = 0x%x, offset = 0x%lx\n",
+			args->handle, (unsigned long)args->offset);
+
+	if (!(dev->driver->driver_features & DRIVER_GEM)) {
+		DRM_ERROR("does not support GEM.\n");
+		return -ENODEV;
+	}
+
+	return exynos_drm_gem_dumb_map_offset(file_priv, dev, args->handle,
+			&args->offset);
+}
+
+static int exynos_drm_gem_mmap_buffer(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = filp->private_data;
+	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct exynos_drm_buf_entry *entry;
+	unsigned long pfn, vm_size;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	vma->vm_flags |= (VM_IO | VM_RESERVED);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_file = filp;
+
+	vm_size = vma->vm_end - vma->vm_start;
+	/*
+	 * a entry contains information to physically continuous memory
+	 * allocated by user request or at framebuffer creation.
+	 */
+	entry = exynos_gem_obj->entry;
+
+	/* check if user-requested size is valid. */
+	if (vm_size > entry->size)
+		return -EINVAL;
+
+	/*
+	 * get page frame number to physical memory to be mapped
+	 * to user space.
+	 */
+	pfn = exynos_gem_obj->entry->paddr >> PAGE_SHIFT;
+
+	DRM_DEBUG_KMS("pfn = 0x%lx\n", pfn);
+
+	if (remap_pfn_range(vma, vma->vm_start, pfn, vm_size,
+				vma->vm_page_prot)) {
+		DRM_ERROR("failed to remap pfn range.\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static const struct file_operations exynos_drm_gem_fops = {
+	.mmap = exynos_drm_gem_mmap_buffer,
+};
+
+int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_mmap *args = data;
+	struct drm_gem_object *obj;
+	unsigned int addr;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!(dev->driver->driver_features & DRIVER_GEM)) {
+		DRM_ERROR("does not support GEM.\n");
+		return -ENODEV;
+	}
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!obj) {
+		DRM_ERROR("failed to lookup gem object.\n");
+		return -EINVAL;
+	}
+
+	obj->filp->f_op = &exynos_drm_gem_fops;
+	obj->filp->private_data = obj;
+
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(obj->filp, 0, args->size,
+			PROT_READ | PROT_WRITE, MAP_SHARED, 0);
+	up_write(&current->mm->mmap_sem);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	if (IS_ERR((void *)addr))
+		return PTR_ERR((void *)addr);
+
+	args->mapped = addr;
+
+	DRM_DEBUG_KMS("mapped = 0x%lx\n", (unsigned long)args->mapped);
+
+	return 0;
+}
+
+int exynos_drm_gem_init_object(struct drm_gem_object *obj)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return 0;
+}
+
+void exynos_drm_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("handle count = %d\n",
+			atomic_read(&gem_obj->handle_count));
+
+	if (gem_obj->map_list.map)
+		drm_gem_free_mmap_offset(gem_obj);
+
+	/* release file pointer to gem object. */
+	drm_gem_object_release(gem_obj);
+
+	exynos_gem_obj = to_exynos_gem_obj(gem_obj);
+
+	exynos_drm_buf_destroy(gem_obj->dev, exynos_gem_obj->entry);
+
+	kfree(exynos_gem_obj);
+}
+
+int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * alocate memory to be used for framebuffer.
+	 * - this callback would be called by user application
+	 *	with DRM_IOCTL_MODE_CREATE_DUMB command.
+	 */
+
+	args->pitch = args->width * args->bpp >> 3;
+	args->size = args->pitch * args->height;
+
+	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
+							&args->handle);
+	if (IS_ERR(exynos_gem_obj))
+		return PTR_ERR(exynos_gem_obj);
+
+	return 0;
+}
+
+int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
+		struct drm_device *dev, uint32_t handle, uint64_t *offset)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct drm_gem_object *obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * get offset of memory allocated for drm framebuffer.
+	 * - this callback would be called by user application
+	 *	with DRM_IOCTL_MODE_MAP_DUMB command.
+	 */
+
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("failed to lookup gem object.\n");
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+
+	exynos_gem_obj = to_exynos_gem_obj(obj);
+
+	*offset = get_gem_mmap_offset(&exynos_gem_obj->base);
+
+	drm_gem_object_unreference(obj);
+
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct drm_device *dev = obj->dev;
+	unsigned long pfn;
+	pgoff_t page_offset;
+	int ret;
+
+	page_offset = ((unsigned long)vmf->virtual_address -
+			vma->vm_start) >> PAGE_SHIFT;
+
+	mutex_lock(&dev->struct_mutex);
+
+	pfn = (exynos_gem_obj->entry->paddr >> PAGE_SHIFT) + page_offset;
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return convert_to_vm_err_msg(ret);
+}
+
+int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* set vm_area_struct. */
+	ret = drm_gem_mmap(filp, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap.\n");
+		return ret;
+	}
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	return ret;
+}
+
+
+int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int handle)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * obj->refcount and obj->handle_count are decreased and
+	 * if both them are 0 then exynos_drm_gem_free_object()
+	 * would be called by callback to release resources.
+	 */
+	ret = drm_gem_handle_delete(file_priv, handle);
+	if (ret < 0) {
+		DRM_ERROR("failed to delete drm_gem_handle.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM GEM Module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
new file mode 100644
index 000000000000..e5fc0148277b
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -0,0 +1,107 @@
+/* exynos_drm_gem.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authoer: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_GEM_H_
+#define _EXYNOS_DRM_GEM_H_
+
+#define to_exynos_gem_obj(x)	container_of(x,\
+			struct exynos_drm_gem_obj, base)
+
+/*
+ * exynos drm buffer structure.
+ *
+ * @base: a gem object.
+ *	- a new handle to this gem object would be created
+ *	by drm_gem_handle_create().
+ * @entry: pointer to exynos drm buffer entry object.
+ *	- containing the information to physically
+ *	continuous memory region allocated by user request
+ *	or at framebuffer creation.
+ *
+ * P.S. this object would be transfered to user as kms_bo.handle so
+ *	user can access the buffer through kms_bo.handle.
+ */
+struct exynos_drm_gem_obj {
+	struct drm_gem_object base;
+	struct exynos_drm_buf_entry *entry;
+};
+
+/* create a new buffer and get a new gem handle. */
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int size,
+		unsigned int *handle);
+
+/*
+ * request gem object creation and buffer allocation as the size
+ * that it is calculated with framebuffer information such as width,
+ * height and bpp.
+ */
+int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* get buffer offset to map to user space. */
+int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* unmap a buffer from user space. */
+int exynos_drm_gem_munmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* initialize gem object. */
+int exynos_drm_gem_init_object(struct drm_gem_object *obj);
+
+/* free gem object. */
+void exynos_drm_gem_free_object(struct drm_gem_object *gem_obj);
+
+/* create memory region for drm framebuffer. */
+int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args);
+
+/* map memory region for drm framebuffer to user space. */
+int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
+		struct drm_device *dev, uint32_t handle, uint64_t *offset);
+
+/* page fault handler and mmap fault address(virtual) to physical memory. */
+int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
+/*
+ * mmap the physically continuous memory that a gem object contains
+ * to user space.
+ */
+int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* set vm_flags and we can change the vm attribute to other one at here. */
+int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/*
+ * destroy memory region allocated.
+ *	- a gem handle and physical memory region pointed by a gem object
+ *	would be released by drm_gem_handle_delete().
+ */
+int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int handle);
+
+#endif
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 7ad43c6b1db7..79e8ebc05307 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -115,6 +115,7 @@ static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector,
 	u8 msg[20];
 	int msg_bytes = send_bytes + 4;
 	u8 ack;
+	unsigned retry;
 
 	if (send_bytes > 16)
 		return -1;
@@ -125,20 +126,22 @@ static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector,
 	msg[3] = (msg_bytes << 4) | (send_bytes - 1);
 	memcpy(&msg[4], send, send_bytes);
 
-	while (1) {
+	for (retry = 0; retry < 4; retry++) {
 		ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus,
 					    msg, msg_bytes, NULL, 0, delay, &ack);
-		if (ret < 0)
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0)
 			return ret;
 		if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK)
-			break;
+			return send_bytes;
 		else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER)
 			udelay(400);
 		else
 			return -EIO;
 	}
 
-	return send_bytes;
+	return -EIO;
 }
 
 static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector,
@@ -149,26 +152,31 @@ static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector,
 	int msg_bytes = 4;
 	u8 ack;
 	int ret;
+	unsigned retry;
 
 	msg[0] = address;
 	msg[1] = address >> 8;
 	msg[2] = AUX_NATIVE_READ << 4;
 	msg[3] = (msg_bytes << 4) | (recv_bytes - 1);
 
-	while (1) {
+	for (retry = 0; retry < 4; retry++) {
 		ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus,
 					    msg, msg_bytes, recv, recv_bytes, delay, &ack);
-		if (ret == 0)
-			return -EPROTO;
-		if (ret < 0)
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0)
 			return ret;
 		if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK)
 			return ret;
 		else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER)
 			udelay(400);
+		else if (ret == 0)
+			return -EPROTO;
 		else
 			return -EIO;
 	}
+
+	return -EIO;
 }
 
 static void radeon_write_dpcd_reg(struct radeon_connector *radeon_connector,
@@ -232,7 +240,9 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
 	for (retry = 0; retry < 4; retry++) {
 		ret = radeon_process_aux_ch(auxch,
 					    msg, msg_bytes, reply, reply_bytes, 0, &ack);
-		if (ret < 0) {
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0) {
 			DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
 			return ret;
 		}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index a72dbb3e1335..ed406e8404a3 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -39,7 +39,7 @@
 
 static void evergreen_gpu_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 
 void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
 {
@@ -1407,7 +1407,8 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
 	WREG32(CP_RB_RPTR_WR, 0);
-	WREG32(CP_RB_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(CP_RB_WPTR, rdev->cp.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
@@ -1429,7 +1430,6 @@ int evergreen_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
 
 	rdev->cp.rptr = RREG32(CP_RB_RPTR);
-	rdev->cp.wptr = RREG32(CP_RB_WPTR);
 
 	evergreen_cp_start(rdev);
 	rdev->cp.ready = true;
@@ -1593,48 +1593,6 @@ static u32 evergreen_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 	return backend_map;
 }
 
-static void evergreen_program_channel_remap(struct radeon_device *rdev)
-{
-	u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp;
-
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-	case 0:
-	case 1:
-	case 2:
-	case 3:
-	default:
-		/* default mapping */
-		mc_shared_chremap = 0x00fac688;
-		break;
-	}
-
-	switch (rdev->family) {
-	case CHIP_HEMLOCK:
-	case CHIP_CYPRESS:
-	case CHIP_BARTS:
-		tcp_chan_steer_lo = 0x54763210;
-		tcp_chan_steer_hi = 0x0000ba98;
-		break;
-	case CHIP_JUNIPER:
-	case CHIP_REDWOOD:
-	case CHIP_CEDAR:
-	case CHIP_PALM:
-	case CHIP_SUMO:
-	case CHIP_SUMO2:
-	case CHIP_TURKS:
-	case CHIP_CAICOS:
-	default:
-		tcp_chan_steer_lo = 0x76543210;
-		tcp_chan_steer_hi = 0x0000ba98;
-		break;
-	}
-
-	WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo);
-	WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi);
-	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
-}
-
 static void evergreen_gpu_init(struct radeon_device *rdev)
 {
 	u32 cc_rb_backend_disable = 0;
@@ -2081,8 +2039,6 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 
-	evergreen_program_channel_remap(rdev);
-
 	num_shader_engines = ((RREG32(GB_ADDR_CONFIG) & NUM_SHADER_ENGINES(3)) >> 12) + 1;
 	grbm_gfx_index = INSTANCE_BROADCAST_WRITES;
 
@@ -2633,7 +2589,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void evergreen_irq_ack(struct radeon_device *rdev)
+static void evergreen_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -2744,7 +2700,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev)
 	r600_rlc_stop(rdev);
 }
 
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
+static u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
@@ -3050,8 +3006,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 	int r;
 
 	/* enable pcie gen2 link */
-	if (!ASIC_IS_DCE5(rdev))
-		evergreen_pcie_gen2_enable(rdev);
+	evergreen_pcie_gen2_enable(rdev);
 
 	if (ASIC_IS_DCE5(rdev)) {
 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
@@ -3088,7 +3043,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 
 	r = evergreen_blit_init(rdev);
 	if (r) {
-		evergreen_blit_fini(rdev);
+		r600_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
@@ -3154,43 +3109,14 @@ int evergreen_resume(struct radeon_device *rdev)
 
 int evergreen_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
 	rdev->cp.ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
+	r600_blit_suspend(rdev);
 
-	/* unpin shaders bo */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (likely(r == 0)) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-
-	return 0;
-}
-
-int evergreen_copy_blit(struct radeon_device *rdev,
-			uint64_t src_offset, uint64_t dst_offset,
-			unsigned num_pages, struct radeon_fence *fence)
-{
-	int r;
-
-	mutex_lock(&rdev->r600_blit.mutex);
-	rdev->r600_blit.vb_ib = NULL;
-	r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
-	if (r) {
-		if (rdev->r600_blit.vb_ib)
-			radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-		mutex_unlock(&rdev->r600_blit.mutex);
-		return r;
-	}
-	evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
-	evergreen_blit_done_copy(rdev, fence);
-	mutex_unlock(&rdev->r600_blit.mutex);
 	return 0;
 }
 
@@ -3302,7 +3228,7 @@ int evergreen_init(struct radeon_device *rdev)
 
 void evergreen_fini(struct radeon_device *rdev)
 {
-	evergreen_blit_fini(rdev);
+	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
@@ -3318,7 +3244,7 @@ void evergreen_fini(struct radeon_device *rdev)
 	rdev->bios = NULL;
 }
 
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, speed_cntl;
 
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb251858e72..dcf11bbc06d9 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format,
 	if (h < 8)
 		h = 8;
 
-	cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
+	cb_color_info = CB_FORMAT(format) |
+		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
@@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format,
 	radeon_ring_write(rdev, slice);
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, cb_color_info);
-	radeon_ring_write(rdev, (1 << 4));
+	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
@@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
 
 	/* high addr, stride */
-	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+		SQ_VTXC_STRIDE(16);
 #ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= (2 << 30);
+	sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 	/* xyzw swizzles */
-	sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
+	sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
+		SQ_VTCX_SEL_Y(SQ_SEL_Y) |
+		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
+		SQ_VTCX_SEL_W(SQ_SEL_W);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
 	radeon_ring_write(rdev, 0x580);
@@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+	radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
 
 	if ((rdev->family == CHIP_CEDAR) ||
 	    (rdev->family == CHIP_PALM) ||
@@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev,
 	if (h < 1)
 		h = 1;
 
-	sq_tex_resource_word0 = (1 << 0); /* 2D */
+	sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
 	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
 				  ((w - 1) << 18));
-	sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
+	sq_tex_resource_word1 = ((h - 1) << 0) |
+				TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	/* xyzw swizzles */
-	sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
+	sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
+				TEX_DST_SEL_Y(SQ_SEL_Y) |
+				TEX_DST_SEL_Z(SQ_SEL_Z) |
+				TEX_DST_SEL_W(SQ_SEL_W);
 
-	sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);
+	sq_tex_resource_word7 = format |
+		S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
 	radeon_ring_write(rdev, 0);
@@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev)
 
 }
 
-static inline uint32_t i2f(uint32_t input)
-{
-	u32 result, i, exponent, fraction;
-
-	if ((input & 0x3fff) == 0)
-		result = 0; /* 0 is a special case */
-	else {
-		exponent = 140; /* exponent biased by 127; */
-		fraction = (input & 0x3fff) << 10; /* cheat and only
-						      handle numbers below 2^^15 */
-		for (i = 0; i < 14; i++) {
-			if (fraction & 0x800000)
-				break;
-			else {
-				fraction = fraction << 1; /* keep
-							     shifting left until top bit = 1 */
-				exponent = exponent - 1;
-			}
-		}
-		result = exponent << 23 | (fraction & 0x7fffff); /* mask
-								    off top bit; assumed 1 */
-	}
-	return result;
-}
-
 int evergreen_blit_init(struct radeon_device *rdev)
 {
 	u32 obj_size;
@@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev)
 	u32 packet2s[16];
 	int num_packet2s = 0;
 
+	rdev->r600_blit.primitives.set_render_target = set_render_target;
+	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+	rdev->r600_blit.primitives.set_shaders = set_shaders;
+	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+	rdev->r600_blit.primitives.set_scissors = set_scissors;
+	rdev->r600_blit.primitives.draw_auto = draw_auto;
+	rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+	rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+	rdev->r600_blit.ring_size_common += 5; /* done copy */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+	rdev->r600_blit.ring_size_per_loop = 74;
+
+	rdev->r600_blit.max_dim = 16384;
+
 	/* pin copy shader into vram if already initialized */
 	if (rdev->r600_blit.shader_obj)
 		goto done;
@@ -712,277 +716,3 @@ done:
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
 	return 0;
 }
-
-void evergreen_blit_fini(struct radeon_device *rdev)
-{
-	int r;
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-	if (rdev->r600_blit.shader_obj == NULL)
-		return;
-	/* If we can't reserve the bo, unref should be enough to destroy
-	 * it when it becomes idle.
-	 */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (!r) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-	radeon_bo_unref(&rdev->r600_blit.shader_obj);
-}
-
-static int evergreen_vb_ib_get(struct radeon_device *rdev)
-{
-	int r;
-	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
-	if (r) {
-		DRM_ERROR("failed to get IB for vertex buffer\n");
-		return r;
-	}
-
-	rdev->r600_blit.vb_total = 64*1024;
-	rdev->r600_blit.vb_used = 0;
-	return 0;
-}
-
-static void evergreen_vb_ib_put(struct radeon_device *rdev)
-{
-	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
-	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-}
-
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
-{
-	int r;
-	int ring_size, line_size;
-	int max_size;
-	/* loops of emits + fence emit possible */
-	int dwords_per_loop = 74, num_loops;
-
-	r = evergreen_vb_ib_get(rdev);
-	if (r)
-		return r;
-
-	/* 8 bpp vs 32 bpp for xfer unit */
-	if (size_bytes & 3)
-		line_size = 8192;
-	else
-		line_size = 8192 * 4;
-
-	max_size = 8192 * line_size;
-
-	/* major loops cover the max size transfer */
-	num_loops = ((size_bytes + max_size) / max_size);
-	/* minor loops cover the extra non aligned bits */
-	num_loops += ((size_bytes % line_size) ? 1 : 0);
-	/* calculate number of loops correctly */
-	ring_size = num_loops * dwords_per_loop;
-	/* set default  + shaders */
-	ring_size += 55; /* shaders + def state */
-	ring_size += 10; /* fence emit for VB IB */
-	ring_size += 5; /* done copy */
-	ring_size += 10; /* fence emit for done copy */
-	r = radeon_ring_lock(rdev, ring_size);
-	if (r)
-		return r;
-
-	set_default_state(rdev); /* 36 */
-	set_shaders(rdev); /* 16 */
-	return 0;
-}
-
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
-{
-	int r;
-
-	if (rdev->r600_blit.vb_ib)
-		evergreen_vb_ib_put(rdev);
-
-	if (fence)
-		r = radeon_fence_emit(rdev, fence);
-
-	radeon_ring_unlock_commit(rdev);
-}
-
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
-			     u64 src_gpu_addr, u64 dst_gpu_addr,
-			     int size_bytes)
-{
-	int max_bytes;
-	u64 vb_gpu_addr;
-	u32 *vb;
-
-	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-		  size_bytes, rdev->r600_blit.vb_used);
-	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-		max_bytes = 8192;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = src_gpu_addr & 255;
-			int dst_x = dst_gpu_addr & 255;
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x);
-			vb[1] = 0;
-			vb[2] = i2f(src_x);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
-
-			/* src 10 */
-			set_tex_resource(rdev, FMT_8,
-					 src_x + cur_size, h, src_x + cur_size,
-					 src_gpu_addr);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-
-			/* dst 17 */
-			set_render_target(rdev, COLOR_8,
-					  dst_x + cur_size, h,
-					  dst_gpu_addr);
-
-			/* scissors 12 */
-			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
-
-			/* 15 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
-	} else {
-		max_bytes = 8192 * 4;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = (src_gpu_addr & 255);
-			int dst_x = (dst_gpu_addr & 255);
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x / 4);
-			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
-
-			/* src 10 */
-			set_tex_resource(rdev, FMT_8_8_8_8,
-					 (src_x + cur_size) / 4,
-					 h, (src_x + cur_size) / 4,
-					 src_gpu_addr);
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-			/* dst 17 */
-			set_render_target(rdev, COLOR_8_8_8_8,
-					  (dst_x + cur_size) / 4, h,
-					  dst_gpu_addr);
-
-			/* scissors 12  */
-			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
-			/* Vertex buffer setup 15 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			/* 74 ring dwords per loop */
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
-	}
-}
-
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index a134790903d3..7fdfa8ea7570 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 	track->db_s_write_bo = NULL;
 }
 
-static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
-{
-	/* XXX fill in */
-	return 0;
-}
-
 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 {
 	struct evergreen_cs_track *track = p->track;
@@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 }
 
 /**
- * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser:		parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
-{
-	struct radeon_cs_packet p3reloc;
-	int r;
-
-	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
-	if (r) {
-		return 0;
-	}
-	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
-		return 0;
-	}
-	return 1;
-}
-
-/**
  * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
  * @parser:		parser structure holding parsing context.
  *
@@ -414,7 +386,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -990,7 +962,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 						   struct radeon_bo *texture,
 						   struct radeon_bo *mipmap)
 {
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 7363d9dec909..b937c49054d9 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -941,11 +941,15 @@
 #define	CB_COLOR0_SLICE					0x28c68
 #define	CB_COLOR0_VIEW					0x28c6c
 #define	CB_COLOR0_INFO					0x28c70
+#	define CB_FORMAT(x)				((x) << 2)
 #       define CB_ARRAY_MODE(x)                         ((x) << 8)
 #       define ARRAY_LINEAR_GENERAL                     0
 #       define ARRAY_LINEAR_ALIGNED                     1
 #       define ARRAY_1D_TILED_THIN1                     2
 #       define ARRAY_2D_TILED_THIN1                     4
+#	define CB_SOURCE_FORMAT(x)			((x) << 24)
+#	define CB_SF_EXPORT_FULL			0
+#	define CB_SF_EXPORT_NORM			1
 #define	CB_COLOR0_ATTRIB				0x28c74
 #define	CB_COLOR0_DIM					0x28c78
 /* only CB0-7 blocks have these regs */
@@ -1107,15 +1111,53 @@
 #define	CB_COLOR7_CLEAR_WORD3				0x28e3c
 
 #define SQ_TEX_RESOURCE_WORD0_0                         0x30000
+#	define TEX_DIM(x)				((x) << 0)
+#	define SQ_TEX_DIM_1D				0
+#	define SQ_TEX_DIM_2D				1
+#	define SQ_TEX_DIM_3D				2
+#	define SQ_TEX_DIM_CUBEMAP			3
+#	define SQ_TEX_DIM_1D_ARRAY			4
+#	define SQ_TEX_DIM_2D_ARRAY			5
+#	define SQ_TEX_DIM_2D_MSAA			6
+#	define SQ_TEX_DIM_2D_ARRAY_MSAA			7
 #define SQ_TEX_RESOURCE_WORD1_0                         0x30004
 #       define TEX_ARRAY_MODE(x)                        ((x) << 28)
 #define SQ_TEX_RESOURCE_WORD2_0                         0x30008
 #define SQ_TEX_RESOURCE_WORD3_0                         0x3000C
 #define SQ_TEX_RESOURCE_WORD4_0                         0x30010
+#	define TEX_DST_SEL_X(x)				((x) << 16)
+#	define TEX_DST_SEL_Y(x)				((x) << 19)
+#	define TEX_DST_SEL_Z(x)				((x) << 22)
+#	define TEX_DST_SEL_W(x)				((x) << 25)
+#	define SQ_SEL_X					0
+#	define SQ_SEL_Y					1
+#	define SQ_SEL_Z					2
+#	define SQ_SEL_W					3
+#	define SQ_SEL_0					4
+#	define SQ_SEL_1					5
 #define SQ_TEX_RESOURCE_WORD5_0                         0x30014
 #define SQ_TEX_RESOURCE_WORD6_0                         0x30018
 #define SQ_TEX_RESOURCE_WORD7_0                         0x3001c
 
+#define SQ_VTX_CONSTANT_WORD0_0				0x30000
+#define SQ_VTX_CONSTANT_WORD1_0				0x30004
+#define SQ_VTX_CONSTANT_WORD2_0				0x30008
+#	define SQ_VTXC_BASE_ADDR_HI(x)			((x) << 0)
+#	define SQ_VTXC_STRIDE(x)			((x) << 8)
+#	define SQ_VTXC_ENDIAN_SWAP(x)			((x) << 30)
+#	define SQ_ENDIAN_NONE				0
+#	define SQ_ENDIAN_8IN16				1
+#	define SQ_ENDIAN_8IN32				2
+#define SQ_VTX_CONSTANT_WORD3_0				0x3000C
+#	define SQ_VTCX_SEL_X(x)				((x) << 3)
+#	define SQ_VTCX_SEL_Y(x)				((x) << 6)
+#	define SQ_VTCX_SEL_Z(x)				((x) << 9)
+#	define SQ_VTCX_SEL_W(x)				((x) << 12)
+#define SQ_VTX_CONSTANT_WORD4_0				0x30010
+#define SQ_VTX_CONSTANT_WORD5_0                         0x30014
+#define SQ_VTX_CONSTANT_WORD6_0                         0x30018
+#define SQ_VTX_CONSTANT_WORD7_0                         0x3001c
+
 /* cayman 3D regs */
 #define CAYMAN_VGT_OFFCHIP_LDS_BASE			0x89B0
 #define CAYMAN_DB_EQAA					0x28804
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index bf4fce7c43f2..556b7bc3418b 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -40,6 +40,7 @@ extern void evergreen_mc_program(struct radeon_device *rdev);
 extern void evergreen_irq_suspend(struct radeon_device *rdev);
 extern int evergreen_mc_init(struct radeon_device *rdev);
 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
+extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 
 #define EVERGREEN_PFP_UCODE_SIZE 1120
 #define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -569,36 +570,6 @@ static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 	return backend_map;
 }
 
-static void cayman_program_channel_remap(struct radeon_device *rdev)
-{
-	u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp;
-
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-	case 0:
-	case 1:
-	case 2:
-	case 3:
-	default:
-		/* default mapping */
-		mc_shared_chremap = 0x00fac688;
-		break;
-	}
-
-	switch (rdev->family) {
-	case CHIP_CAYMAN:
-	default:
-		//tcp_chan_steer_lo = 0x54763210
-		tcp_chan_steer_lo = 0x76543210;
-		tcp_chan_steer_hi = 0x0000ba98;
-		break;
-	}
-
-	WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo);
-	WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi);
-	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
-}
-
 static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev,
 					    u32 disable_mask_per_se,
 					    u32 max_disable_mask_per_se,
@@ -842,8 +813,6 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 
-	cayman_program_channel_remap(rdev);
-
 	/* primary versions */
 	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
 	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
@@ -1190,7 +1159,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
-	WREG32(CP_RB0_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(CP_RB0_WPTR, rdev->cp.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1210,7 +1180,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8);
 
 	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
-	rdev->cp.wptr = RREG32(CP_RB0_WPTR);
 
 	/* ring1  - compute only */
 	/* Set ring buffer size */
@@ -1223,7 +1192,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
-	WREG32(CP_RB1_WPTR, 0);
+	rdev->cp1.wptr = 0;
+	WREG32(CP_RB1_WPTR, rdev->cp1.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1235,7 +1205,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8);
 
 	rdev->cp1.rptr = RREG32(CP_RB1_RPTR);
-	rdev->cp1.wptr = RREG32(CP_RB1_WPTR);
 
 	/* ring2 - compute only */
 	/* Set ring buffer size */
@@ -1248,7 +1217,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
 
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
-	WREG32(CP_RB2_WPTR, 0);
+	rdev->cp2.wptr = 0;
+	WREG32(CP_RB2_WPTR, rdev->cp2.wptr);
 
 	/* set the wb address wether it's enabled or not */
 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1260,7 +1230,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8);
 
 	rdev->cp2.rptr = RREG32(CP_RB2_RPTR);
-	rdev->cp2.wptr = RREG32(CP_RB2_WPTR);
 
 	/* start the rings */
 	cayman_cp_start(rdev);
@@ -1376,6 +1345,9 @@ static int cayman_startup(struct radeon_device *rdev)
 {
 	int r;
 
+	/* enable pcie gen2 link */
+	evergreen_pcie_gen2_enable(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
 		r = ni_init_microcode(rdev);
 		if (r) {
@@ -1397,7 +1369,7 @@ static int cayman_startup(struct radeon_device *rdev)
 
 	r = evergreen_blit_init(rdev);
 	if (r) {
-		evergreen_blit_fini(rdev);
+		r600_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
@@ -1458,21 +1430,13 @@ int cayman_resume(struct radeon_device *rdev)
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	/* FIXME: we should wait for ring to be empty */
 	cayman_cp_enable(rdev, false);
 	rdev->cp.ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
-
-	/* unpin shaders bo */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (likely(r == 0)) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
+	r600_blit_suspend(rdev);
 
 	return 0;
 }
@@ -1585,7 +1549,7 @@ int cayman_init(struct radeon_device *rdev)
 
 void cayman_fini(struct radeon_device *rdev)
 {
-	evergreen_blit_fini(rdev);
+	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 574f2c7c6dd9..8f8b8fa14357 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520);
  * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  */
 
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg)
+{
+	int r;
+	u32 tile_flags = 0;
+	u32 tmp;
+	struct radeon_cs_reloc *reloc;
+	u32 value;
+
+	r = r100_cs_packet_next_reloc(p, &reloc);
+	if (r) {
+		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+			  idx, reg);
+		r100_cs_dump_packet(p, pkt);
+		return r;
+	}
+	value = radeon_get_ib_value(p, idx);
+	tmp = value & 0x003fffff;
+	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+		tile_flags |= RADEON_DST_TILE_MACRO;
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+		if (reg == RADEON_SRC_PITCH_OFFSET) {
+			DRM_ERROR("Cannot src blit from microtiled surface\n");
+			r100_cs_dump_packet(p, pkt);
+			return -EINVAL;
+		}
+		tile_flags |= RADEON_DST_TILE_MICRO;
+	}
+
+	tmp |= tile_flags;
+	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
+	return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx)
+{
+	unsigned c, i;
+	struct radeon_cs_reloc *reloc;
+	struct r100_cs_track *track;
+	int r = 0;
+	volatile uint32_t *ib;
+	u32 idx_value;
+
+	ib = p->ib->ptr;
+	track = (struct r100_cs_track *)p->track;
+	c = radeon_get_ib_value(p, idx++) & 0x1F;
+	if (c > 16) {
+	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+		      pkt->opcode);
+	    r100_cs_dump_packet(p, pkt);
+	    return -EINVAL;
+	}
+	track->num_arrays = c;
+	for (i = 0; i < (c - 1); i+=2, idx+=3) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize &= 0x7F;
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 1].robj = reloc->robj;
+		track->arrays[i + 1].esize = idx_value >> 24;
+		track->arrays[i + 1].esize &= 0x7F;
+	}
+	if (c & 1) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].esize &= 0x7F;
+	}
+	return r;
+}
+
 void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
@@ -591,7 +693,7 @@ void r100_irq_disable(struct radeon_device *rdev)
 	WREG32(R_000044_GEN_INT_STATUS, tmp);
 }
 
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+static uint32_t r100_irq_ack(struct radeon_device *rdev)
 {
 	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 	uint32_t irq_mask = RADEON_SW_INT_TEST |
@@ -724,11 +826,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
 int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
-		   unsigned num_pages,
+		   unsigned num_gpu_pages,
 		   struct radeon_fence *fence)
 {
 	uint32_t cur_pages;
-	uint32_t stride_bytes = PAGE_SIZE;
+	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 	uint32_t pitch;
 	uint32_t stride_pixels;
 	unsigned ndw;
@@ -740,7 +842,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 	/* radeon pitch is /64 */
 	pitch = stride_bytes / 64;
 	stride_pixels = stride_bytes / 4;
-	num_loops = DIV_ROUND_UP(num_pages, 8191);
+	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
 
 	/* Ask for enough room for blit + flush + fence */
 	ndw = 64 + (10 * num_loops);
@@ -749,12 +851,12 @@ int r100_copy_blit(struct radeon_device *rdev,
 		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
 		return -EINVAL;
 	}
-	while (num_pages > 0) {
-		cur_pages = num_pages;
+	while (num_gpu_pages > 0) {
+		cur_pages = num_gpu_pages;
 		if (cur_pages > 8191) {
 			cur_pages = 8191;
 		}
-		num_pages -= cur_pages;
+		num_gpu_pages -= cur_pages;
 
 		/* pages are in Y direction - height
 		   page width in X direction - width */
@@ -776,8 +878,8 @@ int r100_copy_blit(struct radeon_device *rdev,
 		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
 		radeon_ring_write(rdev, 0);
 		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
-		radeon_ring_write(rdev, num_pages);
-		radeon_ring_write(rdev, num_pages);
+		radeon_ring_write(rdev, num_gpu_pages);
+		radeon_ring_write(rdev, num_gpu_pages);
 		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
 	}
 	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
@@ -993,7 +1095,8 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	/* Force read & write ptr to 0 */
 	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
 	WREG32(RADEON_CP_RB_RPTR_WR, 0);
-	WREG32(RADEON_CP_RB_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(R_00070C_CP_RB_RPTR_ADDR,
@@ -1010,9 +1113,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	WREG32(RADEON_CP_RB_CNTL, tmp);
 	udelay(10);
 	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
-	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
-	/* protect against crazy HW on resume */
-	rdev->cp.wptr &= rdev->cp.ptr_mask;
 	/* Set cp mode to bus mastering & enable cp*/
 	WREG32(RADEON_CP_CSQ_MODE,
 	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
@@ -3152,7 +3252,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
 	}
 }
 
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
 {
 	DRM_ERROR("pitch                      %d\n", t->pitch);
 	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
@@ -3970,3 +4070,43 @@ int r100_init(struct radeon_device *rdev)
 	}
 	return 0;
 }
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	if (reg < rdev->rmmio_size)
+		return readl(((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	if (reg < rdev->rmmio_size)
+		writel(v, ((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
+{
+	if (reg < rdev->rio_mem_size)
+		return ioread32(rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
+
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	if (reg < rdev->rio_mem_size)
+		iowrite32(v, rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 686f9dc5d4bd..6a603b378adb 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		       struct radeon_cs_packet *pkt,
 		       unsigned idx, unsigned reg);
 
-
-
-static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
-					  struct radeon_cs_packet *pkt,
-					  unsigned idx,
-					  unsigned reg)
-{
-	int r;
-	u32 tile_flags = 0;
-	u32 tmp;
-	struct radeon_cs_reloc *reloc;
-	u32 value;
-
-	r = r100_cs_packet_next_reloc(p, &reloc);
-	if (r) {
-		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-			  idx, reg);
-		r100_cs_dump_packet(p, pkt);
-		return r;
-	}
-	value = radeon_get_ib_value(p, idx);
-	tmp = value & 0x003fffff;
-	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-		tile_flags |= RADEON_DST_TILE_MACRO;
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-		if (reg == RADEON_SRC_PITCH_OFFSET) {
-			DRM_ERROR("Cannot src blit from microtiled surface\n");
-			r100_cs_dump_packet(p, pkt);
-			return -EINVAL;
-		}
-		tile_flags |= RADEON_DST_TILE_MICRO;
-	}
-
-	tmp |= tile_flags;
-	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
-	return 0;
-}
-
-static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
-					   struct radeon_cs_packet *pkt,
-					   int idx)
-{
-	unsigned c, i;
-	struct radeon_cs_reloc *reloc;
-	struct r100_cs_track *track;
-	int r = 0;
-	volatile uint32_t *ib;
-	u32 idx_value;
-
-	ib = p->ib->ptr;
-	track = (struct r100_cs_track *)p->track;
-	c = radeon_get_ib_value(p, idx++) & 0x1F;
-	if (c > 16) {
-	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
-		      pkt->opcode);
-	    r100_cs_dump_packet(p, pkt);
-	    return -EINVAL;
-	}
-	track->num_arrays = c;
-	for (i = 0; i < (c - 1); i+=2, idx+=3) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize &= 0x7F;
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 1].robj = reloc->robj;
-		track->arrays[i + 1].esize = idx_value >> 24;
-		track->arrays[i + 1].esize &= 0x7F;
-	}
-	if (c & 1) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-					  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].esize &= 0x7F;
-	}
-	return r;
-}
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg);
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx);
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index f24058300413..a1f3ba063c2d 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -84,7 +84,7 @@ static int r200_get_vtx_size_0(uint32_t vtx_fmt_0)
 int r200_copy_dma(struct radeon_device *rdev,
 		  uint64_t src_offset,
 		  uint64_t dst_offset,
-		  unsigned num_pages,
+		  unsigned num_gpu_pages,
 		  struct radeon_fence *fence)
 {
 	uint32_t size;
@@ -93,7 +93,7 @@ int r200_copy_dma(struct radeon_device *rdev,
 	int r = 0;
 
 	/* radeon pitch is /64 */
-	size = num_pages << PAGE_SHIFT;
+	size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
 	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
 	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index c5c2742e4140..1fe98b421c9b 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 /**
  * Emit the sequence to pacify R300.
  */
-static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
+static void r300_pacify(drm_radeon_private_t *dev_priv)
 {
 	uint32_t cache_z, cache_3d, cache_2d;
 	RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 334aee6eab7c..12470b090ddf 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2212,7 +2212,8 @@ int r600_cp_resume(struct radeon_device *rdev)
 	/* Initialize the ring buffer's read and write pointers */
 	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
 	WREG32(CP_RB_RPTR_WR, 0);
-	WREG32(CP_RB_WPTR, 0);
+	rdev->cp.wptr = 0;
+	WREG32(CP_RB_WPTR, rdev->cp.wptr);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(CP_RB_RPTR_ADDR,
@@ -2234,7 +2235,6 @@ int r600_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
 
 	rdev->cp.rptr = RREG32(CP_RB_RPTR);
-	rdev->cp.wptr = RREG32(CP_RB_WPTR);
 
 	r600_cp_start(rdev);
 	rdev->cp.ready = true;
@@ -2356,26 +2356,42 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 }
 
 int r600_copy_blit(struct radeon_device *rdev,
-		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_pages, struct radeon_fence *fence)
+		   uint64_t src_offset,
+		   uint64_t dst_offset,
+		   unsigned num_gpu_pages,
+		   struct radeon_fence *fence)
 {
 	int r;
 
 	mutex_lock(&rdev->r600_blit.mutex);
 	rdev->r600_blit.vb_ib = NULL;
-	r = r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
+	r = r600_blit_prepare_copy(rdev, num_gpu_pages);
 	if (r) {
 		if (rdev->r600_blit.vb_ib)
 			radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 		mutex_unlock(&rdev->r600_blit.mutex);
 		return r;
 	}
-	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
+	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages);
 	r600_blit_done_copy(rdev, fence);
 	mutex_unlock(&rdev->r600_blit.mutex);
 	return 0;
 }
 
+void r600_blit_suspend(struct radeon_device *rdev)
+{
+	int r;
+
+	/* unpin shaders bo */
+	if (rdev->r600_blit.shader_obj) {
+		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
+		if (!r) {
+			radeon_bo_unpin(rdev->r600_blit.shader_obj);
+			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
+		}
+	}
+}
+
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size)
@@ -2495,8 +2511,6 @@ int r600_resume(struct radeon_device *rdev)
 
 int r600_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r600_cp_stop(rdev);
@@ -2504,14 +2518,8 @@ int r600_suspend(struct radeon_device *rdev)
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
-	/* unpin shaders bo */
-	if (rdev->r600_blit.shader_obj) {
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (!r) {
-			radeon_bo_unpin(rdev->r600_blit.shader_obj);
-			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		}
-	}
+	r600_blit_suspend(rdev);
+
 	return 0;
 }
 
@@ -3138,7 +3146,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void r600_irq_ack(struct radeon_device *rdev)
+static void r600_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -3239,7 +3247,7 @@ void r600_irq_disable(struct radeon_device *rdev)
 	r600_disable_interrupt_state(rdev);
 }
 
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
+static u32 r600_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7f1043448d25..3c031a48205d 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -41,7 +41,7 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
-static inline void
+static void
 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
 {
 	u32 cb_color_info;
@@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 		    u32 sync_type, u32 size, u64 mc_addr)
 {
@@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 set_shaders(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev)
 			    R600_SH_ACTION_ENA, 512, gpu_addr);
 }
 
-static inline void
+static void
 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 {
 	uint32_t sq_vtx_constant_word2;
@@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 				    R600_VC_ACTION_ENA, 48, gpu_addr);
 }
 
-static inline void
+static void
 set_tex_resource(drm_radeon_private_t *dev_priv,
 		 int format, int w, int h, int pitch, u64 gpu_addr)
 {
@@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
 
 }
 
-static inline void
+static void
 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 {
 	RING_LOCALS;
@@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 draw_auto(drm_radeon_private_t *dev_priv)
 {
 	RING_LOCALS;
@@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv)
 	COMMIT_RING();
 }
 
-static inline void
+static void
 set_default_state(drm_radeon_private_t *dev_priv)
 {
 	int i;
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	ADVANCE_RING();
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input)
 }
 
 
-static inline int r600_nomm_get_vb(struct drm_device *dev)
+static int r600_nomm_get_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	dev_priv->blit_vb = radeon_freelist_get(dev);
@@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev)
 	return 0;
 }
 
-static inline void r600_nomm_put_vb(struct drm_device *dev)
+static void r600_nomm_put_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev)
 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
 }
 
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	return (((char *)dev->agp_buffer_map->handle +
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aa74c3f8cb6..c4cf1308d4a1 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -42,6 +42,9 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
+#define RECT_UNIT_H           32
+#define RECT_UNIT_W           (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
+
 /* emits 21 on rv770+, 23 on r600 */
 static void
 set_render_target(struct radeon_device *rdev, int format,
@@ -54,7 +57,9 @@ set_render_target(struct radeon_device *rdev, int format,
 	if (h < 8)
 		h = 8;
 
-	cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
+	cb_color_info = CB_FORMAT(format) |
+		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
@@ -164,9 +169,10 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 {
 	u32 sq_vtx_constant_word2;
 
-	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+		SQ_VTXC_STRIDE(16);
 #ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= (2 << 30);
+	sq_vtx_constant_word2 |=  SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
@@ -202,18 +208,19 @@ set_tex_resource(struct radeon_device *rdev,
 	if (h < 1)
 		h = 1;
 
-	sq_tex_resource_word0 = (1 << 0) | (1 << 3);
-	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
-				  ((w - 1) << 19));
+	sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
+		S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+	sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
+		S_038000_TEX_WIDTH(w - 1);
 
-	sq_tex_resource_word1 = (format << 26);
-	sq_tex_resource_word1 |= ((h - 1) << 0);
+	sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
+	sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);
 
-	sq_tex_resource_word4 = ((1 << 14) |
-				 (0 << 16) |
-				 (1 << 19) |
-				 (2 << 22) |
-				 (3 << 25));
+	sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
+		S_038010_DST_SEL_X(SQ_SEL_X) |
+		S_038010_DST_SEL_Y(SQ_SEL_Y) |
+		S_038010_DST_SEL_Z(SQ_SEL_Z) |
+		S_038010_DST_SEL_W(SQ_SEL_W);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
 	radeon_ring_write(rdev, 0);
@@ -450,7 +457,7 @@ set_default_state(struct radeon_device *rdev)
 	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -483,6 +490,27 @@ int r600_blit_init(struct radeon_device *rdev)
 	u32 packet2s[16];
 	int num_packet2s = 0;
 
+	rdev->r600_blit.primitives.set_render_target = set_render_target;
+	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+	rdev->r600_blit.primitives.set_shaders = set_shaders;
+	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+	rdev->r600_blit.primitives.set_scissors = set_scissors;
+	rdev->r600_blit.primitives.draw_auto = draw_auto;
+	rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+	rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+	rdev->r600_blit.ring_size_common += 5; /* done copy */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+	rdev->r600_blit.ring_size_per_loop = 76;
+	/* set_render_target emits 2 extra dwords on rv6xx */
+	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
+		rdev->r600_blit.ring_size_per_loop += 2;
+
+	rdev->r600_blit.max_dim = 8192;
+
 	/* pin copy shader into vram if already initialized */
 	if (rdev->r600_blit.shader_obj)
 		goto done;
@@ -600,47 +628,80 @@ static void r600_vb_ib_put(struct radeon_device *rdev)
 	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 }
 
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
+				      int *width, int *height, int max_dim)
+{
+	unsigned max_pages;
+	unsigned pages = num_gpu_pages;
+	int w, h;
+
+	if (num_gpu_pages == 0) {
+		/* not supposed to be called with no pages, but just in case */
+		h = 0;
+		w = 0;
+		pages = 0;
+		WARN_ON(1);
+	} else {
+		int rect_order = 2;
+		h = RECT_UNIT_H;
+		while (num_gpu_pages / rect_order) {
+			h *= 2;
+			rect_order *= 4;
+			if (h >= max_dim) {
+				h = max_dim;
+				break;
+			}
+		}
+		max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
+		if (pages > max_pages)
+			pages = max_pages;
+		w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
+		w = (w / RECT_UNIT_W) * RECT_UNIT_W;
+		pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
+		BUG_ON(pages == 0);
+	}
+
+
+	DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
+
+	/* return width and height only of the caller wants it */
+	if (height)
+		*height = h;
+	if (width)
+		*width = w;
+
+	return pages;
+}
+
+
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages)
 {
 	int r;
-	int ring_size, line_size;
-	int max_size;
-	/* loops of emits 64 + fence emit possible */
-	int dwords_per_loop = 76, num_loops;
+	int ring_size;
+	int num_loops = 0;
+	int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;
 
 	r = r600_vb_ib_get(rdev);
 	if (r)
 		return r;
 
-	/* set_render_target emits 2 extra dwords on rv6xx */
-	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
-		dwords_per_loop += 2;
-
-	/* 8 bpp vs 32 bpp for xfer unit */
-	if (size_bytes & 3)
-		line_size = 8192;
-	else
-		line_size = 8192*4;
-
-	max_size = 8192 * line_size;
+	/* num loops */
+	while (num_gpu_pages) {
+		num_gpu_pages -=
+			r600_blit_create_rect(num_gpu_pages, NULL, NULL,
+					      rdev->r600_blit.max_dim);
+		num_loops++;
+	}
 
-	/* major loops cover the max size transfer */
-	num_loops = ((size_bytes + max_size) / max_size);
-	/* minor loops cover the extra non aligned bits */
-	num_loops += ((size_bytes % line_size) ? 1 : 0);
 	/* calculate number of loops correctly */
 	ring_size = num_loops * dwords_per_loop;
-	/* set default  + shaders */
-	ring_size += 40; /* shaders + def state */
-	ring_size += 10; /* fence emit for VB IB */
-	ring_size += 5; /* done copy */
-	ring_size += 10; /* fence emit for done copy */
+	ring_size += rdev->r600_blit.ring_size_common;
 	r = radeon_ring_lock(rdev, ring_size);
 	if (r)
 		return r;
 
-	set_default_state(rdev); /* 14 */
-	set_shaders(rdev); /* 26 */
+	rdev->r600_blit.primitives.set_default_state(rdev);
+	rdev->r600_blit.primitives.set_shaders(rdev);
 	return 0;
 }
 
@@ -659,182 +720,64 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
 
 void r600_kms_blit_copy(struct radeon_device *rdev,
 			u64 src_gpu_addr, u64 dst_gpu_addr,
-			int size_bytes)
+			unsigned num_gpu_pages)
 {
-	int max_bytes;
 	u64 vb_gpu_addr;
 	u32 *vb;
 
-	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-		  size_bytes, rdev->r600_blit.vb_used);
+	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n",
+		  src_gpu_addr, dst_gpu_addr,
+		  num_gpu_pages, rdev->r600_blit.vb_used);
 	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-		max_bytes = 8192;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = src_gpu_addr & 255;
-			int dst_x = dst_gpu_addr & 255;
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x);
-			vb[1] = 0;
-			vb[2] = i2f(src_x);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
-
-			/* src 9 */
-			set_tex_resource(rdev, FMT_8,
-					 src_x + cur_size, h, src_x + cur_size,
-					 src_gpu_addr);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 
-			/* dst 23 */
-			set_render_target(rdev, COLOR_8,
-					  dst_x + cur_size, h,
-					  dst_gpu_addr);
+	while (num_gpu_pages) {
+		int w, h;
+		unsigned size_in_bytes;
+		unsigned pages_per_loop =
+			r600_blit_create_rect(num_gpu_pages, &w, &h,
+					      rdev->r600_blit.max_dim);
 
-			/* scissors 12 */
-			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+		size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
+		DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
 
-			/* 14 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
+		if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+			WARN_ON(1);
 		}
-	} else {
-		max_bytes = 8192 * 4;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = (src_gpu_addr & 255);
-			int dst_x = (dst_gpu_addr & 255);
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
 
-			vb[0] = i2f(dst_x / 4);
-			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
-
-			/* src 9 */
-			set_tex_resource(rdev, FMT_8_8_8_8,
-					 (src_x + cur_size) / 4,
-					 h, (src_x + cur_size) / 4,
-					 src_gpu_addr);
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-			/* dst 23 */
-			set_render_target(rdev, COLOR_8_8_8_8,
-					  (dst_x + cur_size) / 4, h,
-					  dst_gpu_addr);
-
-			/* scissors 12  */
-			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
-			/* Vertex buffer setup 14 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			/* 78 ring dwords per loop */
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
+		vb[0] = 0;
+		vb[1] = 0;
+		vb[2] = 0;
+		vb[3] = 0;
+
+		vb[4] = 0;
+		vb[5] = i2f(h);
+		vb[6] = 0;
+		vb[7] = i2f(h);
+
+		vb[8] = i2f(w);
+		vb[9] = i2f(h);
+		vb[10] = i2f(w);
+		vb[11] = i2f(h);
+
+		rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
+							    w, h, w, src_gpu_addr);
+		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+							       PACKET3_TC_ACTION_ENA,
+							       size_in_bytes, src_gpu_addr);
+		rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
+							     w, h, dst_gpu_addr);
+		rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
+		vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+		rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
+		rdev->r600_blit.primitives.draw_auto(rdev);
+		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+				    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+				    size_in_bytes, dst_gpu_addr);
+
+		vb += 12;
+		rdev->r600_blit.vb_used += 4*12;
+		src_gpu_addr += size_in_bytes;
+		dst_gpu_addr += size_in_bytes;
+		num_gpu_pages -= pages_per_loop;
 	}
 }
-
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index cf83aa05a684..0a2e023c1557 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = {
 	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
-static inline bool fmt_is_valid_color(u32 format)
+static bool fmt_is_valid_color(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format)
 	return false;
 }
 
-static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
+static bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 	return false;
 }
 
-static inline int fmt_get_blocksize(u32 format)
+static int fmt_get_blocksize(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return 0;
@@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format)
 	return color_formats_table[format].blocksize;
 }
 
-static inline int fmt_get_nblocksx(u32 format, u32 w)
+static int fmt_get_nblocksx(u32 format, u32 w)
 {
 	unsigned bw;
 
@@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w)
 	return (w + bw - 1) / bw;
 }
 
-static inline int fmt_get_nblocksy(u32 format, u32 h)
+static int fmt_get_nblocksy(u32 format, u32 h)
 {
 	unsigned bh;
 
@@ -223,25 +223,6 @@ static inline int fmt_get_nblocksy(u32 format, u32 h)
 	return (h + bh - 1) / bh;
 }
 
-static inline int r600_bpe_from_format(u32 *bpe, u32 format)
-{
- 	unsigned res;
-
-	if (format >= ARRAY_SIZE(color_formats_table))
-		goto fail;
-
-	res = color_formats_table[format].blocksize;
-	if (res == 0)
-		goto fail;
-
-	*bpe = res;
-	return 0;
-
-fail:
-	*bpe = 16;
-	return -EINVAL;
-}
-
 struct array_mode_checker {
 	int array_mode;
 	u32 group_size;
@@ -252,7 +233,7 @@ struct array_mode_checker {
 };
 
 /* returns alignment in pixels for pitch/height/depth and bytes for base */
-static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
+static int r600_get_array_mode_alignment(struct array_mode_checker *values,
 						u32 *pitch_align,
 						u32 *height_align,
 						u32 *depth_align,
@@ -331,7 +312,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
 	track->db_depth_control = 0xFFFFFFFF;
 }
 
-static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
 	struct r600_cs_track *track = p->track;
 	u32 slice_tile_max, size, tmp;
@@ -737,7 +718,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
  * Check next packet is relocation packet3, do bo validation and compute
  * GPU offset using the provided start.
  **/
-static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_packet p3reloc;
 	int r;
@@ -911,7 +892,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -1215,7 +1196,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
 	return 0;
 }
 
-static inline unsigned mip_minify(unsigned size, unsigned level)
+static unsigned mip_minify(unsigned size, unsigned level)
 {
 	unsigned val;
 
@@ -1285,7 +1266,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 					      struct radeon_bo *texture,
 					      struct radeon_bo *mipmap,
 					      u64 base_offset,
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 0245ae6c204e..bfe1b5d92afe 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -79,6 +79,11 @@
 #define CB_COLOR0_SIZE                                  0x28060
 #define CB_COLOR0_VIEW                                  0x28080
 #define CB_COLOR0_INFO                                  0x280a0
+#	define CB_FORMAT(x)				((x) << 2)
+#       define CB_ARRAY_MODE(x)                         ((x) << 8)
+#	define CB_SOURCE_FORMAT(x)			((x) << 27)
+#	define CB_SF_EXPORT_FULL			0
+#	define CB_SF_EXPORT_NORM			1
 #define CB_COLOR0_TILE                                  0x280c0
 #define CB_COLOR0_FRAG                                  0x280e0
 #define CB_COLOR0_MASK                                  0x28100
@@ -417,6 +422,17 @@
 #define	SQ_PGM_START_VS					0x28858
 #define SQ_PGM_RESOURCES_VS                             0x28868
 #define SQ_PGM_CF_OFFSET_VS                             0x288d0
+
+#define SQ_VTX_CONSTANT_WORD0_0				0x30000
+#define SQ_VTX_CONSTANT_WORD1_0				0x30004
+#define SQ_VTX_CONSTANT_WORD2_0				0x30008
+#	define SQ_VTXC_BASE_ADDR_HI(x)			((x) << 0)
+#	define SQ_VTXC_STRIDE(x)			((x) << 8)
+#	define SQ_VTXC_ENDIAN_SWAP(x)			((x) << 30)
+#	define SQ_ENDIAN_NONE				0
+#	define SQ_ENDIAN_8IN16				1
+#	define SQ_ENDIAN_8IN32				2
+#define SQ_VTX_CONSTANT_WORD3_0				0x3000c
 #define	SQ_VTX_CONSTANT_WORD6_0				0x38018
 #define		S__SQ_VTX_CONSTANT_TYPE(x)			(((x) & 3) << 30)
 #define		G__SQ_VTX_CONSTANT_TYPE(x)			(((x) >> 30) & 3)
@@ -1352,6 +1368,12 @@
 #define   S_038010_DST_SEL_W(x)                        (((x) & 0x7) << 25)
 #define   G_038010_DST_SEL_W(x)                        (((x) >> 25) & 0x7)
 #define   C_038010_DST_SEL_W                           0xF1FFFFFF
+#	define SQ_SEL_X					0
+#	define SQ_SEL_Y					1
+#	define SQ_SEL_Z					2
+#	define SQ_SEL_W					3
+#	define SQ_SEL_0					4
+#	define SQ_SEL_1					5
 #define   S_038010_BASE_LEVEL(x)                       (((x) & 0xF) << 28)
 #define   G_038010_BASE_LEVEL(x)                       (((x) >> 28) & 0xF)
 #define   C_038010_BASE_LEVEL                          0x0FFFFFFF
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0040d28816f1..156b8b7e028e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -102,7 +102,7 @@ extern int radeon_pcie_gen2;
 #define RADEON_FENCE_JIFFIES_TIMEOUT	(HZ / 2)
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE		16
-#define RADEON_DEBUGFS_MAX_NUM_FILES	32
+#define RADEON_DEBUGFS_MAX_COMPONENTS	32
 #define RADEONFB_CONN_LIMIT		4
 #define RADEON_BIOS_NUM_SCRATCH		8
 
@@ -322,6 +322,7 @@ union radeon_gart_table {
 
 #define RADEON_GPU_PAGE_SIZE 4096
 #define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
+#define RADEON_GPU_PAGE_SHIFT 12
 
 struct radeon_gart {
 	dma_addr_t			table_addr;
@@ -522,9 +523,30 @@ struct r600_ih {
 	bool                    enabled;
 };
 
+struct r600_blit_cp_primitives {
+	void (*set_render_target)(struct radeon_device *rdev, int format,
+				  int w, int h, u64 gpu_addr);
+	void (*cp_set_surface_sync)(struct radeon_device *rdev,
+				    u32 sync_type, u32 size,
+				    u64 mc_addr);
+	void (*set_shaders)(struct radeon_device *rdev);
+	void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
+	void (*set_tex_resource)(struct radeon_device *rdev,
+				 int format, int w, int h, int pitch,
+				 u64 gpu_addr);
+	void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
+			     int x2, int y2);
+	void (*draw_auto)(struct radeon_device *rdev);
+	void (*set_default_state)(struct radeon_device *rdev);
+};
+
 struct r600_blit {
 	struct mutex		mutex;
 	struct radeon_bo	*shader_obj;
+	struct r600_blit_cp_primitives primitives;
+	int max_dim;
+	int ring_size_common;
+	int ring_size_per_loop;
 	u64 shader_gpu_addr;
 	u32 vs_offset, ps_offset;
 	u32 state_offset;
@@ -533,6 +555,8 @@ struct r600_blit {
 	struct radeon_ib *vb_ib;
 };
 
+void r600_blit_suspend(struct radeon_device *rdev);
+
 int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -600,32 +624,7 @@ struct radeon_cs_parser {
 
 extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
 extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-
-
-static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	u32 pg_idx, pg_offset;
-	u32 idx_value = 0;
-	int new_page;
-
-	pg_idx = (idx * 4) / PAGE_SIZE;
-	pg_offset = (idx * 4) % PAGE_SIZE;
-
-	if (ibc->kpage_idx[0] == pg_idx)
-		return ibc->kpage[0][pg_offset/4];
-	if (ibc->kpage_idx[1] == pg_idx)
-		return ibc->kpage[1][pg_offset/4];
-
-	new_page = radeon_cs_update_pages(p, pg_idx);
-	if (new_page < 0) {
-		p->parser_error = new_page;
-		return 0;
-	}
-
-	idx_value = ibc->kpage[new_page][pg_offset/4];
-	return idx_value;
-}
+extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
 
 struct radeon_cs_packet {
 	unsigned	idx;
@@ -868,7 +867,7 @@ struct radeon_pm {
 /*
  * Benchmarking
  */
-void radeon_benchmark(struct radeon_device *rdev);
+void radeon_benchmark(struct radeon_device *rdev, int test_number);
 
 
 /*
@@ -914,17 +913,17 @@ struct radeon_asic {
 	int (*copy_blit)(struct radeon_device *rdev,
 			 uint64_t src_offset,
 			 uint64_t dst_offset,
-			 unsigned num_pages,
+			 unsigned num_gpu_pages,
 			 struct radeon_fence *fence);
 	int (*copy_dma)(struct radeon_device *rdev,
 			uint64_t src_offset,
 			uint64_t dst_offset,
-			unsigned num_pages,
+			unsigned num_gpu_pages,
 			struct radeon_fence *fence);
 	int (*copy)(struct radeon_device *rdev,
 		    uint64_t src_offset,
 		    uint64_t dst_offset,
-		    unsigned num_pages,
+		    unsigned num_gpu_pages,
 		    struct radeon_fence *fence);
 	uint32_t (*get_engine_clock)(struct radeon_device *rdev);
 	void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
@@ -1253,45 +1252,10 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
-{
-	if (reg < rdev->rmmio_size)
-		return readl((rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		return readl((rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
-	if (reg < rdev->rmmio_size)
-		writel(v, (rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		writel(v, (rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
-{
-	if (reg < rdev->rio_mem_size)
-		return ioread32(rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	if (reg < rdev->rio_mem_size)
-		iowrite32(v, rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
 /*
  * Cast helper
@@ -1414,19 +1378,19 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
+
+#if DRM_DEBUG_CODE == 0
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
-#if DRM_DEBUG_CODE
-	if (rdev->cp.count_dw <= 0) {
-		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
-	}
-#endif
 	rdev->cp.ring[rdev->cp.wptr++] = v;
 	rdev->cp.wptr &= rdev->cp.ptr_mask;
 	rdev->cp.count_dw--;
 	rdev->cp.ring_free_dw--;
 }
-
+#else
+/* With debugging this is just too big to inline */
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+#endif
 
 /*
  * ASICs macro.
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index df8218bb83a6..e2944566ffea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -765,9 +765,9 @@ static struct radeon_asic evergreen_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -812,9 +812,9 @@ static struct radeon_asic sumo_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = NULL,
@@ -859,9 +859,9 @@ static struct radeon_asic btc_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3d7a0d7c6a9a..85f14f0337e4 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -75,7 +75,7 @@ uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
 int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
-		   unsigned num_pages,
+		   unsigned num_gpu_pages,
 		   struct radeon_fence *fence);
 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
@@ -143,7 +143,7 @@ extern void r100_post_page_flip(struct radeon_device *rdev, int crtc);
 extern int r200_copy_dma(struct radeon_device *rdev,
 			 uint64_t src_offset,
 			 uint64_t dst_offset,
-			 unsigned num_pages,
+			 unsigned num_gpu_pages,
 			 struct radeon_fence *fence);
 void r200_set_safe_registers(struct radeon_device *rdev);
 
@@ -311,7 +311,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_pages, struct radeon_fence *fence);
+		   unsigned num_gpu_pages, struct radeon_fence *fence);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder);
 int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
 /* r600 blit */
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages);
 void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
 void r600_kms_blit_copy(struct radeon_device *rdev,
 			u64 src_gpu_addr, u64 dst_gpu_addr,
-			int size_bytes);
+			unsigned num_gpu_pages);
 
 /*
  * rv770,rv730,rv710,rv740
@@ -401,9 +401,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
 int evergreen_asic_reset(struct radeon_device *rdev);
 void evergreen_bandwidth_update(struct radeon_device *rdev);
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int evergreen_copy_blit(struct radeon_device *rdev,
-			uint64_t src_offset, uint64_t dst_offset,
-			unsigned num_pages, struct radeon_fence *fence);
 void evergreen_hpd_init(struct radeon_device *rdev);
 void evergreen_hpd_fini(struct radeon_device *rdev);
 bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -421,13 +418,6 @@ extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_ba
 extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
 int evergreen_blit_init(struct radeon_device *rdev);
-void evergreen_blit_fini(struct radeon_device *rdev);
-/* evergreen blit */
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
-			     u64 src_gpu_addr, u64 dst_gpu_addr,
-			     int size_bytes);
 
 /*
  * cayman
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bf2b61584cdb..08d0b94332e6 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -62,7 +62,7 @@ union atom_supported_devices {
 	struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
 };
 
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
+static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
 							       uint8_t id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
@@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
 	}
 }
 
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
+static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
 							u8 id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 10191d9372d8..5cafc90de7f8 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -26,21 +26,81 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
-void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
-			   unsigned sdomain, unsigned ddomain)
+#define RADEON_BENCHMARK_COPY_BLIT 1
+#define RADEON_BENCHMARK_COPY_DMA  0
+
+#define RADEON_BENCHMARK_ITERATIONS 1024
+#define RADEON_BENCHMARK_COMMON_MODES_N 17
+
+static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
+				    uint64_t saddr, uint64_t daddr,
+				    int flag, int n)
+{
+	unsigned long start_jiffies;
+	unsigned long end_jiffies;
+	struct radeon_fence *fence = NULL;
+	int i, r;
+
+	start_jiffies = jiffies;
+	for (i = 0; i < n; i++) {
+		r = radeon_fence_create(rdev, &fence);
+		if (r)
+			return r;
+
+		switch (flag) {
+		case RADEON_BENCHMARK_COPY_DMA:
+			r = radeon_copy_dma(rdev, saddr, daddr,
+					    size / RADEON_GPU_PAGE_SIZE,
+					    fence);
+			break;
+		case RADEON_BENCHMARK_COPY_BLIT:
+			r = radeon_copy_blit(rdev, saddr, daddr,
+					     size / RADEON_GPU_PAGE_SIZE,
+					     fence);
+			break;
+		default:
+			DRM_ERROR("Unknown copy method\n");
+			r = -EINVAL;
+		}
+		if (r)
+			goto exit_do_move;
+		r = radeon_fence_wait(fence, false);
+		if (r)
+			goto exit_do_move;
+		radeon_fence_unref(&fence);
+	}
+	end_jiffies = jiffies;
+	r = jiffies_to_msecs(end_jiffies - start_jiffies);
+
+exit_do_move:
+	if (fence)
+		radeon_fence_unref(&fence);
+	return r;
+}
+
+
+static void radeon_benchmark_log_results(int n, unsigned size,
+					 unsigned int time,
+					 unsigned sdomain, unsigned ddomain,
+					 char *kind)
+{
+	unsigned int throughput = (n * (size >> 10)) / time;
+	DRM_INFO("radeon: %s %u bo moves of %u kB from"
+		 " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
+		 kind, n, size >> 10, sdomain, ddomain, time,
+		 throughput * 8, throughput);
+}
+
+static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
+				  unsigned sdomain, unsigned ddomain)
 {
 	struct radeon_bo *dobj = NULL;
 	struct radeon_bo *sobj = NULL;
-	struct radeon_fence *fence = NULL;
 	uint64_t saddr, daddr;
-	unsigned long start_jiffies;
-	unsigned long end_jiffies;
-	unsigned long time;
-	unsigned i, n, size;
-	int r;
+	int r, n;
+	unsigned int time;
 
-	size = bsize;
-	n = 1024;
+	n = RADEON_BENCHMARK_ITERATIONS;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj);
 	if (r) {
 		goto out_cleanup;
@@ -67,65 +127,26 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
 	}
 
 	/* r100 doesn't have dma engine so skip the test */
-	if (rdev->asic->copy_dma) {
-
-		start_jiffies = jiffies;
-		for (i = 0; i < n; i++) {
-			r = radeon_fence_create(rdev, &fence);
-			if (r) {
-				goto out_cleanup;
-			}
-
-			r = radeon_copy_dma(rdev, saddr, daddr,
-					size / RADEON_GPU_PAGE_SIZE, fence);
-
-			if (r) {
-				goto out_cleanup;
-			}
-			r = radeon_fence_wait(fence, false);
-			if (r) {
-				goto out_cleanup;
-			}
-			radeon_fence_unref(&fence);
-		}
-		end_jiffies = jiffies;
-		time = end_jiffies - start_jiffies;
-		time = jiffies_to_msecs(time);
-		if (time > 0) {
-			i = ((n * size) >> 10) / time;
-			printk(KERN_INFO "radeon: dma %u bo moves of %ukb from"
-					" %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n",
-					n, size >> 10,
-					sdomain, ddomain, time,
-					i, i * 1000, (i * 1000) / 1024);
-		}
-	}
-
-	start_jiffies = jiffies;
-	for (i = 0; i < n; i++) {
-		r = radeon_fence_create(rdev, &fence);
-		if (r) {
-			goto out_cleanup;
-		}
-		r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence);
-		if (r) {
-			goto out_cleanup;
-		}
-		r = radeon_fence_wait(fence, false);
-		if (r) {
+	/* also, VRAM-to-VRAM test doesn't make much sense for DMA */
+	/* skip it as well if domains are the same */
+	if ((rdev->asic->copy_dma) && (sdomain != ddomain)) {
+		time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+						RADEON_BENCHMARK_COPY_DMA, n);
+		if (time < 0)
 			goto out_cleanup;
-		}
-		radeon_fence_unref(&fence);
-	}
-	end_jiffies = jiffies;
-	time = end_jiffies - start_jiffies;
-	time = jiffies_to_msecs(time);
-	if (time > 0) {
-		i = ((n * size) >> 10) / time;
-		printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d"
-		       " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
-		       sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
+		if (time > 0)
+			radeon_benchmark_log_results(n, size, time,
+						     sdomain, ddomain, "dma");
 	}
+
+	time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+					RADEON_BENCHMARK_COPY_BLIT, n);
+	if (time < 0)
+		goto out_cleanup;
+	if (time > 0)
+		radeon_benchmark_log_results(n, size, time,
+					     sdomain, ddomain, "blit");
+
 out_cleanup:
 	if (sobj) {
 		r = radeon_bo_reserve(sobj, false);
@@ -143,18 +164,92 @@ out_cleanup:
 		}
 		radeon_bo_unref(&dobj);
 	}
-	if (fence) {
-		radeon_fence_unref(&fence);
-	}
+
 	if (r) {
-		printk(KERN_WARNING "Error while benchmarking BO move.\n");
+		DRM_ERROR("Error while benchmarking BO move.\n");
 	}
 }
 
-void radeon_benchmark(struct radeon_device *rdev)
+void radeon_benchmark(struct radeon_device *rdev, int test_number)
 {
-	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
-			      RADEON_GEM_DOMAIN_VRAM);
-	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
-			      RADEON_GEM_DOMAIN_GTT);
+	int i;
+	int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = {
+		640 * 480 * 4,
+		720 * 480 * 4,
+		800 * 600 * 4,
+		848 * 480 * 4,
+		1024 * 768 * 4,
+		1152 * 768 * 4,
+		1280 * 720 * 4,
+		1280 * 800 * 4,
+		1280 * 854 * 4,
+		1280 * 960 * 4,
+		1280 * 1024 * 4,
+		1440 * 900 * 4,
+		1400 * 1050 * 4,
+		1680 * 1050 * 4,
+		1600 * 1200 * 4,
+		1920 * 1080 * 4,
+		1920 * 1200 * 4
+	};
+
+	switch (test_number) {
+	case 1:
+		/* simple test, VRAM to GTT and GTT to VRAM */
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
+				      RADEON_GEM_DOMAIN_VRAM);
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+				      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 2:
+		/* simple test, VRAM to VRAM */
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+				      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 3:
+		/* GTT to VRAM, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_GTT,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 4:
+		/* VRAM to GTT, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 5:
+		/* VRAM to VRAM, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 6:
+		/* GTT to VRAM, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_GTT,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 7:
+		/* VRAM to GTT, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 8:
+		/* VRAM to VRAM, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+
+	default:
+		DRM_ERROR("Unknown benchmark\n");
+	}
 }
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 63675241c7ff..8bf83c4b4147 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -620,8 +620,8 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde
 		i2c.y_data_mask = 0x80;
 	} else {
 		/* default masks for ddc pads */
-		i2c.mask_clk_mask = RADEON_GPIO_EN_1;
-		i2c.mask_data_mask = RADEON_GPIO_EN_0;
+		i2c.mask_clk_mask = RADEON_GPIO_MASK_1;
+		i2c.mask_data_mask = RADEON_GPIO_MASK_0;
 		i2c.a_clk_mask = RADEON_GPIO_A_1;
 		i2c.a_data_mask = RADEON_GPIO_A_0;
 		i2c.en_clk_mask = RADEON_GPIO_EN_1;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index c4b8741dbf58..dec6cbe6a0a6 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -68,11 +68,11 @@ void radeon_connector_hotplug(struct drm_connector *connector)
 	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
 		int saved_dpms = connector->dpms;
 
-		if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
-		    radeon_dp_needs_link_train(radeon_connector))
-			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-		else
+		/* Only turn off the display it it's physically disconnected */
+		if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
 			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+		else if (radeon_dp_needs_link_train(radeon_connector))
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
 		connector->dpms = saved_dpms;
 	}
 }
@@ -724,6 +724,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 		dret = radeon_ddc_probe(radeon_connector,
 					radeon_connector->requires_extended_probe);
 	if (dret) {
+		radeon_connector->detected_by_load = false;
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
@@ -750,12 +751,21 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 	} else {
 
 		/* if we aren't forcing don't do destructive polling */
-		if (!force)
-			return connector->status;
+		if (!force) {
+			/* only return the previous status if we last
+			 * detected a monitor via load.
+			 */
+			if (radeon_connector->detected_by_load)
+				return connector->status;
+			else
+				return ret;
+		}
 
 		if (radeon_connector->dac_load_detect && encoder) {
 			encoder_funcs = encoder->helper_private;
 			ret = encoder_funcs->detect(encoder, connector);
+			if (ret == connector_status_connected)
+				radeon_connector->detected_by_load = true;
 		}
 	}
 
@@ -897,6 +907,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 		dret = radeon_ddc_probe(radeon_connector,
 					radeon_connector->requires_extended_probe);
 	if (dret) {
+		radeon_connector->detected_by_load = false;
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
@@ -959,8 +970,18 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 	if ((ret == connector_status_connected) && (radeon_connector->use_digital == true))
 		goto out;
 
+	/* DVI-D and HDMI-A are digital only */
+	if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) ||
+	    (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))
+		goto out;
+
+	/* if we aren't forcing don't do destructive polling */
 	if (!force) {
-		ret = connector->status;
+		/* only return the previous status if we last
+		 * detected a monitor via load.
+		 */
+		if (radeon_connector->detected_by_load)
+			ret = connector->status;
 		goto out;
 	}
 
@@ -984,6 +1005,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 					ret = encoder_funcs->detect(encoder, connector);
 					if (ret == connector_status_connected) {
 						radeon_connector->use_digital = false;
+						radeon_connector->detected_by_load = true;
 					}
 				}
 				break;
@@ -1303,23 +1325,14 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 		/* get the DPCD from the bridge */
 		radeon_dp_getdpcd(radeon_connector);
 
-		if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
-			ret = connector_status_connected;
-		else {
-			/* need to setup ddc on the bridge */
-			if (encoder)
-				radeon_atom_ext_encoder_setup_ddc(encoder);
+		if (encoder) {
+			/* setup ddc on the bridge */
+			radeon_atom_ext_encoder_setup_ddc(encoder);
 			if (radeon_ddc_probe(radeon_connector,
-					     radeon_connector->requires_extended_probe))
+					     radeon_connector->requires_extended_probe)) /* try DDC */
 				ret = connector_status_connected;
-		}
-
-		if ((ret == connector_status_disconnected) &&
-		    radeon_connector->dac_load_detect) {
-			struct drm_encoder *encoder = radeon_best_single_encoder(connector);
-			struct drm_encoder_helper_funcs *encoder_funcs;
-			if (encoder) {
-				encoder_funcs = encoder->helper_private;
+			else if (radeon_connector->dac_load_detect) { /* try load detection */
+				struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 				ret = encoder_funcs->detect(encoder, connector);
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 3189a7efb2e9..fde25c0d65a0 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -208,23 +208,25 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 	int xorigin = 0, yorigin = 0;
 	int w = radeon_crtc->cursor_width;
 
-	if (x < 0)
-		xorigin = -x + 1;
-	if (y < 0)
-		yorigin = -y + 1;
-	if (xorigin >= CURSOR_WIDTH)
-		xorigin = CURSOR_WIDTH - 1;
-	if (yorigin >= CURSOR_HEIGHT)
-		yorigin = CURSOR_HEIGHT - 1;
-
 	if (ASIC_IS_AVIVO(rdev)) {
-		int i = 0;
-		struct drm_crtc *crtc_p;
-
 		/* avivo cursor are offset into the total surface */
 		x += crtc->x;
 		y += crtc->y;
-		DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+	}
+	DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+	if (x < 0) {
+		xorigin = min(-x, CURSOR_WIDTH - 1);
+		x = 0;
+	}
+	if (y < 0) {
+		yorigin = min(-y, CURSOR_HEIGHT - 1);
+		y = 0;
+	}
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		int i = 0;
+		struct drm_crtc *crtc_p;
 
 		/* avivo cursor image can't end on 128 pixel boundary or
 		 * go past the end of the frame if both crtcs are enabled
@@ -253,16 +255,12 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 
 	radeon_lock_cursor(crtc, true);
 	if (ASIC_IS_DCE4(rdev)) {
-		WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset,
-		       ((xorigin ? 0 : x) << 16) |
-		       (yorigin ? 0 : y));
+		WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y);
 		WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
 		WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset,
 		       ((w - 1) << 16) | (radeon_crtc->cursor_height - 1));
 	} else if (ASIC_IS_AVIVO(rdev)) {
-		WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset,
-			     ((xorigin ? 0 : x) << 16) |
-			     (yorigin ? 0 : y));
+		WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y);
 		WREG32(AVIVO_D1CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
 		WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset,
 		       ((w - 1) << 16) | (radeon_crtc->cursor_height - 1));
@@ -276,8 +274,8 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 			| yorigin));
 		WREG32(RADEON_CUR_HORZ_VERT_POSN + radeon_crtc->crtc_offset,
 		       (RADEON_CUR_LOCK
-			| ((xorigin ? 0 : x) << 16)
-			| (yorigin ? 0 : y)));
+			| (x << 16)
+			| y));
 		/* offset is from DISP(2)_BASE_ADDRESS */
 		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
 								      (yorigin * 256)));
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b51e15725c6e..c33bc914d93d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -750,14 +750,15 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
-	 * IGP - can handle 40-bits (in theory)
+	 * IGP - can handle 40-bits
 	 * AGP - generally dma32 is safest
-	 * PCI - only dma32
+	 * PCI - dma32 for legacy pci gart, 40 bits on newer asics
 	 */
 	rdev->need_dma32 = false;
 	if (rdev->flags & RADEON_IS_AGP)
 		rdev->need_dma32 = true;
-	if (rdev->flags & RADEON_IS_PCI)
+	if ((rdev->flags & RADEON_IS_PCI) &&
+	    (rdev->family < CHIP_RS400))
 		rdev->need_dma32 = true;
 
 	dma_bits = rdev->need_dma32 ? 32 : 40;
@@ -817,7 +818,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		radeon_test_moves(rdev);
 	}
 	if (radeon_benchmarking) {
-		radeon_benchmark(rdev);
+		radeon_benchmark(rdev, radeon_benchmarking);
 	}
 	return 0;
 }
@@ -981,7 +982,7 @@ struct radeon_debugfs {
 	struct drm_info_list	*files;
 	unsigned		num_files;
 };
-static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_NUM_FILES];
+static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
 static unsigned _radeon_debugfs_count = 0;
 
 int radeon_debugfs_add_files(struct radeon_device *rdev,
@@ -996,14 +997,17 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
 			return 0;
 		}
 	}
-	if ((_radeon_debugfs_count + nfiles) > RADEON_DEBUGFS_MAX_NUM_FILES) {
-		DRM_ERROR("Reached maximum number of debugfs files.\n");
-		DRM_ERROR("Report so we increase RADEON_DEBUGFS_MAX_NUM_FILES.\n");
+
+	i = _radeon_debugfs_count + 1;
+	if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
+		DRM_ERROR("Reached maximum number of debugfs components.\n");
+		DRM_ERROR("Report so we increase "
+		          "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
 		return -EINVAL;
 	}
 	_radeon_debugfs[_radeon_debugfs_count].files = files;
 	_radeon_debugfs[_radeon_debugfs_count].num_files = nfiles;
-	_radeon_debugfs_count++;
+	_radeon_debugfs_count = i;
 #if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_create_files(files, nfiles,
 				 rdev->ddev->control->debugfs_root,
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 6cc17fb96a57..6adb3e58affd 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -473,8 +473,8 @@ pflip_cleanup:
 	spin_lock_irqsave(&dev->event_lock, flags);
 	radeon_crtc->unpin_work = NULL;
 unlock_free:
-	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
+	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
 	radeon_fence_unref(&work->fence);
 	kfree(work);
 
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 319d85d7e759..8a171b21b453 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -1507,7 +1507,14 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
 		switch (mode) {
 		case DRM_MODE_DPMS_ON:
 			args.ucAction = ATOM_ENABLE;
-			atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+			/* workaround for DVOOutputControl on some RS690 systems */
+			if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DDI) {
+				u32 reg = RREG32(RADEON_BIOS_3_SCRATCH);
+				WREG32(RADEON_BIOS_3_SCRATCH, reg & ~ATOM_S3_DFP2I_ACTIVE);
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+				WREG32(RADEON_BIOS_3_SCRATCH, reg);
+			} else
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 				args.ucAction = ATOM_LCD_BLON;
 				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
@@ -1748,9 +1755,12 @@ static int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder)
 	/* DCE4/5 */
 	if (ASIC_IS_DCE4(rdev)) {
 		dig = radeon_encoder->enc_priv;
-		if (ASIC_IS_DCE41(rdev))
-			return radeon_crtc->crtc_id;
-		else {
+		if (ASIC_IS_DCE41(rdev)) {
+			if (dig->linkb)
+				return 1;
+			else
+				return 0;
+		} else {
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 				if (dig->linkb)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7fd4e3e5ad5f..a488b502ec1f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -263,7 +263,7 @@ retry:
 		 */
 		if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) {
 			/* good news we believe it's a lockup */
-			WARN(1, "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
 			     fence->seq, seq);
 			/* FIXME: what should we do ? marking everyone
 			 * as signaled for now
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 6c111c1fa3f9..02cb7da4124d 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -81,8 +81,9 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_e
 
 /* bit banging i2c */
 
-static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
+static int pre_xfer(struct i2c_adapter *i2c_adap)
 {
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
 	struct radeon_device *rdev = i2c->dev->dev_private;
 	struct radeon_i2c_bus_rec *rec = &i2c->rec;
 	uint32_t temp;
@@ -137,19 +138,30 @@ static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
 	WREG32(rec->en_data_reg, temp);
 
 	/* mask the gpio pins for software use */
-	temp = RREG32(rec->mask_clk_reg);
-	if (lock_state)
-		temp |= rec->mask_clk_mask;
-	else
-		temp &= ~rec->mask_clk_mask;
+	temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask;
 	WREG32(rec->mask_clk_reg, temp);
 	temp = RREG32(rec->mask_clk_reg);
 
+	temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask;
+	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
-	if (lock_state)
-		temp |= rec->mask_data_mask;
-	else
-		temp &= ~rec->mask_data_mask;
+
+	return 0;
+}
+
+static void post_xfer(struct i2c_adapter *i2c_adap)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t temp;
+
+	/* unmask the gpio pins for software use */
+	temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask;
+	WREG32(rec->mask_clk_reg, temp);
+	temp = RREG32(rec->mask_clk_reg);
+
+	temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
 	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
 }
@@ -209,22 +221,6 @@ static void set_data(void *i2c_priv, int data)
 	WREG32(rec->en_data_reg, val);
 }
 
-static int pre_xfer(struct i2c_adapter *i2c_adap)
-{
-	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
-	radeon_i2c_do_lock(i2c, 1);
-
-	return 0;
-}
-
-static void post_xfer(struct i2c_adapter *i2c_adap)
-{
-	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
-	radeon_i2c_do_lock(i2c, 0);
-}
-
 /* hw i2c */
 
 static u32 radeon_get_i2c_prescale(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 465746bd51b7..00da38424dfc 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc)
 	}
 }
 
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
+static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
 {
 	u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS);
 	u32 irq_mask = RADEON_SW_INT_TEST;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index c7b6cb428d09..b37ec0f1413a 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder,
 	*v_sync_strt_wid = tmp;
 }
 
-static inline int get_post_div(int value)
+static int get_post_div(int value)
 {
 	int post_div;
 	switch (value) {
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 68820f5f6303..ed0178f03235 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -447,6 +447,7 @@ struct radeon_connector {
 	struct edid *edid;
 	void *con_priv;
 	bool dac_load_detect;
+	bool detected_by_load; /* if the connection status was determined by load */
 	uint16_t connector_object_id;
 	struct radeon_hpd hpd;
 	struct radeon_router router;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 976c3b1b1b6e..138839312e8b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -515,3 +515,45 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	return 0;
 }
+
+int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait,
+		   enum ttm_buffer_usage usage)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0))
+		return r;
+	spin_lock(&bo->tbo.bdev->fence_lock);
+	if (mem_type)
+		*mem_type = bo->tbo.mem.mem_type;
+	if (bo->tbo.sync_obj)
+		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, usage);
+	spin_unlock(&bo->tbo.bdev->fence_lock);
+	ttm_bo_unreserve(&bo->tbo);
+	return r;
+}
+
+
+/**
+ * radeon_bo_reserve - reserve bo
+ * @bo:		bo structure
+ * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
+ *
+ * Returns:
+ * -EBUSY: buffer is busy and @no_wait is true
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
+		return r;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index a057a8e5a6e6..c6c8e43e6d9a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type)
 	return 0;
 }
 
-/**
- * radeon_bo_reserve - reserve bo
- * @bo:		bo structure
- * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
- *
- * Returns:
- * -EBUSY: buffer is busy and @no_wait is true
- * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
- * a signal. Release all buffer reservations and return to user-space.
- */
-static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0)) {
-		if (r != -ERESTARTSYS)
-			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
-		return r;
-	}
-	return 0;
-}
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
 
 static inline void radeon_bo_unreserve(struct radeon_bo *bo)
 {
@@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 	return bo->tbo.addr_space_offset;
 }
 
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
-				 bool no_wait, enum ttm_buffer_usage usage)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0))
-		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
-	if (mem_type)
-		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, usage);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	ttm_bo_unreserve(&bo->tbo);
-	return r;
-}
+extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
+			  bool no_wait, enum ttm_buffer_usage usage);
 
 extern int radeon_bo_create(struct radeon_device *rdev,
 				unsigned long size, int byte_align,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 08c0233db1b8..49d58202202c 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -35,6 +35,44 @@
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev);
 
+u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+	u32 pg_idx, pg_offset;
+	u32 idx_value = 0;
+	int new_page;
+
+	pg_idx = (idx * 4) / PAGE_SIZE;
+	pg_offset = (idx * 4) % PAGE_SIZE;
+
+	if (ibc->kpage_idx[0] == pg_idx)
+		return ibc->kpage[0][pg_offset/4];
+	if (ibc->kpage_idx[1] == pg_idx)
+		return ibc->kpage[1][pg_offset/4];
+
+	new_page = radeon_cs_update_pages(p, pg_idx);
+	if (new_page < 0) {
+		p->parser_error = new_page;
+		return 0;
+	}
+
+	idx_value = ibc->kpage[new_page][pg_offset/4];
+	return idx_value;
+}
+
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+	if (rdev->cp.count_dw <= 0) {
+		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+	}
+#endif
+	rdev->cp.ring[rdev->cp.wptr++] = v;
+	rdev->cp.wptr &= rdev->cp.ptr_mask;
+	rdev->cp.count_dw--;
+	rdev->cp.ring_free_dw--;
+}
+
 void radeon_ib_bogus_cleanup(struct radeon_device *rdev)
 {
 	struct radeon_ib *ib, *n;
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 92e7ea73b7c5..e8422ae7fe74 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
 	return 0;
 }
 
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
-						     dev_priv,
-						     struct drm_file *file_priv,
-						     drm_radeon_kcmd_buffer_t *
-						     cmdbuf,
-						     unsigned int *cmdsz)
+static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
+					  dev_priv,
+					  struct drm_file *file_priv,
+					  drm_radeon_kcmd_buffer_t *
+					  cmdbuf,
+					  unsigned int *cmdsz)
 {
 	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	u32 offset, narrays;
@@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
  * CP hardware state programming functions
  */
 
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
-					     struct drm_clip_rect * box)
+static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
+				  struct drm_clip_rect * box)
 {
 	RING_LOCALS;
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 9b86fb0e4122..0b5468bfaf54 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -277,7 +277,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		DRM_ERROR("Trying to move memory with CP turned off.\n");
 		return -EINVAL;
 	}
-	r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages, fence);
+
+	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
+
+	r = radeon_copy(rdev, old_start, new_start,
+			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
+			fence);
 	/* FIXME: handle copy error */
 	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
 				      evict, no_wait_reserve, no_wait_gpu, new_mem);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 80928f9ff80f..87cc1feee3ac 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -539,55 +539,6 @@ static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 	return backend_map;
 }
 
-static void rv770_program_channel_remap(struct radeon_device *rdev)
-{
-	u32 tcp_chan_steer, mc_shared_chremap, tmp;
-	bool force_no_swizzle;
-
-	switch (rdev->family) {
-	case CHIP_RV770:
-	case CHIP_RV730:
-		force_no_swizzle = false;
-		break;
-	case CHIP_RV710:
-	case CHIP_RV740:
-	default:
-		force_no_swizzle = true;
-		break;
-	}
-
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
-	case 0:
-	case 1:
-	default:
-		/* default mapping */
-		mc_shared_chremap = 0x00fac688;
-		break;
-	case 2:
-	case 3:
-		if (force_no_swizzle)
-			mc_shared_chremap = 0x00fac688;
-		else
-			mc_shared_chremap = 0x00bbc298;
-		break;
-	}
-
-	if (rdev->family == CHIP_RV740)
-		tcp_chan_steer = 0x00ef2a60;
-	else
-		tcp_chan_steer = 0x00fac688;
-
-	/* RV770 CE has special chremap setup */
-	if (rdev->pdev->device == 0x944e) {
-		tcp_chan_steer = 0x00b08b08;
-		mc_shared_chremap = 0x00b08b08;
-	}
-
-	WREG32(TCP_CHAN_STEER, tcp_chan_steer);
-	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
-}
-
 static void rv770_gpu_init(struct radeon_device *rdev)
 {
 	int i, j, num_qd_pipes;
@@ -788,8 +739,6 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 
-	rv770_program_channel_remap(rdev);
-
 	WREG32(CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
 	WREG32(CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
 	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
@@ -1238,8 +1187,6 @@ int rv770_resume(struct radeon_device *rdev)
 
 int rv770_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
@@ -1247,14 +1194,8 @@ int rv770_suspend(struct radeon_device *rdev)
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	rv770_pcie_gart_disable(rdev);
-	/* unpin shaders bo */
-	if (rdev->r600_blit.shader_obj) {
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (likely(r == 0)) {
-			radeon_bo_unpin(rdev->r600_blit.shader_obj);
-			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		}
-	}
+	r600_blit_suspend(rdev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b824d9bdd87c..50fc8e4c9a31 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -394,7 +394,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
 		if (bo->ttm == NULL) {
-			ret = ttm_bo_add_ttm(bo, false);
+			bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED);
+			ret = ttm_bo_add_ttm(bo, zero);
 			if (ret)
 				goto out_err;
 		}
@@ -1295,6 +1296,7 @@ int ttm_bo_create(struct ttm_bo_device *bdev,
 
 	return ret;
 }
+EXPORT_SYMBOL(ttm_bo_create);
 
 static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 					unsigned mem_type, bool allow_errors)
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 30ad13344f7b..794ff67c5701 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -7,7 +7,8 @@ config DRM_VMWGFX
 	select FB_CFB_IMAGEBLIT
 	select DRM_TTM
 	help
-	  KMS enabled DRM driver for SVGA2 virtual hardware.
-
-	  If unsure say n. The compiled module will be
-	  called vmwgfx.ko
+	  Choose this option if you would like to run 3D acceleration
+	  in a VMware virtual machine.
+	  This is a KMS enabled DRM driver for the VMware SVGA2
+	  virtual hardware.
+	  The compiled module will be called "vmwgfx.ko".
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 7d8e9d5d498c..586869c8c11f 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -5,6 +5,6 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
 	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
-	    vmwgfx_fence.o
+	    vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
index 77cb45331000..d0e085ee8249 100644
--- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
@@ -57,7 +57,8 @@ typedef enum {
    SVGA3D_HWVERSION_WS6_B1    = SVGA3D_MAKE_HWVERSION(1, 1),
    SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
    SVGA3D_HWVERSION_WS65_B1   = SVGA3D_MAKE_HWVERSION(2, 0),
-   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS65_B1,
+   SVGA3D_HWVERSION_WS8_B1    = SVGA3D_MAKE_HWVERSION(2, 1),
+   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS8_B1,
 } SVGA3dHardwareVersion;
 
 /*
@@ -67,7 +68,8 @@ typedef enum {
 typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
 #define SVGA3D_NUM_CLIPPLANES                   6
 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  8
-
+#define SVGA3D_MAX_CONTEXT_IDS                  256
+#define SVGA3D_MAX_SURFACE_IDS                  (32 * 1024)
 
 /*
  * Surface formats.
@@ -79,76 +81,91 @@ typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
  */
 
 typedef enum SVGA3dSurfaceFormat {
-   SVGA3D_FORMAT_INVALID = 0,
+   SVGA3D_FORMAT_INVALID               = 0,
 
-   SVGA3D_X8R8G8B8       = 1,
-   SVGA3D_A8R8G8B8       = 2,
+   SVGA3D_X8R8G8B8                     = 1,
+   SVGA3D_A8R8G8B8                     = 2,
 
-   SVGA3D_R5G6B5         = 3,
-   SVGA3D_X1R5G5B5       = 4,
-   SVGA3D_A1R5G5B5       = 5,
-   SVGA3D_A4R4G4B4       = 6,
+   SVGA3D_R5G6B5                       = 3,
+   SVGA3D_X1R5G5B5                     = 4,
+   SVGA3D_A1R5G5B5                     = 5,
+   SVGA3D_A4R4G4B4                     = 6,
 
-   SVGA3D_Z_D32          = 7,
-   SVGA3D_Z_D16          = 8,
-   SVGA3D_Z_D24S8        = 9,
-   SVGA3D_Z_D15S1        = 10,
+   SVGA3D_Z_D32                        = 7,
+   SVGA3D_Z_D16                        = 8,
+   SVGA3D_Z_D24S8                      = 9,
+   SVGA3D_Z_D15S1                      = 10,
 
-   SVGA3D_LUMINANCE8            = 11,
-   SVGA3D_LUMINANCE4_ALPHA4     = 12,
-   SVGA3D_LUMINANCE16           = 13,
-   SVGA3D_LUMINANCE8_ALPHA8     = 14,
+   SVGA3D_LUMINANCE8                   = 11,
+   SVGA3D_LUMINANCE4_ALPHA4            = 12,
+   SVGA3D_LUMINANCE16                  = 13,
+   SVGA3D_LUMINANCE8_ALPHA8            = 14,
 
-   SVGA3D_DXT1           = 15,
-   SVGA3D_DXT2           = 16,
-   SVGA3D_DXT3           = 17,
-   SVGA3D_DXT4           = 18,
-   SVGA3D_DXT5           = 19,
+   SVGA3D_DXT1                         = 15,
+   SVGA3D_DXT2                         = 16,
+   SVGA3D_DXT3                         = 17,
+   SVGA3D_DXT4                         = 18,
+   SVGA3D_DXT5                         = 19,
 
-   SVGA3D_BUMPU8V8       = 20,
-   SVGA3D_BUMPL6V5U5     = 21,
-   SVGA3D_BUMPX8L8V8U8   = 22,
-   SVGA3D_BUMPL8V8U8     = 23,
+   SVGA3D_BUMPU8V8                     = 20,
+   SVGA3D_BUMPL6V5U5                   = 21,
+   SVGA3D_BUMPX8L8V8U8                 = 22,
+   SVGA3D_BUMPL8V8U8                   = 23,
 
-   SVGA3D_ARGB_S10E5     = 24,   /* 16-bit floating-point ARGB */
-   SVGA3D_ARGB_S23E8     = 25,   /* 32-bit floating-point ARGB */
+   SVGA3D_ARGB_S10E5                   = 24,   /* 16-bit floating-point ARGB */
+   SVGA3D_ARGB_S23E8                   = 25,   /* 32-bit floating-point ARGB */
 
-   SVGA3D_A2R10G10B10    = 26,
+   SVGA3D_A2R10G10B10                  = 26,
 
    /* signed formats */
-   SVGA3D_V8U8           = 27,
-   SVGA3D_Q8W8V8U8       = 28,
-   SVGA3D_CxV8U8         = 29,
+   SVGA3D_V8U8                         = 27,
+   SVGA3D_Q8W8V8U8                     = 28,
+   SVGA3D_CxV8U8                       = 29,
 
    /* mixed formats */
-   SVGA3D_X8L8V8U8       = 30,
-   SVGA3D_A2W10V10U10    = 31,
+   SVGA3D_X8L8V8U8                     = 30,
+   SVGA3D_A2W10V10U10                  = 31,
 
-   SVGA3D_ALPHA8         = 32,
+   SVGA3D_ALPHA8                       = 32,
 
    /* Single- and dual-component floating point formats */
-   SVGA3D_R_S10E5        = 33,
-   SVGA3D_R_S23E8        = 34,
-   SVGA3D_RG_S10E5       = 35,
-   SVGA3D_RG_S23E8       = 36,
+   SVGA3D_R_S10E5                      = 33,
+   SVGA3D_R_S23E8                      = 34,
+   SVGA3D_RG_S10E5                     = 35,
+   SVGA3D_RG_S23E8                     = 36,
 
    /*
     * Any surface can be used as a buffer object, but SVGA3D_BUFFER is
     * the most efficient format to use when creating new surfaces
     * expressly for index or vertex data.
     */
-   SVGA3D_BUFFER         = 37,
 
-   SVGA3D_Z_D24X8        = 38,
+   SVGA3D_BUFFER                       = 37,
+
+   SVGA3D_Z_D24X8                      = 38,
 
-   SVGA3D_V16U16         = 39,
+   SVGA3D_V16U16                       = 39,
 
-   SVGA3D_G16R16         = 40,
-   SVGA3D_A16B16G16R16   = 41,
+   SVGA3D_G16R16                       = 40,
+   SVGA3D_A16B16G16R16                 = 41,
 
    /* Packed Video formats */
-   SVGA3D_UYVY           = 42,
-   SVGA3D_YUY2           = 43,
+   SVGA3D_UYVY                         = 42,
+   SVGA3D_YUY2                         = 43,
+
+   /* Planar video formats */
+   SVGA3D_NV12                         = 44,
+
+   /* Video format with alpha */
+   SVGA3D_AYUV                         = 45,
+
+   SVGA3D_BC4_UNORM                    = 108,
+   SVGA3D_BC5_UNORM                    = 111,
+
+   /* Advanced D3D9 depth formats. */
+   SVGA3D_Z_DF16                       = 118,
+   SVGA3D_Z_DF24                       = 119,
+   SVGA3D_Z_D24S8_INT                  = 120,
 
    SVGA3D_FORMAT_MAX
 } SVGA3dSurfaceFormat;
@@ -414,10 +431,20 @@ typedef enum {
    SVGA3D_RS_SRCBLENDALPHA             = 94,    /* SVGA3dBlendOp */
    SVGA3D_RS_DSTBLENDALPHA             = 95,    /* SVGA3dBlendOp */
    SVGA3D_RS_BLENDEQUATIONALPHA        = 96,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_TRANSPARENCYANTIALIAS     = 97,    /* SVGA3dTransparencyAntialiasType */
+   SVGA3D_RS_LINEAA                    = 98,    /* SVGA3dBool */
+   SVGA3D_RS_LINEWIDTH                 = 99,    /* float */
    SVGA3D_RS_MAX
 } SVGA3dRenderStateName;
 
 typedef enum {
+   SVGA3D_TRANSPARENCYANTIALIAS_NORMAL            = 0,
+   SVGA3D_TRANSPARENCYANTIALIAS_ALPHATOCOVERAGE   = 1,
+   SVGA3D_TRANSPARENCYANTIALIAS_SUPERSAMPLE       = 2,
+   SVGA3D_TRANSPARENCYANTIALIAS_MAX
+} SVGA3dTransparencyAntialiasType;
+
+typedef enum {
    SVGA3D_VERTEXMATERIAL_NONE     = 0,    /* Use the value in the current material */
    SVGA3D_VERTEXMATERIAL_DIFFUSE  = 1,    /* Use the value in the diffuse component */
    SVGA3D_VERTEXMATERIAL_SPECULAR = 2,    /* Use the value in the specular component */
@@ -728,10 +755,10 @@ typedef enum {
    SVGA3D_TEX_FILTER_NEAREST        = 1,
    SVGA3D_TEX_FILTER_LINEAR         = 2,
    SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
-   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
-   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, /* Not currently implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, /* Not currently implemented */
    SVGA3D_TEX_FILTER_MAX
 } SVGA3dTextureFilter;
 
@@ -799,19 +826,19 @@ typedef enum {
 
 typedef enum {
    SVGA3D_DECLUSAGE_POSITION     = 0,
-   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
-   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
-   SVGA3D_DECLUSAGE_NORMAL,            //  3
-   SVGA3D_DECLUSAGE_PSIZE,             //  4
-   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
-   SVGA3D_DECLUSAGE_TANGENT,           //  6
-   SVGA3D_DECLUSAGE_BINORMAL,          //  7
-   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
-   SVGA3D_DECLUSAGE_POSITIONT,         //  9
-   SVGA3D_DECLUSAGE_COLOR,             // 10
-   SVGA3D_DECLUSAGE_FOG,               // 11
-   SVGA3D_DECLUSAGE_DEPTH,             // 12
-   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,       /*  1 */
+   SVGA3D_DECLUSAGE_BLENDINDICES,      /*  2 */
+   SVGA3D_DECLUSAGE_NORMAL,            /*  3 */
+   SVGA3D_DECLUSAGE_PSIZE,             /*  4 */
+   SVGA3D_DECLUSAGE_TEXCOORD,          /*  5 */
+   SVGA3D_DECLUSAGE_TANGENT,           /*  6 */
+   SVGA3D_DECLUSAGE_BINORMAL,          /*  7 */
+   SVGA3D_DECLUSAGE_TESSFACTOR,        /*  8 */
+   SVGA3D_DECLUSAGE_POSITIONT,         /*  9 */
+   SVGA3D_DECLUSAGE_COLOR,             /* 10 */
+   SVGA3D_DECLUSAGE_FOG,               /* 11 */
+   SVGA3D_DECLUSAGE_DEPTH,             /* 12 */
+   SVGA3D_DECLUSAGE_SAMPLE,            /* 13 */
    SVGA3D_DECLUSAGE_MAX
 } SVGA3dDeclUsage;
 
@@ -819,10 +846,10 @@ typedef enum {
    SVGA3D_DECLMETHOD_DEFAULT     = 0,
    SVGA3D_DECLMETHOD_PARTIALU,
    SVGA3D_DECLMETHOD_PARTIALV,
-   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_CROSSUV,          /* Normal */
    SVGA3D_DECLMETHOD_UV,
-   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
-   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+   SVGA3D_DECLMETHOD_LOOKUP,           /* Lookup a displacement map */
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement map */
 } SVGA3dDeclMethod;
 
 typedef enum {
@@ -930,7 +957,6 @@ typedef enum {
 } SVGA3dCubeFace;
 
 typedef enum {
-   SVGA3D_SHADERTYPE_COMPILED_DX8               = 0,
    SVGA3D_SHADERTYPE_VS                         = 1,
    SVGA3D_SHADERTYPE_PS                         = 2,
    SVGA3D_SHADERTYPE_MAX
@@ -968,12 +994,18 @@ typedef enum {
 } SVGA3dTransferType;
 
 /*
- * The maximum number vertex arrays we're guaranteed to support in
+ * The maximum number of vertex arrays we're guaranteed to support in
  * SVGA_3D_CMD_DRAWPRIMITIVES.
  */
 #define SVGA3D_MAX_VERTEX_ARRAYS   32
 
 /*
+ * The maximum number of primitive ranges we're guaranteed to support
+ * in SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
+
+/*
  * Identifiers for commands in the command FIFO.
  *
  * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of
@@ -990,7 +1022,7 @@ typedef enum {
 #define SVGA_3D_CMD_LEGACY_BASE            1000
 #define SVGA_3D_CMD_BASE                   1040
 
-#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0
+#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0     /* Deprecated */
 #define SVGA_3D_CMD_SURFACE_DESTROY        SVGA_3D_CMD_BASE + 1
 #define SVGA_3D_CMD_SURFACE_COPY           SVGA_3D_CMD_BASE + 2
 #define SVGA_3D_CMD_SURFACE_STRETCHBLT     SVGA_3D_CMD_BASE + 3
@@ -1008,7 +1040,7 @@ typedef enum {
 #define SVGA_3D_CMD_SETVIEWPORT            SVGA_3D_CMD_BASE + 15
 #define SVGA_3D_CMD_SETCLIPPLANE           SVGA_3D_CMD_BASE + 16
 #define SVGA_3D_CMD_CLEAR                  SVGA_3D_CMD_BASE + 17
-#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    // Deprecated
+#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    /* Deprecated */
 #define SVGA_3D_CMD_SHADER_DEFINE          SVGA_3D_CMD_BASE + 19
 #define SVGA_3D_CMD_SHADER_DESTROY         SVGA_3D_CMD_BASE + 20
 #define SVGA_3D_CMD_SET_SHADER             SVGA_3D_CMD_BASE + 21
@@ -1018,9 +1050,13 @@ typedef enum {
 #define SVGA_3D_CMD_BEGIN_QUERY            SVGA_3D_CMD_BASE + 25
 #define SVGA_3D_CMD_END_QUERY              SVGA_3D_CMD_BASE + 26
 #define SVGA_3D_CMD_WAIT_FOR_QUERY         SVGA_3D_CMD_BASE + 27
-#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    // Deprecated
+#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    /* Deprecated */
 #define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29
-#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 30
+#define SVGA_3D_CMD_SURFACE_DEFINE_V2      SVGA_3D_CMD_BASE + 30
+#define SVGA_3D_CMD_GENERATE_MIPMAPS       SVGA_3D_CMD_BASE + 31
+#define SVGA_3D_CMD_ACTIVATE_SURFACE       SVGA_3D_CMD_BASE + 40
+#define SVGA_3D_CMD_DEACTIVATE_SURFACE     SVGA_3D_CMD_BASE + 41
+#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 42
 
 #define SVGA_3D_CMD_FUTURE_MAX             2000
 
@@ -1031,9 +1067,9 @@ typedef enum {
 typedef struct {
    union {
       struct {
-         uint16  function;       // SVGA3dFogFunction
-         uint8   type;           // SVGA3dFogType
-         uint8   base;           // SVGA3dFogBase
+         uint16  function;       /* SVGA3dFogFunction */
+         uint8   type;           /* SVGA3dFogType */
+         uint8   base;           /* SVGA3dFogBase */
       };
       uint32     uintValue;
    };
@@ -1109,6 +1145,8 @@ typedef enum {
    SVGA3D_SURFACE_HINT_RENDERTARGET    = (1 << 6),
    SVGA3D_SURFACE_HINT_DEPTHSTENCIL    = (1 << 7),
    SVGA3D_SURFACE_HINT_WRITEONLY       = (1 << 8),
+   SVGA3D_SURFACE_MASKABLE_ANTIALIAS   = (1 << 9),
+   SVGA3D_SURFACE_AUTOGENMIPMAPS       = (1 << 10),
 } SVGA3dSurfaceFlags;
 
 typedef
@@ -1121,6 +1159,12 @@ struct {
    uint32                      sid;
    SVGA3dSurfaceFlags          surfaceFlags;
    SVGA3dSurfaceFormat         format;
+   /*
+    * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace
+    * structures must have the same value of numMipLevels field.
+    * Otherwise, all but the first SVGA3dSurfaceFace structures must have the
+    * numMipLevels set to 0.
+    */
    SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
    /*
     * Followed by an SVGA3dSize structure for each mip level in each face.
@@ -1135,6 +1179,31 @@ struct {
 
 typedef
 struct {
+   uint32                      sid;
+   SVGA3dSurfaceFlags          surfaceFlags;
+   SVGA3dSurfaceFormat         format;
+   /*
+    * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace
+    * structures must have the same value of numMipLevels field.
+    * Otherwise, all but the first SVGA3dSurfaceFace structures must have the
+    * numMipLevels set to 0.
+    */
+   SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
+   uint32                      multisampleCount;
+   SVGA3dTextureFilter         autogenFilter;
+   /*
+    * Followed by an SVGA3dSize structure for each mip level in each face.
+    *
+    * A note on surface sizes: Sizes are always specified in pixels,
+    * even if the true surface size is not a multiple of the minimum
+    * block size of the surface's format. For example, a 3x3x1 DXT1
+    * compressed texture would actually be stored as a 4x4x1 image in
+    * memory.
+    */
+} SVGA3dCmdDefineSurface_v2;     /* SVGA_3D_CMD_SURFACE_DEFINE_V2 */
+
+typedef
+struct {
    uint32               sid;
 } SVGA3dCmdDestroySurface;      /* SVGA_3D_CMD_SURFACE_DESTROY */
 
@@ -1474,10 +1543,12 @@ struct {
     * SVGA3dCmdDrawPrimitives structure. In order,
     * they are:
     *
-    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls'
-    * 2. SVGA3dPrimitiveRange, quantity 'numRanges'
+    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls', but no more than
+    *    SVGA3D_MAX_VERTEX_ARRAYS;
+    * 2. SVGA3dPrimitiveRange, quantity 'numRanges', but no more than
+    *    SVGA3D_MAX_DRAW_PRIMITIVE_RANGES;
     * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains
-    *    the frequency divisor for this the corresponding vertex decl)
+    *    the frequency divisor for the corresponding vertex decl).
     */
 } SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAWPRIMITIVES */
 
@@ -1671,6 +1742,12 @@ struct {
    /* Clipping: zero or more SVGASignedRects follow */
 } SVGA3dCmdBlitSurfaceToScreen;         /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
 
+typedef
+struct {
+   uint32               sid;
+   SVGA3dTextureFilter  filter;
+} SVGA3dCmdGenerateMipmaps;             /* SVGA_3D_CMD_GENERATE_MIPMAPS */
+
 
 /*
  * Capability query index.
@@ -1774,6 +1851,32 @@ typedef enum {
    SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16           = 67,
    SVGA3D_DEVCAP_SURFACEFMT_UYVY                   = 68,
    SVGA3D_DEVCAP_SURFACEFMT_YUY2                   = 69,
+   SVGA3D_DEVCAP_MULTISAMPLE_NONMASKABLESAMPLES    = 70,
+   SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES       = 71,
+   SVGA3D_DEVCAP_ALPHATOCOVERAGE                   = 72,
+   SVGA3D_DEVCAP_SUPERSAMPLE                       = 73,
+   SVGA3D_DEVCAP_AUTOGENMIPMAPS                    = 74,
+   SVGA3D_DEVCAP_SURFACEFMT_NV12                   = 75,
+   SVGA3D_DEVCAP_SURFACEFMT_AYUV                   = 76,
+
+   /*
+    * This is the maximum number of SVGA context IDs that the guest
+    * can define using SVGA_3D_CMD_CONTEXT_DEFINE.
+    */
+   SVGA3D_DEVCAP_MAX_CONTEXT_IDS                   = 77,
+
+   /*
+    * This is the maximum number of SVGA surface IDs that the guest
+    * can define using SVGA_3D_CMD_SURFACE_DEFINE*.
+    */
+   SVGA3D_DEVCAP_MAX_SURFACE_IDS                   = 78,
+
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF16                 = 79,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF24                 = 80,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT            = 81,
+
+   SVGA3D_DEVCAP_SURFACEFMT_BC4_UNORM              = 82,
+   SVGA3D_DEVCAP_SURFACEFMT_BC5_UNORM              = 83,
 
    /*
     * Don't add new caps into the previous section; the values in this
diff --git a/drivers/gpu/drm/vmwgfx/svga_escape.h b/drivers/gpu/drm/vmwgfx/svga_escape.h
index 7b85e9b8c854..8e8d9682e018 100644
--- a/drivers/gpu/drm/vmwgfx/svga_escape.h
+++ b/drivers/gpu/drm/vmwgfx/svga_escape.h
@@ -75,7 +75,7 @@
  */
 
 #define SVGA_ESCAPE_VMWARE_HINT               0x00030000
-#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  // Deprecated
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  /* Deprecated */
 
 typedef
 struct {
diff --git a/drivers/gpu/drm/vmwgfx/svga_overlay.h b/drivers/gpu/drm/vmwgfx/svga_overlay.h
index f753d73c14b4..f38416fcb046 100644
--- a/drivers/gpu/drm/vmwgfx/svga_overlay.h
+++ b/drivers/gpu/drm/vmwgfx/svga_overlay.h
@@ -38,9 +38,9 @@
  * Video formats we support
  */
 
-#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
-#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
-#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */
+#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */
+#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */
 
 typedef enum {
    SVGA_OVERLAY_FORMAT_INVALID = 0,
@@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs {
       uint32 streamId;
    } header;
 
-   // May include zero or more items.
+   /* May include zero or more items. */
    struct {
       uint32 registerId;
       uint32 value;
@@ -134,12 +134,12 @@ struct {
  */
 
 static inline bool
-VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
-                         uint32 *width,                     // IN / OUT
-                         uint32 *height,                    // IN / OUT
-                         uint32 *size,                      // OUT
-                         uint32 *pitches,                   // OUT (optional)
-                         uint32 *offsets)                   // OUT (optional)
+VMwareVideoGetAttributes(const SVGAOverlayFormat format,    /* IN */
+                         uint32 *width,                     /* IN / OUT */
+                         uint32 *height,                    /* IN / OUT */
+                         uint32 *size,                      /* OUT */
+                         uint32 *pitches,                   /* OUT (optional) */
+                         uint32 *offsets)                   /* OUT (optional) */
 {
     int tmp;
 
@@ -198,4 +198,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
     return true;
 }
 
-#endif // _SVGA_OVERLAY_H_
+#endif /* _SVGA_OVERLAY_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/svga_reg.h b/drivers/gpu/drm/vmwgfx/svga_reg.h
index ec5aad9b6ed3..01f63cb49678 100644
--- a/drivers/gpu/drm/vmwgfx/svga_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga_reg.h
@@ -276,7 +276,7 @@ enum {
  * possible.
  */
 #define SVGA_GMR_NULL         ((uint32) -1)
-#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  // Guest Framebuffer (GFB)
+#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  /* Guest Framebuffer (GFB) */
 
 typedef
 struct SVGAGuestMemDescriptor {
@@ -317,13 +317,35 @@ struct SVGAGMRImageFormat {
       struct {
          uint32 bitsPerPixel : 8;
          uint32 colorDepth   : 8;
-         uint32 reserved     : 16;  // Must be zero
+         uint32 reserved     : 16;  /* Must be zero */
       };
 
       uint32 value;
    };
 } SVGAGMRImageFormat;
 
+typedef
+struct SVGAGuestImage {
+   SVGAGuestPtr         ptr;
+
+   /*
+    * A note on interpretation of pitch: This value of pitch is the
+    * number of bytes between vertically adjacent image
+    * blocks. Normally this is the number of bytes between the first
+    * pixel of two adjacent scanlines. With compressed textures,
+    * however, this may represent the number of bytes between
+    * compression blocks rather than between rows of pixels.
+    *
+    * XXX: Compressed textures currently must be tightly packed in guest memory.
+    *
+    * If the image is 1-dimensional, pitch is ignored.
+    *
+    * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+    * assuming each row of blocks is tightly packed.
+    */
+   uint32 pitch;
+} SVGAGuestImage;
+
 /*
  * SVGAColorBGRX --
  *
@@ -339,7 +361,7 @@ struct SVGAColorBGRX {
          uint32 b : 8;
          uint32 g : 8;
          uint32 r : 8;
-         uint32 x : 8;  // Unused
+         uint32 x : 8;  /* Unused */
       };
 
       uint32 value;
@@ -395,16 +417,16 @@ struct SVGASignedPoint {
 #define SVGA_CAP_NONE               0x00000000
 #define SVGA_CAP_RECT_COPY          0x00000002
 #define SVGA_CAP_CURSOR             0x00000020
-#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
-#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040   /* Legacy (Use Cursor Bypass 3 instead) */
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   /* Legacy (Use Cursor Bypass 3 instead) */
 #define SVGA_CAP_8BIT_EMULATION     0x00000100
 #define SVGA_CAP_ALPHA_CURSOR       0x00000200
 #define SVGA_CAP_3D                 0x00004000
 #define SVGA_CAP_EXTENDED_FIFO      0x00008000
-#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_MULTIMON           0x00010000   /* Legacy multi-monitor support */
 #define SVGA_CAP_PITCHLOCK          0x00020000
 #define SVGA_CAP_IRQMASK            0x00040000
-#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   /* Legacy multi-monitor support */
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
 #define SVGA_CAP_GMR2               0x00400000
@@ -453,7 +475,7 @@ enum {
 
    SVGA_FIFO_CAPABILITIES = 4,
    SVGA_FIFO_FLAGS,
-   // Valid with SVGA_FIFO_CAP_FENCE:
+   /* Valid with SVGA_FIFO_CAP_FENCE: */
    SVGA_FIFO_FENCE,
 
    /*
@@ -466,33 +488,47 @@ enum {
     * extended FIFO.
     */
 
-   // Valid if exists (i.e. if extended FIFO enabled):
+   /* Valid if exists (i.e. if extended FIFO enabled): */
    SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
-   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */
    SVGA_FIFO_PITCHLOCK,
 
-   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */
    SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
    SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
    SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
    SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
    SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
 
-   // Valid with SVGA_FIFO_CAP_RESERVE:
+   /* Valid with SVGA_FIFO_CAP_RESERVE: */
    SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
 
    /*
-    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2:
     *
     * By default this is SVGA_ID_INVALID, to indicate that the cursor
     * coordinates are specified relative to the virtual root. If this
     * is set to a specific screen ID, cursor position is reinterpreted
-    * as a signed offset relative to that screen's origin. This is the
-    * only way to place the cursor on a non-rooted screen.
+    * as a signed offset relative to that screen's origin.
     */
    SVGA_FIFO_CURSOR_SCREEN_ID,
 
    /*
+    * Valid with SVGA_FIFO_CAP_DEAD
+    *
+    * An arbitrary value written by the host, drivers should not use it.
+    */
+   SVGA_FIFO_DEAD,
+
+   /*
+    * Valid with SVGA_FIFO_CAP_3D_HWVERSION_REVISED:
+    *
+    * Contains 3D HWVERSION (see SVGA3dHardwareVersion in svga3d_reg.h)
+    * on platforms that can enforce graphics resource limits.
+    */
+   SVGA_FIFO_3D_HWVERSION_REVISED,
+
+   /*
     * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
     * registers, but this must be done carefully and with judicious use of
     * capability bits, since comparisons based on SVGA_FIFO_MIN aren't
@@ -530,7 +566,7 @@ enum {
     * sets SVGA_FIFO_MIN high enough to leave room for them.
     */
 
-   // Valid if register exists:
+   /* Valid if register exists: */
    SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
    SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
    SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
@@ -731,6 +767,37 @@ enum {
  *
  *       - When a screen is resized, either using Screen Object commands or
  *         legacy multimon registers, its contents are preserved.
+ *
+ * SVGA_FIFO_CAP_GMR2 --
+ *
+ *    Provides new commands to define and remap guest memory regions (GMR).
+ *
+ *    New 2D commands:
+ *       DEFINE_GMR2, REMAP_GMR2.
+ *
+ * SVGA_FIFO_CAP_3D_HWVERSION_REVISED --
+ *
+ *    Indicates new register SVGA_FIFO_3D_HWVERSION_REVISED exists.
+ *    This register may replace SVGA_FIFO_3D_HWVERSION on platforms
+ *    that enforce graphics resource limits.  This allows the platform
+ *    to clear SVGA_FIFO_3D_HWVERSION and disable 3D in legacy guest
+ *    drivers that do not limit their resources.
+ *
+ *    Note this is an alias to SVGA_FIFO_CAP_GMR2 because these indicators
+ *    are codependent (and thus we use a single capability bit).
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT_2 --
+ *
+ *    Modifies the DEFINE_SCREEN command to include a guest provided
+ *    backing store in GMR memory and the bytesPerLine for the backing
+ *    store.  This capability requires the use of a backing store when
+ *    creating screen objects.  However if SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    is present then backing stores are optional.
+ *
+ * SVGA_FIFO_CAP_DEAD --
+ *
+ *    Drivers should not use this cap bit.  This cap bit can not be
+ *    reused since some hosts already expose it.
  */
 
 #define SVGA_FIFO_CAP_NONE                  0
@@ -742,6 +809,10 @@ enum {
 #define SVGA_FIFO_CAP_ESCAPE            (1<<5)
 #define SVGA_FIFO_CAP_RESERVE           (1<<6)
 #define SVGA_FIFO_CAP_SCREEN_OBJECT     (1<<7)
+#define SVGA_FIFO_CAP_GMR2              (1<<8)
+#define SVGA_FIFO_CAP_3D_HWVERSION_REVISED  SVGA_FIFO_CAP_GMR2
+#define SVGA_FIFO_CAP_SCREEN_OBJECT_2   (1<<9)
+#define SVGA_FIFO_CAP_DEAD              (1<<10)
 
 
 /*
@@ -752,7 +823,7 @@ enum {
 
 #define SVGA_FIFO_FLAG_NONE                 0
 #define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
-#define SVGA_FIFO_FLAG_RESERVED        (1<<31) // Internal use only
+#define SVGA_FIFO_FLAG_RESERVED        (1<<31) /* Internal use only */
 
 /*
  * FIFO reservation sentinel value
@@ -785,22 +856,22 @@ enum {
    SVGA_VIDEO_DATA_OFFSET,
    SVGA_VIDEO_FORMAT,
    SVGA_VIDEO_COLORKEY,
-   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_SIZE,          /* Deprecated */
    SVGA_VIDEO_WIDTH,
    SVGA_VIDEO_HEIGHT,
    SVGA_VIDEO_SRC_X,
    SVGA_VIDEO_SRC_Y,
    SVGA_VIDEO_SRC_WIDTH,
    SVGA_VIDEO_SRC_HEIGHT,
-   SVGA_VIDEO_DST_X,         // Signed int32
-   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_X,         /* Signed int32 */
+   SVGA_VIDEO_DST_Y,         /* Signed int32 */
    SVGA_VIDEO_DST_WIDTH,
    SVGA_VIDEO_DST_HEIGHT,
    SVGA_VIDEO_PITCH_1,
    SVGA_VIDEO_PITCH_2,
    SVGA_VIDEO_PITCH_3,
-   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
-   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_DATA_GMRID,    /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */
+   SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords (SVGA_ID_INVALID) */
    SVGA_VIDEO_NUM_REGS
 };
 
@@ -851,15 +922,51 @@ typedef struct SVGAOverlayUnit {
  *    compatibility. New flags can be added, and the struct may grow,
  *    but existing fields must retain their meaning.
  *
+ *    Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2 are required fields of
+ *    a SVGAGuestPtr that is used to back the screen contents.  This
+ *    memory must come from the GFB.  The guest is not allowed to
+ *    access the memory and doing so will have undefined results.  The
+ *    backing store is required to be page aligned and the size is
+ *    padded to the next page boundry.  The number of pages is:
+ *       (bytesPerLine * size.width * 4 + PAGE_SIZE - 1) / PAGE_SIZE
+ *
+ *    The pitch in the backingStore is required to be at least large
+ *    enough to hold a 32bbp scanline.  It is recommended that the
+ *    driver pad bytesPerLine for a potential performance win.
+ *
+ *    The cloneCount field is treated as a hint from the guest that
+ *    the user wants this display to be cloned, countCount times.  A
+ *    value of zero means no cloning should happen.
  */
 
-#define SVGA_SCREEN_HAS_ROOT    (1 << 0)  // Screen is present in the virtual coord space
-#define SVGA_SCREEN_IS_PRIMARY  (1 << 1)  // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)   // Guest is running a fullscreen app here
+#define SVGA_SCREEN_MUST_BE_SET     (1 << 0) /* Must be set or results undefined */
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */
+#define SVGA_SCREEN_IS_PRIMARY      (1 << 1) /* Guest considers this screen to be 'primary' */
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) /* Guest is running a fullscreen app here */
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When the screen is
+ * deactivated the base layer is defined to lose all contents and
+ * become black.  When a screen is deactivated the backing store is
+ * optional.  When set backingPtr and bytesPerLine will be ignored.
+ */
+#define SVGA_SCREEN_DEACTIVATE  (1 << 3)
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When this flag is set
+ * the screen contents will be outputted as all black to the user
+ * though the base layer contents is preserved.  The screen base layer
+ * can still be read and written to like normal though the no visible
+ * effect will be seen by the user.  When the flag is changed the
+ * screen will be blanked or redrawn to the current contents as needed
+ * without any extra commands from the driver.  This flag only has an
+ * effect when the screen is not deactivated.
+ */
+#define SVGA_SCREEN_BLANKING (1 << 4)
 
 typedef
 struct SVGAScreenObject {
-   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 structSize;   /* sizeof(SVGAScreenObject) */
    uint32 id;
    uint32 flags;
    struct {
@@ -869,7 +976,14 @@ struct SVGAScreenObject {
    struct {
       int32 x;
       int32 y;
-   } root;              // Only used if SVGA_SCREEN_HAS_ROOT is set.
+   } root;
+
+   /*
+    * Added and required by SVGA_FIFO_CAP_SCREEN_OBJECT_2, optional
+    * with SVGA_FIFO_CAP_SCREEN_OBJECT.
+    */
+   SVGAGuestImage backingStore;
+   uint32 cloneCount;
 } SVGAScreenObject;
 
 
@@ -944,7 +1058,7 @@ typedef enum {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdUpdate {
    uint32 x;
    uint32 y;
    uint32 width;
@@ -963,7 +1077,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdRectCopy {
    uint32 srcX;
    uint32 srcY;
    uint32 destX;
@@ -987,14 +1101,14 @@ struct {
  */
 
 typedef
-struct {
-   uint32 id;             // Reserved, must be zero.
+struct SVGAFifoCmdDefineCursor {
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
    uint32 height;
-   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
-   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 andMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
+   uint32 xorMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
    /*
     * Followed by scanline data for AND mask, then XOR mask.
     * Each scanline is padded to a 32-bit boundary.
@@ -1016,8 +1130,8 @@ struct {
  */
 
 typedef
-struct {
-   uint32 id;             // Reserved, must be zero.
+struct SVGAFifoCmdDefineAlphaCursor {
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
@@ -1039,7 +1153,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdUpdateVerbose {
    uint32 x;
    uint32 y;
    uint32 width;
@@ -1064,13 +1178,13 @@ struct {
 #define  SVGA_ROP_COPY                    0x03
 
 typedef
-struct {
-   uint32 color;     // In the same format as the GFB
+struct SVGAFifoCmdFrontRopFill {
+   uint32 color;     /* In the same format as the GFB */
    uint32 x;
    uint32 y;
    uint32 width;
    uint32 height;
-   uint32 rop;       // Must be SVGA_ROP_COPY
+   uint32 rop;       /* Must be SVGA_ROP_COPY */
 } SVGAFifoCmdFrontRopFill;
 
 
@@ -1107,7 +1221,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdEscape {
    uint32 nsid;
    uint32 size;
    /* followed by 'size' bytes of data */
@@ -1137,12 +1251,12 @@ struct {
  *    registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
 struct {
-   SVGAScreenObject screen;   // Variable-length according to version
+   SVGAScreenObject screen;   /* Variable-length according to version */
 } SVGAFifoCmdDefineScreen;
 
 
@@ -1153,7 +1267,7 @@ struct {
  *    re-use.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1206,7 +1320,7 @@ struct {
  *    GMRFB.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1243,7 +1357,7 @@ struct {
  *    SVGA_CMD_ANNOTATION_* commands for details.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1291,7 +1405,7 @@ struct {
  *    the time any subsequent FENCE commands are reached.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1326,7 +1440,7 @@ struct {
  *    user's display is being remoted over a network connection.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1358,7 +1472,7 @@ struct {
  *    undefined.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1381,8 +1495,7 @@ typedef
 struct {
    uint32 gmrId;
    uint32 numPages;
-}
-SVGAFifoCmdDefineGMR2;
+} SVGAFifoCmdDefineGMR2;
 
 
 /*
@@ -1424,8 +1537,8 @@ typedef
 struct {
    uint32 gmrId;
    SVGARemapGMR2Flags flags;
-	uint32 offsetPages; /* offset in pages to begin remap */
-	uint32 numPages; /* number of pages to remap */
+   uint32 offsetPages; /* offset in pages to begin remap */
+   uint32 numPages; /* number of pages to remap */
    /*
     * Followed by additional data depending on SVGARemapGMR2Flags.
     *
@@ -1434,7 +1547,6 @@ struct {
     * (according to flag SVGA_REMAP_GMR2_PPN64) follows.  If flag
     * SVGA_REMAP_GMR2_SINGLE_PPN is set, array contains a single entry.
     */
-}
-SVGAFifoCmdRemapGMR2;
+} SVGAFifoCmdRemapGMR2;
 
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 5d665ce8cbe4..5a72ed908232 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -42,6 +42,10 @@ static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM |
 static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR |
 	TTM_PL_FLAG_CACHED;
 
+static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR |
+	TTM_PL_FLAG_CACHED |
+	TTM_PL_FLAG_NO_EVICT;
+
 struct ttm_placement vmw_vram_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -56,6 +60,11 @@ static uint32_t vram_gmr_placement_flags[] = {
 	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
 };
 
+static uint32_t gmr_vram_placement_flags[] = {
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+};
+
 struct ttm_placement vmw_vram_gmr_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -65,6 +74,20 @@ struct ttm_placement vmw_vram_gmr_placement = {
 	.busy_placement = &gmr_placement_flags
 };
 
+static uint32_t vram_gmr_ne_placement_flags[] = {
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+struct ttm_placement vmw_vram_gmr_ne_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 2,
+	.placement = vram_gmr_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &gmr_ne_placement_flags
+};
+
 struct ttm_placement vmw_vram_sys_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -92,6 +115,30 @@ struct ttm_placement vmw_sys_placement = {
 	.busy_placement = &sys_placement_flags
 };
 
+static uint32_t evictable_placement_flags[] = {
+	TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+};
+
+struct ttm_placement vmw_evictable_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 3,
+	.placement = evictable_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct ttm_placement vmw_srf_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 1,
+	.num_busy_placement = 2,
+	.placement = &gmr_placement_flags,
+	.busy_placement = gmr_vram_placement_flags
+};
+
 struct vmw_ttm_backend {
 	struct ttm_backend backend;
 	struct page **pages;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
new file mode 100644
index 000000000000..3fa884db08ab
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -0,0 +1,322 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "ttm/ttm_placement.h"
+
+#include "drmP.h"
+#include "vmwgfx_drv.h"
+
+
+/**
+ * vmw_dmabuf_to_placement - Validate a buffer to placement.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo to avoid failures.
+ *
+ * Returns
+ *  -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_placement(struct vmw_private *dev_priv,
+			    struct vmw_dma_buffer *buf,
+			    struct ttm_placement *placement,
+			    bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+	ttm_bo_unreserve(bo);
+
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram_or_gmr - Move a buffer to vram or gmr.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement *placement;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (pin)
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	/**
+	 * Put BO in VRAM if there is space, otherwise as a GMR.
+	 * If there is no space in VRAM and GMR ids are all used up,
+	 * start evicting GMRs to make room. If the DMA buffer can't be
+	 * used as a GMR, this will return -ENOMEM.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_gmr_ne_placement;
+	else
+		placement = &vmw_vram_gmr_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	if (likely(ret == 0) || ret == -ERESTARTSYS)
+		goto err_unreserve;
+
+
+	/**
+	 * If that failed, try VRAM again, this time evicting
+	 * previous contents.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+err_unreserve:
+	ttm_bo_unreserve(bo);
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram - Move a buffer to vram.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+		       struct vmw_dma_buffer *buf,
+		       bool pin, bool interruptible)
+{
+	struct ttm_placement *placement;
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       placement,
+				       interruptible);
+}
+
+/**
+ * vmw_dmabuf_to_start_of_vram - Move a buffer to start of vram.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
+				struct vmw_dma_buffer *buf,
+				bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement placement;
+	int ret = 0;
+
+	if (pin)
+		placement = vmw_vram_ne_placement;
+	else
+		placement = vmw_vram_placement;
+	placement.lpfn = bo->num_pages;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (pin)
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err_unlock;
+
+	/* Is this buffer already in vram but not at the start of it? */
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    bo->mem.start < bo->num_pages &&
+	    bo->mem.start > 0)
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
+				       false, false);
+
+	ret = ttm_bo_validate(bo, &placement, interruptible, false, false);
+
+	/* For some reason we didn't up at the start of vram */
+	WARN_ON(ret == 0 && bo->offset != 0);
+
+	ttm_bo_unreserve(bo);
+err_unlock:
+	ttm_write_unlock(&vmaster->lock);
+
+	return ret;
+}
+
+
+/**
+ * vmw_dmabuf_upin - Unpin the buffer given buffer, does not move the buffer.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to unpin.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_unpin(struct vmw_private *dev_priv,
+		     struct vmw_dma_buffer *buf,
+		     bool interruptible)
+{
+	/*
+	 * We could in theory early out if the buffer is
+	 * unpinned but we need to lock and reserve the buffer
+	 * anyways so we don't gain much by that.
+	 */
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       &vmw_evictable_placement,
+				       interruptible);
+}
+
+
+/**
+ * vmw_bo_get_guest_ptr - Get the guest ptr representing the current placement
+ * of a buffer.
+ *
+ * @bo: Pointer to a struct ttm_buffer_object. Must be pinned or reserved.
+ * @ptr: SVGAGuestPtr returning the result.
+ */
+void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
+			  SVGAGuestPtr *ptr)
+{
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		ptr->gmrId = SVGA_GMR_FRAMEBUFFER;
+		ptr->offset = bo->offset;
+	} else {
+		ptr->gmrId = bo->mem.start;
+		ptr->offset = 0;
+	}
+}
+
+
+/**
+ * vmw_bo_pin - Pin or unpin a buffer object without moving it.
+ *
+ * @bo: The buffer object. Must be reserved, and present either in VRAM
+ * or GMR memory.
+ * @pin: Whether to pin or unpin.
+ *
+ */
+void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin)
+{
+	uint32_t pl_flags;
+	struct ttm_placement placement;
+	uint32_t old_mem_type = bo->mem.mem_type;
+	int ret;
+
+	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(old_mem_type != TTM_PL_VRAM &&
+	       old_mem_type != VMW_PL_FLAG_GMR);
+
+	pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED;
+	if (pin)
+		pl_flags |= TTM_PL_FLAG_NO_EVICT;
+
+	memset(&placement, 0, sizeof(placement));
+	placement.num_placement = 1;
+	placement.placement = &pl_flags;
+
+	ret = ttm_bo_validate(bo, &placement, false, true, true);
+
+	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index d4829cbf326d..b8eb8cdcfb78 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -94,6 +94,15 @@
 #define DRM_IOCTL_VMW_FENCE_UNREF				\
 	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_UNREF,		\
 		 struct drm_vmw_fence_arg)
+#define DRM_IOCTL_VMW_FENCE_EVENT				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_EVENT,		\
+		 struct drm_vmw_fence_event_arg)
+#define DRM_IOCTL_VMW_PRESENT					\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_PRESENT,		\
+		 struct drm_vmw_present_arg)
+#define DRM_IOCTL_VMW_PRESENT_READBACK				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_PRESENT_READBACK,	\
+		 struct drm_vmw_present_readback_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -144,8 +153,18 @@ static struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_EVENT,
+		      vmw_fence_event_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
+
+	/* these allow direct access to the framebuffers mark as master only */
+	VMW_IOCTL_DEF(VMW_PRESENT, vmw_present_ioctl,
+		      DRM_MASTER | DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_PRESENT_READBACK,
+		      vmw_present_readback_ioctl,
+		      DRM_MASTER | DRM_AUTH | DRM_UNLOCKED),
 };
 
 static struct pci_device_id vmw_pci_id_list[] = {
@@ -200,6 +219,72 @@ static void vmw_print_capabilities(uint32_t capabilities)
 		DRM_INFO("  Screen Object 2.\n");
 }
 
+
+/**
+ * vmw_execbuf_prepare_dummy_query - Initialize a query result structure at
+ * the start of a buffer object.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function will idle the buffer using an uninterruptible wait, then
+ * map the first page and initialize a pending occlusion query result structure,
+ * Finally it will unmap the buffer.
+ *
+ * TODO: Since we're only mapping a single page, we should optimize the map
+ * to use kmap_atomic / iomap_atomic.
+ */
+static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv)
+{
+	struct ttm_bo_kmap_obj map;
+	volatile SVGA3dQueryResult *result;
+	bool dummy;
+	int ret;
+	struct ttm_bo_device *bdev = &dev_priv->bdev;
+	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+
+	ttm_bo_reserve(bo, false, false, false, 0);
+	spin_lock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
+	spin_unlock(&bdev->fence_lock);
+	if (unlikely(ret != 0))
+		(void) vmw_fallback_wait(dev_priv, false, true, 0, false,
+					 10*HZ);
+
+	ret = ttm_bo_kmap(bo, 0, 1, &map);
+	if (likely(ret == 0)) {
+		result = ttm_kmap_obj_virtual(&map, &dummy);
+		result->totalSize = sizeof(*result);
+		result->state = SVGA3D_QUERYSTATE_PENDING;
+		result->result32 = 0xff;
+		ttm_bo_kunmap(&map);
+	} else
+		DRM_ERROR("Dummy query buffer map failed.\n");
+	ttm_bo_unreserve(bo);
+}
+
+
+/**
+ * vmw_dummy_query_bo_create - create a bo to hold a dummy query result
+ *
+ * @dev_priv: A device private structure.
+ *
+ * This function creates a small buffer object that holds the query
+ * result for dummy queries emitted as query barriers.
+ * No interruptible waits are done within this function.
+ *
+ * Returns an error if bo creation fails.
+ */
+static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
+{
+	return ttm_bo_create(&dev_priv->bdev,
+			     PAGE_SIZE,
+			     ttm_bo_type_device,
+			     &vmw_vram_sys_placement,
+			     0, 0, false, NULL,
+			     &dev_priv->dummy_query_bo);
+}
+
+
 static int vmw_request_device(struct vmw_private *dev_priv)
 {
 	int ret;
@@ -210,12 +295,29 @@ static int vmw_request_device(struct vmw_private *dev_priv)
 		return ret;
 	}
 	vmw_fence_fifo_up(dev_priv->fman);
+	ret = vmw_dummy_query_bo_create(dev_priv);
+	if (unlikely(ret != 0))
+		goto out_no_query_bo;
+	vmw_dummy_query_bo_prepare(dev_priv);
 
 	return 0;
+
+out_no_query_bo:
+	vmw_fence_fifo_down(dev_priv->fman);
+	vmw_fifo_release(dev_priv, &dev_priv->fifo);
+	return ret;
 }
 
 static void vmw_release_device(struct vmw_private *dev_priv)
 {
+	/*
+	 * Previous destructions should've released
+	 * the pinned bo.
+	 */
+
+	BUG_ON(dev_priv->pinned_bo != NULL);
+
+	ttm_bo_unref(&dev_priv->dummy_query_bo);
 	vmw_fence_fifo_down(dev_priv->fman);
 	vmw_fifo_release(dev_priv, &dev_priv->fifo);
 }
@@ -306,6 +408,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	init_waitqueue_head(&dev_priv->fifo_queue);
 	dev_priv->fence_queue_waiters = 0;
 	atomic_set(&dev_priv->fifo_queue_waiters, 0);
+	INIT_LIST_HEAD(&dev_priv->surface_lru);
+	dev_priv->used_memory_size = 0;
 
 	dev_priv->io_start = pci_resource_start(dev->pdev, 0);
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
@@ -326,6 +430,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES);
 
+	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
+	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
+	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
+	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
 	if (dev_priv->capabilities & SVGA_CAP_GMR) {
 		dev_priv->max_gmr_descriptors =
 			vmw_read(dev_priv,
@@ -338,13 +446,15 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			vmw_read(dev_priv, SVGA_REG_GMRS_MAX_PAGES);
 		dev_priv->memory_size =
 			vmw_read(dev_priv, SVGA_REG_MEMORY_SIZE);
+		dev_priv->memory_size -= dev_priv->vram_size;
+	} else {
+		/*
+		 * An arbitrary limit of 512MiB on surface
+		 * memory. But all HWV8 hardware supports GMR2.
+		 */
+		dev_priv->memory_size = 512*1024*1024;
 	}
 
-	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
-	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
-	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
-	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
-
 	mutex_unlock(&dev_priv->hw_mutex);
 
 	vmw_print_capabilities(dev_priv->capabilities);
@@ -358,8 +468,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
 		DRM_INFO("Max number of GMR pages is %u\n",
 			 (unsigned)dev_priv->max_gmr_pages);
-		DRM_INFO("Max dedicated hypervisor graphics memory is %u\n",
-			 (unsigned)dev_priv->memory_size);
+		DRM_INFO("Max dedicated hypervisor surface memory is %u kiB\n",
+			 (unsigned)dev_priv->memory_size / 1024);
 	}
 	DRM_INFO("VRAM at 0x%08x size is %u kiB\n",
 		 dev_priv->vram_start, dev_priv->vram_size / 1024);
@@ -451,22 +561,30 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->fman = vmw_fence_manager_init(dev_priv);
 	if (unlikely(dev_priv->fman == NULL))
 		goto out_no_fman;
+
+	/* Need to start the fifo to check if we can do screen objects */
+	ret = vmw_3d_resource_inc(dev_priv, true);
+	if (unlikely(ret != 0))
+		goto out_no_fifo;
+	vmw_kms_save_vga(dev_priv);
+
+	/* Start kms and overlay systems, needs fifo. */
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
 	vmw_overlay_init(dev_priv);
+
+	/* 3D Depends on Screen Objects being used. */
+	DRM_INFO("Detected %sdevice 3D availability.\n",
+		 vmw_fifo_have_3d(dev_priv) ?
+		 "" : "no ");
+
+	/* We might be done with the fifo now */
 	if (dev_priv->enable_fb) {
-		ret = vmw_3d_resource_inc(dev_priv, false);
-		if (unlikely(ret != 0))
-			goto out_no_fifo;
-		vmw_kms_save_vga(dev_priv);
 		vmw_fb_init(dev_priv);
-		DRM_INFO("%s", vmw_fifo_have_3d(dev_priv) ?
-			 "Detected device 3D availability.\n" :
-			 "Detected no device 3D availability.\n");
 	} else {
-		DRM_INFO("Delayed 3D detection since we're not "
-			 "running the device in SVGA mode yet.\n");
+		vmw_kms_restore_vga(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
@@ -483,15 +601,17 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	return 0;
 
 out_no_irq:
-	if (dev_priv->enable_fb) {
+	if (dev_priv->enable_fb)
 		vmw_fb_close(dev_priv);
+	vmw_overlay_close(dev_priv);
+	vmw_kms_close(dev_priv);
+out_no_kms:
+	/* We still have a 3D resource reference held */
+	if (dev_priv->enable_fb) {
 		vmw_kms_restore_vga(dev_priv);
 		vmw_3d_resource_dec(dev_priv, false);
 	}
 out_no_fifo:
-	vmw_overlay_close(dev_priv);
-	vmw_kms_close(dev_priv);
-out_no_kms:
 	vmw_fence_manager_takedown(dev_priv->fman);
 out_no_fman:
 	if (dev_priv->stealth)
@@ -771,7 +891,7 @@ static void vmw_master_drop(struct drm_device *dev,
 
 	vmw_fp->locked_master = drm_master_get(file_priv->master);
 	ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
-	vmw_kms_idle_workqueues(vmaster);
+	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
 
 	if (unlikely((ret != 0))) {
 		DRM_ERROR("Unable to lock TTM at VT switch.\n");
@@ -823,6 +943,7 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
 		 * This empties VRAM and unbinds all GMR bindings.
 		 * Buffer contents is moved to swappable memory.
 		 */
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
 		ttm_bo_swapout_all(&dev_priv->bdev);
 
 		break;
@@ -948,6 +1069,8 @@ static struct drm_driver driver = {
 	.irq_uninstall = vmw_irq_uninstall,
 	.irq_handler = vmw_irq_handler,
 	.get_vblank_counter = vmw_get_vblank_counter,
+	.enable_vblank = vmw_enable_vblank,
+	.disable_vblank = vmw_disable_vblank,
 	.reclaim_buffers_locked = NULL,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls),
@@ -964,7 +1087,8 @@ static struct drm_driver driver = {
 		 .release = drm_release,
 		 .unlocked_ioctl = vmw_unlocked_ioctl,
 		 .mmap = vmw_mmap,
-		 .poll = drm_poll,
+		 .poll = vmw_fops_poll,
+		 .read = vmw_fops_read,
 		 .fasync = drm_fasync,
 #if defined(CONFIG_COMPAT)
 		 .compat_ioctl = drm_compat_ioctl,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 564a81582111..30589d0aecd9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -40,9 +40,9 @@
 #include "ttm/ttm_module.h"
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20110901"
+#define VMWGFX_DRIVER_DATE "20111008"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 0
+#define VMWGFX_DRIVER_MINOR 2
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
@@ -79,9 +79,11 @@ struct vmw_resource {
 	int id;
 	enum ttm_object_type res_type;
 	bool avail;
+	void (*remove_from_lists) (struct vmw_resource *res);
 	void (*hw_destroy) (struct vmw_resource *res);
 	void (*res_free) (struct vmw_resource *res);
-	bool on_validate_list;
+	struct list_head validate_head;
+	struct list_head query_head; /* Protected by the cmdbuf mutex */
 	/* TODO is a generic snooper needed? */
 #if 0
 	void (*snoop)(struct vmw_resource *res,
@@ -97,8 +99,12 @@ struct vmw_cursor_snooper {
 	uint32_t *image;
 };
 
+struct vmw_framebuffer;
+struct vmw_surface_offset;
+
 struct vmw_surface {
 	struct vmw_resource res;
+	struct list_head lru_head; /* Protected by the resource lock */
 	uint32_t flags;
 	uint32_t format;
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
@@ -109,6 +115,9 @@ struct vmw_surface {
 
 	/* TODO so far just a extra pointer */
 	struct vmw_cursor_snooper snooper;
+	struct ttm_buffer_object *backup;
+	struct vmw_surface_offset *offsets;
+	uint32_t backup_size;
 };
 
 struct vmw_marker_queue {
@@ -139,6 +148,8 @@ struct vmw_sw_context{
 	struct ida bo_list;
 	uint32_t last_cid;
 	bool cid_valid;
+	bool kernel; /**< is the called made from the kernel */
+	struct vmw_resource *cur_ctx;
 	uint32_t last_sid;
 	uint32_t sid_translation;
 	bool sid_valid;
@@ -150,8 +161,12 @@ struct vmw_sw_context{
 	uint32_t cur_val_buf;
 	uint32_t *cmd_bounce;
 	uint32_t cmd_bounce_size;
-	struct vmw_resource *resources[VMWGFX_MAX_VALIDATIONS];
-	uint32_t num_ref_resources;
+	struct list_head resource_list;
+	uint32_t fence_flags;
+	struct list_head query_list;
+	struct ttm_buffer_object *cur_query_bo;
+	uint32_t cur_query_cid;
+	bool query_cid_valid;
 };
 
 struct vmw_legacy_display;
@@ -216,6 +231,7 @@ struct vmw_private {
 
 	void *fb_info;
 	struct vmw_legacy_display *ldu_priv;
+	struct vmw_screen_object_display *sou_priv;
 	struct vmw_overlay *overlay_priv;
 
 	/*
@@ -248,10 +264,12 @@ struct vmw_private {
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
 	int fence_queue_waiters; /* Protected by hw_mutex */
+	int goal_queue_waiters; /* Protected by hw_mutex */
 	atomic_t fifo_queue_waiters;
 	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
 	struct vmw_fence_manager *fman;
+	uint32_t irq_mask;
 
 	/*
 	 * Device state
@@ -290,6 +308,26 @@ struct vmw_private {
 
 	struct mutex release_mutex;
 	uint32_t num_3d_resources;
+
+	/*
+	 * Query processing. These members
+	 * are protected by the cmdbuf mutex.
+	 */
+
+	struct ttm_buffer_object *dummy_query_bo;
+	struct ttm_buffer_object *pinned_bo;
+	uint32_t query_cid;
+	bool dummy_query_bo_pinned;
+
+	/*
+	 * Surface swapping. The "surface_lru" list is protected by the
+	 * resource lock in order to be able to destroy a surface and take
+	 * it off the lru atomically. "used_memory_size" is currently
+	 * protected by the cmdbuf mutex for simplicity.
+	 */
+
+	struct list_head surface_lru;
+	uint32_t used_memory_size;
 };
 
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
@@ -369,6 +407,8 @@ extern int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
 extern int vmw_surface_check(struct vmw_private *dev_priv,
 			     struct ttm_object_file *tfile,
 			     uint32_t handle, int *id);
+extern int vmw_surface_validate(struct vmw_private *dev_priv,
+				struct vmw_surface *srf);
 extern void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo);
 extern int vmw_dmabuf_init(struct vmw_private *dev_priv,
 			   struct vmw_dma_buffer *vmw_bo,
@@ -384,10 +424,6 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo,
 extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo);
 extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
 				  uint32_t id, struct vmw_dma_buffer **out);
-extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				       struct vmw_dma_buffer *bo);
-extern int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *bo);
 extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -396,7 +432,30 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 				  struct ttm_object_file *tfile,
 				  uint32_t *inout_id,
 				  struct vmw_resource **out);
+extern void vmw_resource_unreserve(struct list_head *list);
 
+/**
+ * DMA buffer helper routines - vmwgfx_dmabuf.c
+ */
+extern int vmw_dmabuf_to_placement(struct vmw_private *vmw_priv,
+				   struct vmw_dma_buffer *bo,
+				   struct ttm_placement *placement,
+				   bool interruptible);
+extern int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible);
+extern int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+				     struct vmw_dma_buffer *buf,
+				     bool pin, bool interruptible);
+extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
+				       struct vmw_dma_buffer *bo,
+				       bool pin, bool interruptible);
+extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv,
+			    struct vmw_dma_buffer *bo,
+			    bool interruptible);
+extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf,
+				 SVGAGuestPtr *ptr);
+extern void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin);
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -406,6 +465,14 @@ extern int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 extern int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+extern int vmw_present_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+extern int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_priv);
+extern unsigned int vmw_fops_poll(struct file *filp,
+				  struct poll_table_struct *wait);
+extern ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *offset);
 
 /**
  * Fifo utilities - vmwgfx_fifo.c
@@ -422,6 +489,8 @@ extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
+extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+				     uint32_t cid);
 
 /**
  * TTM glue - vmwgfx_ttm_glue.c
@@ -439,7 +508,10 @@ extern struct ttm_placement vmw_vram_placement;
 extern struct ttm_placement vmw_vram_ne_placement;
 extern struct ttm_placement vmw_vram_sys_placement;
 extern struct ttm_placement vmw_vram_gmr_placement;
+extern struct ttm_placement vmw_vram_gmr_ne_placement;
 extern struct ttm_placement vmw_sys_placement;
+extern struct ttm_placement vmw_evictable_placement;
+extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 
@@ -449,6 +521,30 @@ extern int vmw_dma_quiescent(struct drm_device *dev);
 
 extern int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
+extern int vmw_execbuf_process(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       void __user *user_commands,
+			       void *kernel_commands,
+			       uint32_t command_size,
+			       uint64_t throttle_us,
+			       struct drm_vmw_fence_rep __user
+			       *user_fence_rep);
+
+extern void
+vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+			      bool only_on_cid_match, uint32_t cid);
+
+extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+				      struct vmw_private *dev_priv,
+				      struct vmw_fence_obj **p_fence,
+				      uint32_t *p_handle);
+extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+					struct vmw_fpriv *vmw_fp,
+					int ret,
+					struct drm_vmw_fence_rep __user
+					*user_fence_rep,
+					struct vmw_fence_obj *fence,
+					uint32_t fence_handle);
 
 /**
  * IRQs and wating - vmwgfx_irq.c
@@ -473,6 +569,8 @@ extern void vmw_update_seqno(struct vmw_private *dev_priv,
 				struct vmw_fifo_state *fifo_state);
 extern void vmw_seqno_waiter_add(struct vmw_private *dev_priv);
 extern void vmw_seqno_waiter_remove(struct vmw_private *dev_priv);
+extern void vmw_goal_waiter_add(struct vmw_private *dev_priv);
+extern void vmw_goal_waiter_remove(struct vmw_private *dev_priv);
 
 /**
  * Rudimentary fence-like objects currently used only for throttling -
@@ -520,6 +618,21 @@ bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 				uint32_t pitch,
 				uint32_t height);
 u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc);
+int vmw_enable_vblank(struct drm_device *dev, int crtc);
+void vmw_disable_vblank(struct drm_device *dev, int crtc);
+int vmw_kms_present(struct vmw_private *dev_priv,
+		    struct drm_file *file_priv,
+		    struct vmw_framebuffer *vfb,
+		    struct vmw_surface *surface,
+		    uint32_t sid, int32_t destX, int32_t destY,
+		    struct drm_vmw_rect *clips,
+		    uint32_t num_clips);
+int vmw_kms_readback(struct vmw_private *dev_priv,
+		     struct drm_file *file_priv,
+		     struct vmw_framebuffer *vfb,
+		     struct drm_vmw_fence_rep __user *user_fence_rep,
+		     struct drm_vmw_rect *clips,
+		     uint32_t num_clips);
 
 /**
  * Overlay control - vmwgfx_overlay.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index fa26e647f488..28e1c35aec6f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -44,28 +44,64 @@ static int vmw_cmd_ok(struct vmw_private *dev_priv,
 	return 0;
 }
 
-
-static int vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
-					 struct vmw_resource **p_res)
+static void vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
+					  struct vmw_resource **p_res)
 {
-	int ret = 0;
 	struct vmw_resource *res = *p_res;
 
-	if (!res->on_validate_list) {
-		if (sw_context->num_ref_resources >= VMWGFX_MAX_VALIDATIONS) {
-			DRM_ERROR("Too many resources referenced in "
-				  "command stream.\n");
-			ret = -ENOMEM;
-			goto out;
-		}
-		sw_context->resources[sw_context->num_ref_resources++] = res;
-		res->on_validate_list = true;
-		return 0;
+	if (list_empty(&res->validate_head)) {
+		list_add_tail(&res->validate_head, &sw_context->resource_list);
+		*p_res = NULL;
+	} else
+		vmw_resource_unreference(p_res);
+}
+
+/**
+ * vmw_bo_to_validate_list - add a bo to a validate list
+ *
+ * @sw_context: The software context used for this command submission batch.
+ * @bo: The buffer object to add.
+ * @fence_flags: Fence flags to be or'ed with any other fence flags for
+ * this buffer on this submission batch.
+ * @p_val_node: If non-NULL Will be updated with the validate node number
+ * on return.
+ *
+ * Returns -EINVAL if the limit of number of buffer objects per command
+ * submission is reached.
+ */
+static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
+				   struct ttm_buffer_object *bo,
+				   uint32_t fence_flags,
+				   uint32_t *p_val_node)
+{
+	uint32_t val_node;
+	struct ttm_validate_buffer *val_buf;
+
+	val_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
+
+	if (unlikely(val_node >= VMWGFX_MAX_VALIDATIONS)) {
+		DRM_ERROR("Max number of DMA buffers per submission"
+			  " exceeded.\n");
+		return -EINVAL;
 	}
 
-out:
-	vmw_resource_unreference(p_res);
-	return ret;
+	val_buf = &sw_context->val_bufs[val_node];
+	if (unlikely(val_node == sw_context->cur_val_buf)) {
+		val_buf->new_sync_obj_arg = NULL;
+		val_buf->bo = ttm_bo_reference(bo);
+		val_buf->usage = TTM_USAGE_READWRITE;
+		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
+		++sw_context->cur_val_buf;
+	}
+
+	val_buf->new_sync_obj_arg = (void *)
+		((unsigned long) val_buf->new_sync_obj_arg | fence_flags);
+	sw_context->fence_flags |= fence_flags;
+
+	if (p_val_node)
+		*p_val_node = val_node;
+
+	return 0;
 }
 
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
@@ -94,7 +130,10 @@ static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 
 	sw_context->last_cid = cmd->cid;
 	sw_context->cid_valid = true;
-	return vmw_resource_to_validate_list(sw_context, &ctx);
+	sw_context->cur_ctx = ctx;
+	vmw_resource_to_validate_list(sw_context, &ctx);
+
+	return 0;
 }
 
 static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
@@ -114,7 +153,8 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 		return 0;
 	}
 
-	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
+	ret = vmw_user_surface_lookup_handle(dev_priv,
+					     sw_context->tfile,
 					     *sid, &srf);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could ot find or use surface 0x%08x "
@@ -124,13 +164,23 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 		return ret;
 	}
 
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Could not validate surface.\n");
+		vmw_surface_unreference(&srf);
+		return ret;
+	}
+
 	sw_context->last_sid = *sid;
 	sw_context->sid_valid = true;
 	sw_context->sid_translation = srf->res.id;
 	*sid = sw_context->sid_translation;
 
 	res = &srf->res;
-	return vmw_resource_to_validate_list(sw_context, &res);
+	vmw_resource_to_validate_list(sw_context, &res);
+
+	return 0;
 }
 
 
@@ -197,6 +247,12 @@ static int vmw_cmd_blt_surf_screen_check(struct vmw_private *dev_priv,
 	} *cmd;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA3d command: %u.\n", cmd->header.id);
+		return -EPERM;
+	}
+
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.srcImage.sid);
 }
 
@@ -209,10 +265,179 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
 		SVGA3dCmdPresent body;
 	} *cmd;
 
+
 	cmd = container_of(header, struct vmw_sid_cmd, header);
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA3d command: %u.\n", cmd->header.id);
+		return -EPERM;
+	}
+
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
 }
 
+/**
+ * vmw_query_bo_switch_prepare - Prepare to switch pinned buffer for queries.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context for the next query.
+ * @new_query_bo: The new buffer holding query results.
+ * @sw_context: The software context used for this command submission.
+ *
+ * This function checks whether @new_query_bo is suitable for holding
+ * query results, and if another buffer currently is pinned for query
+ * results. If so, the function prepares the state of @sw_context for
+ * switching pinned buffers after successful submission of the current
+ * command batch. It also checks whether we're using a new query context.
+ * In that case, it makes sure we emit a query barrier for the old
+ * context before the current query buffer is fenced.
+ */
+static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
+				       uint32_t cid,
+				       struct ttm_buffer_object *new_query_bo,
+				       struct vmw_sw_context *sw_context)
+{
+	int ret;
+	bool add_cid = false;
+	uint32_t cid_to_add;
+
+	if (unlikely(new_query_bo != sw_context->cur_query_bo)) {
+
+		if (unlikely(new_query_bo->num_pages > 4)) {
+			DRM_ERROR("Query buffer too large.\n");
+			return -EINVAL;
+		}
+
+		if (unlikely(sw_context->cur_query_bo != NULL)) {
+			BUG_ON(!sw_context->query_cid_valid);
+			add_cid = true;
+			cid_to_add = sw_context->cur_query_cid;
+			ret = vmw_bo_to_validate_list(sw_context,
+						      sw_context->cur_query_bo,
+						      DRM_VMW_FENCE_FLAG_EXEC,
+						      NULL);
+			if (unlikely(ret != 0))
+				return ret;
+		}
+		sw_context->cur_query_bo = new_query_bo;
+
+		ret = vmw_bo_to_validate_list(sw_context,
+					      dev_priv->dummy_query_bo,
+					      DRM_VMW_FENCE_FLAG_EXEC,
+					      NULL);
+		if (unlikely(ret != 0))
+			return ret;
+
+	}
+
+	if (unlikely(cid != sw_context->cur_query_cid &&
+		     sw_context->query_cid_valid)) {
+		add_cid = true;
+		cid_to_add = sw_context->cur_query_cid;
+	}
+
+	sw_context->cur_query_cid = cid;
+	sw_context->query_cid_valid = true;
+
+	if (add_cid) {
+		struct vmw_resource *ctx = sw_context->cur_ctx;
+
+		if (list_empty(&ctx->query_head))
+			list_add_tail(&ctx->query_head,
+				      &sw_context->query_list);
+		ret = vmw_bo_to_validate_list(sw_context,
+					      dev_priv->dummy_query_bo,
+					      DRM_VMW_FENCE_FLAG_EXEC,
+					      NULL);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+	return 0;
+}
+
+
+/**
+ * vmw_query_bo_switch_commit - Finalize switching pinned query buffer
+ *
+ * @dev_priv: The device private structure.
+ * @sw_context: The software context used for this command submission batch.
+ *
+ * This function will check if we're switching query buffers, and will then,
+ * if no other query waits are issued this command submission batch,
+ * issue a dummy occlusion query wait used as a query barrier. When the fence
+ * object following that query wait has signaled, we are sure that all
+ * preseding queries have finished, and the old query buffer can be unpinned.
+ * However, since both the new query buffer and the old one are fenced with
+ * that fence, we can do an asynchronus unpin now, and be sure that the
+ * old query buffer won't be moved until the fence has signaled.
+ *
+ * As mentioned above, both the new - and old query buffers need to be fenced
+ * using a sequence emitted *after* calling this function.
+ */
+static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
+				     struct vmw_sw_context *sw_context)
+{
+
+	struct vmw_resource *ctx, *next_ctx;
+	int ret;
+
+	/*
+	 * The validate list should still hold references to all
+	 * contexts here.
+	 */
+
+	list_for_each_entry_safe(ctx, next_ctx, &sw_context->query_list,
+				 query_head) {
+		list_del_init(&ctx->query_head);
+
+		BUG_ON(list_empty(&ctx->validate_head));
+
+		ret = vmw_fifo_emit_dummy_query(dev_priv, ctx->id);
+
+		if (unlikely(ret != 0))
+			DRM_ERROR("Out of fifo space for dummy query.\n");
+	}
+
+	if (dev_priv->pinned_bo != sw_context->cur_query_bo) {
+		if (dev_priv->pinned_bo) {
+			vmw_bo_pin(dev_priv->pinned_bo, false);
+			ttm_bo_unref(&dev_priv->pinned_bo);
+		}
+
+		vmw_bo_pin(sw_context->cur_query_bo, true);
+
+		/*
+		 * We pin also the dummy_query_bo buffer so that we
+		 * don't need to validate it when emitting
+		 * dummy queries in context destroy paths.
+		 */
+
+		vmw_bo_pin(dev_priv->dummy_query_bo, true);
+		dev_priv->dummy_query_bo_pinned = true;
+
+		dev_priv->query_cid = sw_context->cur_query_cid;
+		dev_priv->pinned_bo =
+			ttm_bo_reference(sw_context->cur_query_bo);
+	}
+}
+
+/**
+ * vmw_query_switch_backoff - clear query barrier list
+ * @sw_context: The sw context used for this submission batch.
+ *
+ * This function is used as part of an error path, where a previously
+ * set up list of query barriers needs to be cleared.
+ *
+ */
+static void vmw_query_switch_backoff(struct vmw_sw_context *sw_context)
+{
+	struct list_head *list, *next;
+
+	list_for_each_safe(list, next, &sw_context->query_list) {
+		list_del_init(list);
+	}
+}
+
 static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 				   struct vmw_sw_context *sw_context,
 				   SVGAGuestPtr *ptr,
@@ -222,8 +447,6 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	struct ttm_buffer_object *bo;
 	uint32_t handle = ptr->gmrId;
 	struct vmw_relocation *reloc;
-	uint32_t cur_validate_node;
-	struct ttm_validate_buffer *val_buf;
 	int ret;
 
 	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
@@ -243,23 +466,11 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	reloc = &sw_context->relocs[sw_context->cur_reloc++];
 	reloc->location = ptr;
 
-	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-	if (unlikely(cur_validate_node >= VMWGFX_MAX_VALIDATIONS)) {
-		DRM_ERROR("Max number of DMA buffers per submission"
-			  " exceeded.\n");
-		ret = -EINVAL;
+	ret = vmw_bo_to_validate_list(sw_context, bo, DRM_VMW_FENCE_FLAG_EXEC,
+				      &reloc->index);
+	if (unlikely(ret != 0))
 		goto out_no_reloc;
-	}
 
-	reloc->index = cur_validate_node;
-	if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
-		val_buf = &sw_context->val_bufs[cur_validate_node];
-		val_buf->bo = ttm_bo_reference(bo);
-		val_buf->usage = TTM_USAGE_READWRITE;
-		val_buf->new_sync_obj_arg = (void *) DRM_VMW_FENCE_FLAG_EXEC;
-		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
-		++sw_context->cur_val_buf;
-	}
 	*vmw_bo_p = vmw_bo;
 	return 0;
 
@@ -291,8 +502,11 @@ static int vmw_cmd_end_query(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
+	ret = vmw_query_bo_switch_prepare(dev_priv, cmd->q.cid,
+					  &vmw_bo->base, sw_context);
+
 	vmw_dmabuf_unreference(&vmw_bo);
-	return 0;
+	return ret;
 }
 
 static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
@@ -305,6 +519,7 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 		SVGA3dCmdWaitForQuery q;
 	} *cmd;
 	int ret;
+	struct vmw_resource *ctx;
 
 	cmd = container_of(header, struct vmw_query_cmd, header);
 	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
@@ -318,6 +533,16 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 		return ret;
 
 	vmw_dmabuf_unreference(&vmw_bo);
+
+	/*
+	 * This wait will act as a barrier for previous waits for this
+	 * context.
+	 */
+
+	ctx = sw_context->cur_ctx;
+	if (!list_empty(&ctx->query_head))
+		list_del_init(&ctx->query_head);
+
 	return 0;
 }
 
@@ -350,6 +575,13 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 	}
 
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Culd not validate surface.\n");
+		goto out_no_validate;
+	}
+
 	/*
 	 * Patch command stream with device SID.
 	 */
@@ -359,8 +591,12 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 	vmw_dmabuf_unreference(&vmw_bo);
 
 	res = &srf->res;
-	return vmw_resource_to_validate_list(sw_context, &res);
+	vmw_resource_to_validate_list(sw_context, &res);
+
+	return 0;
 
+out_no_validate:
+	vmw_surface_unreference(&srf);
 out_no_reloc:
 	vmw_dmabuf_unreference(&vmw_bo);
 	return ret;
@@ -450,6 +686,71 @@ static int vmw_cmd_tex_state(struct vmw_private *dev_priv,
 	return 0;
 }
 
+static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
+				      struct vmw_sw_context *sw_context,
+				      void *buf)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	int ret;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd = buf;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->body.ptr,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+
+	return ret;
+}
+
+static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
+				struct vmw_sw_context *sw_context,
+				void *buf, uint32_t *size)
+{
+	uint32_t size_remaining = *size;
+	uint32_t cmd_id;
+
+	cmd_id = le32_to_cpu(((uint32_t *)buf)[0]);
+	switch (cmd_id) {
+	case SVGA_CMD_UPDATE:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdUpdate);
+		break;
+	case SVGA_CMD_DEFINE_GMRFB:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdDefineGMRFB);
+		break;
+	case SVGA_CMD_BLIT_GMRFB_TO_SCREEN:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdBlitGMRFBToScreen);
+		break;
+	case SVGA_CMD_BLIT_SCREEN_TO_GMRFB:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdBlitGMRFBToScreen);
+		break;
+	default:
+		DRM_ERROR("Unsupported SVGA command: %u.\n", cmd_id);
+		return -EINVAL;
+	}
+
+	if (*size > size_remaining) {
+		DRM_ERROR("Invalid SVGA command (size mismatch):"
+			  " %u.\n", cmd_id);
+		return -EINVAL;
+	}
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA command: %u.\n", cmd_id);
+		return -EPERM;
+	}
+
+	if (cmd_id == SVGA_CMD_DEFINE_GMRFB)
+		return vmw_cmd_check_define_gmrfb(dev_priv, sw_context, buf);
+
+	return 0;
+}
 
 typedef int (*vmw_cmd_func) (struct vmw_private *,
 			     struct vmw_sw_context *,
@@ -502,11 +803,11 @@ static int vmw_cmd_check(struct vmw_private *dev_priv,
 	SVGA3dCmdHeader *header = (SVGA3dCmdHeader *) buf;
 	int ret;
 
-	cmd_id = ((uint32_t *)buf)[0];
-	if (cmd_id == SVGA_CMD_UPDATE) {
-		*size = 5 << 2;
-		return 0;
-	}
+	cmd_id = le32_to_cpu(((uint32_t *)buf)[0]);
+	/* Handle any none 3D commands */
+	if (unlikely(cmd_id < SVGA_CMD_MAX))
+		return vmw_cmd_check_not_3d(dev_priv, sw_context, buf, size);
+
 
 	cmd_id = le32_to_cpu(header->id);
 	*size = le32_to_cpu(header->size) + sizeof(SVGA3dCmdHeader);
@@ -531,9 +832,9 @@ out_err:
 
 static int vmw_cmd_check_all(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
+			     void *buf,
 			     uint32_t size)
 {
-	void *buf = sw_context->cmd_bounce;
 	int32_t cur_size = size;
 	int ret;
 
@@ -582,7 +883,7 @@ static void vmw_apply_relocations(struct vmw_sw_context *sw_context)
 static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 {
 	struct ttm_validate_buffer *entry, *next;
-	uint32_t i = sw_context->num_ref_resources;
+	struct vmw_resource *res, *res_next;
 
 	/*
 	 * Drop references to DMA buffers held during command submission.
@@ -599,9 +900,11 @@ static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 	/*
 	 * Drop references to resources held during command submission.
 	 */
-	while (i-- > 0) {
-		sw_context->resources[i]->on_validate_list = false;
-		vmw_resource_unreference(&sw_context->resources[i]);
+	vmw_resource_unreserve(&sw_context->resource_list);
+	list_for_each_entry_safe(res, res_next, &sw_context->resource_list,
+				 validate_head) {
+		list_del_init(&res->validate_head);
+		vmw_resource_unreference(&res);
 	}
 }
 
@@ -610,6 +913,16 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 {
 	int ret;
 
+
+	/*
+	 * Don't validate pinned buffers.
+	 */
+
+	if (bo == dev_priv->pinned_bo ||
+	    (bo == dev_priv->dummy_query_bo &&
+	     dev_priv->dummy_query_bo_pinned))
+		return 0;
+
 	/**
 	 * Put BO in VRAM if there is space, otherwise as a GMR.
 	 * If there is no space in VRAM and GMR ids are all used up,
@@ -681,6 +994,9 @@ static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
  * Creates a fence object and submits a command stream marker.
  * If this fails for some reason, We sync the fifo and return NULL.
  * It is then safe to fence buffers with a NULL pointer.
+ *
+ * If @p_handle is not NULL @file_priv must also not be NULL. Creates
+ * a userspace handle if @p_handle is not NULL, otherwise not.
  */
 
 int vmw_execbuf_fence_commands(struct drm_file *file_priv,
@@ -692,6 +1008,8 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 	int ret;
 	bool synced = false;
 
+	/* p_handle implies file_priv. */
+	BUG_ON(p_handle != NULL && file_priv == NULL);
 
 	ret = vmw_fifo_send_fence(dev_priv, &sequence);
 	if (unlikely(ret != 0)) {
@@ -719,69 +1037,127 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 	return 0;
 }
 
-int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
+/**
+ * vmw_execbuf_copy_fence_user - copy fence object information to
+ * user-space.
+ *
+ * @dev_priv: Pointer to a vmw_private struct.
+ * @vmw_fp: Pointer to the struct vmw_fpriv representing the calling file.
+ * @ret: Return value from fence object creation.
+ * @user_fence_rep: User space address of a struct drm_vmw_fence_rep to
+ * which the information should be copied.
+ * @fence: Pointer to the fenc object.
+ * @fence_handle: User-space fence handle.
+ *
+ * This function copies fence information to user-space. If copying fails,
+ * The user-space struct drm_vmw_fence_rep::error member is hopefully
+ * left untouched, and if it's preloaded with an -EFAULT by user-space,
+ * the error will hopefully be detected.
+ * Also if copying fails, user-space will be unable to signal the fence
+ * object so we wait for it immediately, and then unreference the
+ * user-space reference.
+ */
+void
+vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+			    struct vmw_fpriv *vmw_fp,
+			    int ret,
+			    struct drm_vmw_fence_rep __user *user_fence_rep,
+			    struct vmw_fence_obj *fence,
+			    uint32_t fence_handle)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data;
 	struct drm_vmw_fence_rep fence_rep;
-	struct drm_vmw_fence_rep __user *user_fence_rep;
-	int ret;
-	void *user_cmd;
-	void *cmd;
-	struct vmw_sw_context *sw_context = &dev_priv->ctx;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	struct vmw_fence_obj *fence;
-	uint32_t handle;
+
+	if (user_fence_rep == NULL)
+		return;
+
+	memset(&fence_rep, 0, sizeof(fence_rep));
+
+	fence_rep.error = ret;
+	if (ret == 0) {
+		BUG_ON(fence == NULL);
+
+		fence_rep.handle = fence_handle;
+		fence_rep.seqno = fence->seqno;
+		vmw_update_seqno(dev_priv, &dev_priv->fifo);
+		fence_rep.passed_seqno = dev_priv->last_read_seqno;
+	}
 
 	/*
-	 * This will allow us to extend the ioctl argument while
-	 * maintaining backwards compatibility:
-	 * We take different code paths depending on the value of
-	 * arg->version.
+	 * copy_to_user errors will be detected by user space not
+	 * seeing fence_rep::error filled in. Typically
+	 * user-space would have pre-set that member to -EFAULT.
 	 */
+	ret = copy_to_user(user_fence_rep, &fence_rep,
+			   sizeof(fence_rep));
 
-	if (unlikely(arg->version != DRM_VMW_EXECBUF_VERSION)) {
-		DRM_ERROR("Incorrect execbuf version.\n");
-		DRM_ERROR("You're running outdated experimental "
-			  "vmwgfx user-space drivers.");
-		return -EINVAL;
+	/*
+	 * User-space lost the fence object. We need to sync
+	 * and unreference the handle.
+	 */
+	if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+		ttm_ref_object_base_unref(vmw_fp->tfile,
+					  fence_handle, TTM_REF_USAGE);
+		DRM_ERROR("Fence copy error. Syncing.\n");
+		(void) vmw_fence_obj_wait(fence, fence->signal_mask,
+					  false, false,
+					  VMW_FENCE_WAIT_TIMEOUT);
 	}
+}
 
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
+int vmw_execbuf_process(struct drm_file *file_priv,
+			struct vmw_private *dev_priv,
+			void __user *user_commands,
+			void *kernel_commands,
+			uint32_t command_size,
+			uint64_t throttle_us,
+			struct drm_vmw_fence_rep __user *user_fence_rep)
+{
+	struct vmw_sw_context *sw_context = &dev_priv->ctx;
+	struct vmw_fence_obj *fence;
+	uint32_t handle;
+	void *cmd;
+	int ret;
 
 	ret = mutex_lock_interruptible(&dev_priv->cmdbuf_mutex);
-	if (unlikely(ret != 0)) {
-		ret = -ERESTARTSYS;
-		goto out_no_cmd_mutex;
-	}
-
-	ret = vmw_resize_cmd_bounce(sw_context, arg->command_size);
 	if (unlikely(ret != 0))
-		goto out_unlock;
+		return -ERESTARTSYS;
 
-	user_cmd = (void __user *)(unsigned long)arg->commands;
-	ret = copy_from_user(sw_context->cmd_bounce,
-			     user_cmd, arg->command_size);
+	if (kernel_commands == NULL) {
+		sw_context->kernel = false;
 
-	if (unlikely(ret != 0)) {
-		ret = -EFAULT;
-		DRM_ERROR("Failed copying commands.\n");
-		goto out_unlock;
-	}
+		ret = vmw_resize_cmd_bounce(sw_context, command_size);
+		if (unlikely(ret != 0))
+			goto out_unlock;
+
+
+		ret = copy_from_user(sw_context->cmd_bounce,
+				     user_commands, command_size);
+
+		if (unlikely(ret != 0)) {
+			ret = -EFAULT;
+			DRM_ERROR("Failed copying commands.\n");
+			goto out_unlock;
+		}
+		kernel_commands = sw_context->cmd_bounce;
+	} else
+		sw_context->kernel = true;
 
 	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
 	sw_context->cid_valid = false;
 	sw_context->sid_valid = false;
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
-	sw_context->num_ref_resources = 0;
+	sw_context->fence_flags = 0;
+	INIT_LIST_HEAD(&sw_context->query_list);
+	INIT_LIST_HEAD(&sw_context->resource_list);
+	sw_context->cur_query_bo = dev_priv->pinned_bo;
+	sw_context->cur_query_cid = dev_priv->query_cid;
+	sw_context->query_cid_valid = (dev_priv->pinned_bo != NULL);
 
 	INIT_LIST_HEAD(&sw_context->validate_nodes);
 
-	ret = vmw_cmd_check_all(dev_priv, sw_context, arg->command_size);
+	ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands,
+				command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
 
@@ -795,26 +1171,25 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 
 	vmw_apply_relocations(sw_context);
 
-	if (arg->throttle_us) {
+	if (throttle_us) {
 		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue,
-				   arg->throttle_us);
+				   throttle_us);
 
 		if (unlikely(ret != 0))
 			goto out_throttle;
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
+	cmd = vmw_fifo_reserve(dev_priv, command_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving fifo space for commands.\n");
 		ret = -ENOMEM;
-		goto out_err;
+		goto out_throttle;
 	}
 
-	memcpy(cmd, sw_context->cmd_bounce, arg->command_size);
-	vmw_fifo_commit(dev_priv, arg->command_size);
+	memcpy(cmd, kernel_commands, command_size);
+	vmw_fifo_commit(dev_priv, command_size);
 
-	user_fence_rep = (struct drm_vmw_fence_rep __user *)
-		(unsigned long)arg->fence_rep;
+	vmw_query_bo_switch_commit(dev_priv, sw_context);
 	ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
 					 &fence,
 					 (user_fence_rep) ? &handle : NULL);
@@ -831,54 +1206,171 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 				    (void *) fence);
 
 	vmw_clear_validations(sw_context);
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
-
-	if (user_fence_rep) {
-		fence_rep.error = ret;
-		fence_rep.handle = handle;
-		fence_rep.seqno = fence->seqno;
-		vmw_update_seqno(dev_priv, &dev_priv->fifo);
-		fence_rep.passed_seqno = dev_priv->last_read_seqno;
-
-		/*
-		 * copy_to_user errors will be detected by user space not
-		 * seeing fence_rep::error filled in. Typically
-		 * user-space would have pre-set that member to -EFAULT.
-		 */
-		ret = copy_to_user(user_fence_rep, &fence_rep,
-				   sizeof(fence_rep));
-
-		/*
-		 * User-space lost the fence object. We need to sync
-		 * and unreference the handle.
-		 */
-		if (unlikely(ret != 0) && (fence_rep.error == 0)) {
-			BUG_ON(fence == NULL);
-
-			ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
-						  handle, TTM_REF_USAGE);
-			DRM_ERROR("Fence copy error. Syncing.\n");
-			(void) vmw_fence_obj_wait(fence,
-						  fence->signal_mask,
-						  false, false,
-						  VMW_FENCE_WAIT_TIMEOUT);
-		}
-	}
+	vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+				    user_fence_rep, fence, handle);
 
 	if (likely(fence != NULL))
 		vmw_fence_obj_unreference(&fence);
 
-	vmw_kms_cursor_post_execbuf(dev_priv);
-	ttm_read_unlock(&vmaster->lock);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
 	return 0;
+
 out_err:
 	vmw_free_relocations(sw_context);
 out_throttle:
+	vmw_query_switch_backoff(sw_context);
 	ttm_eu_backoff_reservation(&sw_context->validate_nodes);
 	vmw_clear_validations(sw_context);
 out_unlock:
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
-out_no_cmd_mutex:
+	return ret;
+}
+
+/**
+ * vmw_execbuf_unpin_panic - Idle the fifo and unpin the query buffer.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function is called to idle the fifo and unpin the query buffer
+ * if the normal way to do this hits an error, which should typically be
+ * extremely rare.
+ */
+static void vmw_execbuf_unpin_panic(struct vmw_private *dev_priv)
+{
+	DRM_ERROR("Can't unpin query buffer. Trying to recover.\n");
+
+	(void) vmw_fallback_wait(dev_priv, false, true, 0, false, 10*HZ);
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+}
+
+
+/**
+ * vmw_execbuf_release_pinned_bo - Flush queries and unpin the pinned
+ * query bo.
+ *
+ * @dev_priv: The device private structure.
+ * @only_on_cid_match: Only flush and unpin if the current active query cid
+ * matches @cid.
+ * @cid: Optional context id to match.
+ *
+ * This function should be used to unpin the pinned query bo, or
+ * as a query barrier when we need to make sure that all queries have
+ * finished before the next fifo command. (For example on hardware
+ * context destructions where the hardware may otherwise leak unfinished
+ * queries).
+ *
+ * This function does not return any failure codes, but make attempts
+ * to do safe unpinning in case of errors.
+ *
+ * The function will synchronize on the previous query barrier, and will
+ * thus not finish until that barrier has executed.
+ */
+void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+				   bool only_on_cid_match, uint32_t cid)
+{
+	int ret = 0;
+	struct list_head validate_list;
+	struct ttm_validate_buffer pinned_val, query_val;
+	struct vmw_fence_obj *fence;
+
+	mutex_lock(&dev_priv->cmdbuf_mutex);
+
+	if (dev_priv->pinned_bo == NULL)
+		goto out_unlock;
+
+	if (only_on_cid_match && cid != dev_priv->query_cid)
+		goto out_unlock;
+
+	INIT_LIST_HEAD(&validate_list);
+
+	pinned_val.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
+	list_add_tail(&pinned_val.head, &validate_list);
+
+	query_val.new_sync_obj_arg = pinned_val.new_sync_obj_arg;
+	query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
+	list_add_tail(&query_val.head, &validate_list);
+
+	do {
+		ret = ttm_eu_reserve_buffers(&validate_list);
+	} while (ret == -ERESTARTSYS);
+
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_reserve;
+	}
+
+	ret = vmw_fifo_emit_dummy_query(dev_priv, dev_priv->query_cid);
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_emit;
+	}
+
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
+	ttm_eu_fence_buffer_objects(&validate_list, (void *) fence);
+
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+
+out_unlock:
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+	return;
+
+out_no_emit:
+	ttm_eu_backoff_reservation(&validate_list);
+out_no_reserve:
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+}
+
+
+int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	int ret;
+
+	/*
+	 * This will allow us to extend the ioctl argument while
+	 * maintaining backwards compatibility:
+	 * We take different code paths depending on the value of
+	 * arg->version.
+	 */
+
+	if (unlikely(arg->version != DRM_VMW_EXECBUF_VERSION)) {
+		DRM_ERROR("Incorrect execbuf version.\n");
+		DRM_ERROR("You're running outdated experimental "
+			  "vmwgfx user-space drivers.");
+		return -EINVAL;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv,
+				  (void __user *)(unsigned long)arg->commands,
+				  NULL, arg->command_size, arg->throttle_us,
+				  (void __user *)(unsigned long)arg->fence_rep);
+
+	if (unlikely(ret != 0))
+		goto out_unlock;
+
+	vmw_kms_cursor_post_execbuf(dev_priv);
+
+out_unlock:
 	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index b1888e801e22..070797b7b03a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -592,58 +592,6 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
 	return 0;
 }
 
-int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-			 struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	int ret = 0;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_validate(bo, &vmw_sys_placement, false, false, false);
-	ttm_bo_unreserve(bo);
-
-	return ret;
-}
-
-int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	struct ttm_placement ne_placement = vmw_vram_ne_placement;
-	int ret = 0;
-
-	ne_placement.lpfn = bo->num_pages;
-
-	/* interuptable? */
-	ret = ttm_write_lock(&vmw_priv->active_master->lock, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err_unlock;
-
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
-				       false, false);
-
-	ret = ttm_bo_validate(bo, &ne_placement, false, false, false);
-
-	/* Could probably bug on */
-	WARN_ON(bo->offset != 0);
-
-	ttm_bo_unreserve(bo);
-err_unlock:
-	ttm_write_unlock(&vmw_priv->active_master->lock);
-
-	return ret;
-}
-
 int vmw_fb_off(struct vmw_private *vmw_priv)
 {
 	struct fb_info *info;
@@ -665,7 +613,7 @@ int vmw_fb_off(struct vmw_private *vmw_priv)
 	par->bo_ptr = NULL;
 	ttm_bo_kunmap(&par->map);
 
-	vmw_dmabuf_from_vram(vmw_priv, par->vmw_bo);
+	vmw_dmabuf_unpin(vmw_priv, par->vmw_bo, false);
 
 	return 0;
 }
@@ -691,7 +639,7 @@ int vmw_fb_on(struct vmw_private *vmw_priv)
 	/* Make sure that all overlays are stoped when we take over */
 	vmw_overlay_stop_all(vmw_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo);
+	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo, true, false);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("could not move buffer to start of VRAM\n");
 		goto err_no_buffer;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 5065a140fdf8..15fb26088d68 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -34,13 +34,18 @@ struct vmw_fence_manager {
 	int num_fence_objects;
 	struct vmw_private *dev_priv;
 	spinlock_t lock;
-	u32 next_seqno;
 	struct list_head fence_list;
 	struct work_struct work;
 	u32 user_fence_size;
 	u32 fence_size;
+	u32 event_fence_action_size;
 	bool fifo_down;
 	struct list_head cleanup_list;
+	uint32_t pending_actions[VMW_ACTION_MAX];
+	struct mutex goal_irq_mutex;
+	bool goal_irq_on; /* Protected by @goal_irq_mutex */
+	bool seqno_valid; /* Protected by @lock, and may not be set to true
+			     without the @goal_irq_mutex held. */
 };
 
 struct vmw_user_fence {
@@ -49,8 +54,51 @@ struct vmw_user_fence {
 };
 
 /**
- * vmw_fence_destroy_locked
+ * struct vmw_event_fence_action - fence action that delivers a drm event.
  *
+ * @e: A struct drm_pending_event that controls the event delivery.
+ * @action: A struct vmw_fence_action to hook up to a fence.
+ * @fence: A referenced pointer to the fence to keep it alive while @action
+ * hangs on it.
+ * @dev: Pointer to a struct drm_device so we can access the event stuff.
+ * @kref: Both @e and @action has destructors, so we need to refcount.
+ * @size: Size accounted for this object.
+ * @tv_sec: If non-null, the variable pointed to will be assigned
+ * current time tv_sec val when the fence signals.
+ * @tv_usec: Must be set if @tv_sec is set, and the variable pointed to will
+ * be assigned the current time tv_usec val when the fence signals.
+ */
+struct vmw_event_fence_action {
+	struct drm_pending_event e;
+	struct vmw_fence_action action;
+	struct vmw_fence_obj *fence;
+	struct drm_device *dev;
+	struct kref kref;
+	uint32_t size;
+	uint32_t *tv_sec;
+	uint32_t *tv_usec;
+};
+
+/**
+ * Note on fencing subsystem usage of irqs:
+ * Typically the vmw_fences_update function is called
+ *
+ * a) When a new fence seqno has been submitted by the fifo code.
+ * b) On-demand when we have waiters. Sleeping waiters will switch on the
+ * ANY_FENCE irq and call vmw_fences_update function each time an ANY_FENCE
+ * irq is received. When the last fence waiter is gone, that IRQ is masked
+ * away.
+ *
+ * In situations where there are no waiters and we don't submit any new fences,
+ * fence objects may not be signaled. This is perfectly OK, since there are
+ * no consumers of the signaled data, but that is NOT ok when there are fence
+ * actions attached to a fence. The fencing subsystem then makes use of the
+ * FENCE_GOAL irq and sets the fence goal seqno to that of the next fence
+ * which has an action attached, and each time vmw_fences_update is called,
+ * the subsystem makes sure the fence goal seqno is updated.
+ *
+ * The fence goal seqno irq is on as long as there are unsignaled fence
+ * objects with actions attached to them.
  */
 
 static void vmw_fence_obj_destroy_locked(struct kref *kref)
@@ -85,24 +133,36 @@ static void vmw_fence_work_func(struct work_struct *work)
 		container_of(work, struct vmw_fence_manager, work);
 	struct list_head list;
 	struct vmw_fence_action *action, *next_action;
+	bool seqno_valid;
 
 	do {
 		INIT_LIST_HEAD(&list);
+		mutex_lock(&fman->goal_irq_mutex);
+
 		spin_lock_irq(&fman->lock);
 		list_splice_init(&fman->cleanup_list, &list);
+		seqno_valid = fman->seqno_valid;
 		spin_unlock_irq(&fman->lock);
 
+		if (!seqno_valid && fman->goal_irq_on) {
+			fman->goal_irq_on = false;
+			vmw_goal_waiter_remove(fman->dev_priv);
+		}
+		mutex_unlock(&fman->goal_irq_mutex);
+
 		if (list_empty(&list))
 			return;
 
 		/*
 		 * At this point, only we should be able to manipulate the
 		 * list heads of the actions we have on the private list.
+		 * hence fman::lock not held.
 		 */
 
 		list_for_each_entry_safe(action, next_action, &list, head) {
 			list_del_init(&action->head);
-			action->cleanup(action);
+			if (action->cleanup)
+				action->cleanup(action);
 		}
 	} while (1);
 }
@@ -122,6 +182,9 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
 	fman->fifo_down = true;
 	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
 	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
+	fman->event_fence_action_size =
+		ttm_round_pot(sizeof(struct vmw_event_fence_action));
+	mutex_init(&fman->goal_irq_mutex);
 
 	return fman;
 }
@@ -177,6 +240,9 @@ out_unlock:
 
 struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
 {
+	if (unlikely(fence == NULL))
+		return NULL;
+
 	kref_get(&fence->kref);
 	return fence;
 }
@@ -191,8 +257,12 @@ struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
 void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
 {
 	struct vmw_fence_obj *fence = *fence_p;
-	struct vmw_fence_manager *fman = fence->fman;
+	struct vmw_fence_manager *fman;
 
+	if (unlikely(fence == NULL))
+		return;
+
+	fman = fence->fman;
 	*fence_p = NULL;
 	spin_lock_irq(&fman->lock);
 	BUG_ON(atomic_read(&fence->kref.refcount) == 0);
@@ -207,6 +277,7 @@ void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
 
 	list_for_each_entry_safe(action, next_action, list, head) {
 		list_del_init(&action->head);
+		fman->pending_actions[action->type]--;
 		if (action->seq_passed != NULL)
 			action->seq_passed(action);
 
@@ -215,17 +286,101 @@ void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
 		 * it will be performed by a worker task.
 		 */
 
-		if (action->cleanup != NULL)
-			list_add_tail(&action->head, &fman->cleanup_list);
+		list_add_tail(&action->head, &fman->cleanup_list);
 	}
 }
 
-void vmw_fences_update(struct vmw_fence_manager *fman, u32 seqno)
+/**
+ * vmw_fence_goal_new_locked - Figure out a new device fence goal
+ * seqno if needed.
+ *
+ * @fman: Pointer to a fence manager.
+ * @passed_seqno: The seqno the device currently signals as passed.
+ *
+ * This function should be called with the fence manager lock held.
+ * It is typically called when we have a new passed_seqno, and
+ * we might need to update the fence goal. It checks to see whether
+ * the current fence goal has already passed, and, in that case,
+ * scans through all unsignaled fences to get the next fence object with an
+ * action attached, and sets the seqno of that fence as a new fence goal.
+ *
+ * returns true if the device goal seqno was updated. False otherwise.
+ */
+static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
+				      u32 passed_seqno)
+{
+	u32 goal_seqno;
+	__le32 __iomem *fifo_mem;
+	struct vmw_fence_obj *fence;
+
+	if (likely(!fman->seqno_valid))
+		return false;
+
+	fifo_mem = fman->dev_priv->mmio_virt;
+	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
+		return false;
+
+	fman->seqno_valid = false;
+	list_for_each_entry(fence, &fman->fence_list, head) {
+		if (!list_empty(&fence->seq_passed_actions)) {
+			fman->seqno_valid = true;
+			iowrite32(fence->seqno,
+				  fifo_mem + SVGA_FIFO_FENCE_GOAL);
+			break;
+		}
+	}
+
+	return true;
+}
+
+
+/**
+ * vmw_fence_goal_check_locked - Replace the device fence goal seqno if
+ * needed.
+ *
+ * @fence: Pointer to a struct vmw_fence_obj the seqno of which should be
+ * considered as a device fence goal.
+ *
+ * This function should be called with the fence manager lock held.
+ * It is typically called when an action has been attached to a fence to
+ * check whether the seqno of that fence should be used for a fence
+ * goal interrupt. This is typically needed if the current fence goal is
+ * invalid, or has a higher seqno than that of the current fence object.
+ *
+ * returns true if the device goal seqno was updated. False otherwise.
+ */
+static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
+{
+	u32 goal_seqno;
+	__le32 __iomem *fifo_mem;
+
+	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC)
+		return false;
+
+	fifo_mem = fence->fman->dev_priv->mmio_virt;
+	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	if (likely(fence->fman->seqno_valid &&
+		   goal_seqno - fence->seqno < VMW_FENCE_WRAP))
+		return false;
+
+	iowrite32(fence->seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	fence->fman->seqno_valid = true;
+
+	return true;
+}
+
+void vmw_fences_update(struct vmw_fence_manager *fman)
 {
 	unsigned long flags;
 	struct vmw_fence_obj *fence, *next_fence;
 	struct list_head action_list;
+	bool needs_rerun;
+	uint32_t seqno, new_seqno;
+	__le32 __iomem *fifo_mem = fman->dev_priv->mmio_virt;
 
+	seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+rerun:
 	spin_lock_irqsave(&fman->lock, flags);
 	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
 		if (seqno - fence->seqno < VMW_FENCE_WRAP) {
@@ -236,14 +391,30 @@ void vmw_fences_update(struct vmw_fence_manager *fman, u32 seqno)
 					 &action_list);
 			vmw_fences_perform_actions(fman, &action_list);
 			wake_up_all(&fence->queue);
-		}
-
+		} else
+			break;
 	}
+
+	needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
+
 	if (!list_empty(&fman->cleanup_list))
 		(void) schedule_work(&fman->work);
 	spin_unlock_irqrestore(&fman->lock, flags);
-}
 
+	/*
+	 * Rerun if the fence goal seqno was updated, and the
+	 * hardware might have raced with that update, so that
+	 * we missed a fence_goal irq.
+	 */
+
+	if (unlikely(needs_rerun)) {
+		new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+		if (new_seqno != seqno) {
+			seqno = new_seqno;
+			goto rerun;
+		}
+	}
+}
 
 bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
 			    uint32_t flags)
@@ -260,14 +431,8 @@ bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
 	if ((signaled & flags) == flags)
 		return 1;
 
-	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0) {
-		struct vmw_private *dev_priv = fman->dev_priv;
-		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
-		u32 seqno;
-
-		seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
-		vmw_fences_update(fman, seqno);
-	}
+	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0)
+		vmw_fences_update(fman);
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
 	signaled = fence->signaled;
@@ -617,3 +782,343 @@ int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
 					 arg->handle,
 					 TTM_REF_USAGE);
 }
+
+/**
+ * vmw_event_fence_action_destroy
+ *
+ * @kref: The struct kref embedded in a struct vmw_event_fence_action.
+ *
+ * The vmw_event_fence_action destructor that may be called either after
+ * the fence action cleanup, or when the event is delivered.
+ * It frees both the vmw_event_fence_action struct and the actual
+ * event structure copied to user-space.
+ */
+static void vmw_event_fence_action_destroy(struct kref *kref)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(kref, struct vmw_event_fence_action, kref);
+	struct ttm_mem_global *mem_glob =
+		vmw_mem_glob(vmw_priv(eaction->dev));
+	uint32_t size = eaction->size;
+
+	kfree(eaction->e.event);
+	kfree(eaction);
+	ttm_mem_global_free(mem_glob, size);
+}
+
+
+/**
+ * vmw_event_fence_action_delivered
+ *
+ * @e: The struct drm_pending_event embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * The struct drm_pending_event destructor that is called by drm
+ * once the event is delivered. Since we don't know whether this function
+ * will be called before or after the fence action destructor, we
+ * free a refcount and destroy if it becomes zero.
+ */
+static void vmw_event_fence_action_delivered(struct drm_pending_event *e)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(e, struct vmw_event_fence_action, e);
+
+	kref_put(&eaction->kref, vmw_event_fence_action_destroy);
+}
+
+
+/**
+ * vmw_event_fence_action_seq_passed
+ *
+ * @action: The struct vmw_fence_action embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * This function is called when the seqno of the fence where @action is
+ * attached has passed. It queues the event on the submitter's event list.
+ * This function is always called from atomic context, and may be called
+ * from irq context. It ups a refcount reflecting that we now have two
+ * destructors.
+ */
+static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(action, struct vmw_event_fence_action, action);
+	struct drm_device *dev = eaction->dev;
+	struct drm_file *file_priv = eaction->e.file_priv;
+	unsigned long irq_flags;
+
+	kref_get(&eaction->kref);
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+
+	if (likely(eaction->tv_sec != NULL)) {
+		struct timeval tv;
+
+		do_gettimeofday(&tv);
+		*eaction->tv_sec = tv.tv_sec;
+		*eaction->tv_usec = tv.tv_usec;
+	}
+
+	list_add_tail(&eaction->e.link, &file_priv->event_list);
+	wake_up_all(&file_priv->event_wait);
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+}
+
+/**
+ * vmw_event_fence_action_cleanup
+ *
+ * @action: The struct vmw_fence_action embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * This function is the struct vmw_fence_action destructor. It's typically
+ * called from a workqueue.
+ */
+static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(action, struct vmw_event_fence_action, action);
+
+	vmw_fence_obj_unreference(&eaction->fence);
+	kref_put(&eaction->kref, vmw_event_fence_action_destroy);
+}
+
+
+/**
+ * vmw_fence_obj_add_action - Add an action to a fence object.
+ *
+ * @fence - The fence object.
+ * @action - The action to add.
+ *
+ * Note that the action callbacks may be executed before this function
+ * returns.
+ */
+void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
+			      struct vmw_fence_action *action)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned long irq_flags;
+	bool run_update = false;
+
+	mutex_lock(&fman->goal_irq_mutex);
+	spin_lock_irqsave(&fman->lock, irq_flags);
+
+	fman->pending_actions[action->type]++;
+	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC) {
+		struct list_head action_list;
+
+		INIT_LIST_HEAD(&action_list);
+		list_add_tail(&action->head, &action_list);
+		vmw_fences_perform_actions(fman, &action_list);
+	} else {
+		list_add_tail(&action->head, &fence->seq_passed_actions);
+
+		/*
+		 * This function may set fman::seqno_valid, so it must
+		 * be run with the goal_irq_mutex held.
+		 */
+		run_update = vmw_fence_goal_check_locked(fence);
+	}
+
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	if (run_update) {
+		if (!fman->goal_irq_on) {
+			fman->goal_irq_on = true;
+			vmw_goal_waiter_add(fman->dev_priv);
+		}
+		vmw_fences_update(fman);
+	}
+	mutex_unlock(&fman->goal_irq_mutex);
+
+}
+
+/**
+ * vmw_event_fence_action_create - Post an event for sending when a fence
+ * object seqno has passed.
+ *
+ * @file_priv: The file connection on which the event should be posted.
+ * @fence: The fence object on which to post the event.
+ * @event: Event to be posted. This event should've been alloced
+ * using k[mz]alloc, and should've been completely initialized.
+ * @interruptible: Interruptible waits if possible.
+ *
+ * As a side effect, the object pointed to by @event may have been
+ * freed when this function returns. If this function returns with
+ * an error code, the caller needs to free that object.
+ */
+
+int vmw_event_fence_action_create(struct drm_file *file_priv,
+				  struct vmw_fence_obj *fence,
+				  struct drm_event *event,
+				  uint32_t *tv_sec,
+				  uint32_t *tv_usec,
+				  bool interruptible)
+{
+	struct vmw_event_fence_action *eaction;
+	struct ttm_mem_global *mem_glob =
+		vmw_mem_glob(fence->fman->dev_priv);
+	struct vmw_fence_manager *fman = fence->fman;
+	uint32_t size = fman->event_fence_action_size +
+		ttm_round_pot(event->length);
+	int ret;
+
+	/*
+	 * Account for internal structure size as well as the
+	 * event size itself.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, size, false, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	eaction = kzalloc(sizeof(*eaction), GFP_KERNEL);
+	if (unlikely(eaction == NULL)) {
+		ttm_mem_global_free(mem_glob, size);
+		return -ENOMEM;
+	}
+
+	eaction->e.event = event;
+	eaction->e.file_priv = file_priv;
+	eaction->e.destroy = vmw_event_fence_action_delivered;
+
+	eaction->action.seq_passed = vmw_event_fence_action_seq_passed;
+	eaction->action.cleanup = vmw_event_fence_action_cleanup;
+	eaction->action.type = VMW_ACTION_EVENT;
+
+	eaction->fence = vmw_fence_obj_reference(fence);
+	eaction->dev = fman->dev_priv->dev;
+	eaction->size = size;
+	eaction->tv_sec = tv_sec;
+	eaction->tv_usec = tv_usec;
+
+	kref_init(&eaction->kref);
+	vmw_fence_obj_add_action(fence, &eaction->action);
+
+	return 0;
+}
+
+int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_fence_event_arg *arg =
+		(struct drm_vmw_fence_event_arg *) data;
+	struct vmw_fence_obj *fence = NULL;
+	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
+	struct drm_vmw_fence_rep __user *user_fence_rep =
+		(struct drm_vmw_fence_rep __user *)(unsigned long)
+		arg->fence_rep;
+	uint32_t handle;
+	unsigned long irq_flags;
+	struct drm_vmw_event_fence *event;
+	int ret;
+
+	/*
+	 * Look up an existing fence object,
+	 * and if user-space wants a new reference,
+	 * add one.
+	 */
+	if (arg->handle) {
+		struct ttm_base_object *base =
+			ttm_base_object_lookup(vmw_fp->tfile, arg->handle);
+
+		if (unlikely(base == NULL)) {
+			DRM_ERROR("Fence event invalid fence object handle "
+				  "0x%08lx.\n",
+				  (unsigned long)arg->handle);
+			return -EINVAL;
+		}
+		fence = &(container_of(base, struct vmw_user_fence,
+				       base)->fence);
+		(void) vmw_fence_obj_reference(fence);
+
+		if (user_fence_rep != NULL) {
+			bool existed;
+
+			ret = ttm_ref_object_add(vmw_fp->tfile, base,
+						 TTM_REF_USAGE, &existed);
+			if (unlikely(ret != 0)) {
+				DRM_ERROR("Failed to reference a fence "
+					  "object.\n");
+				goto out_no_ref_obj;
+			}
+			handle = base->hash.key;
+		}
+		ttm_base_object_unref(&base);
+	}
+
+	/*
+	 * Create a new fence object.
+	 */
+	if (!fence) {
+		ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
+						 &fence,
+						 (user_fence_rep) ?
+						 &handle : NULL);
+		if (unlikely(ret != 0)) {
+			DRM_ERROR("Fence event failed to create fence.\n");
+			return ret;
+		}
+	}
+
+	BUG_ON(fence == NULL);
+
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+
+	ret = (file_priv->event_space < sizeof(*event)) ? -EBUSY : 0;
+	if (likely(ret == 0))
+		file_priv->event_space -= sizeof(*event);
+
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to allocate event space for this file.\n");
+		goto out_no_event_space;
+	}
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (unlikely(event == NULL)) {
+		DRM_ERROR("Failed to allocate an event.\n");
+		goto out_no_event;
+	}
+
+	event->base.type = DRM_VMW_EVENT_FENCE_SIGNALED;
+	event->base.length = sizeof(*event);
+	event->user_data = arg->user_data;
+
+	if (arg->flags & DRM_VMW_FE_FLAG_REQ_TIME)
+		ret = vmw_event_fence_action_create(file_priv, fence,
+						    &event->base,
+						    &event->tv_sec,
+						    &event->tv_usec,
+						    true);
+	else
+		ret = vmw_event_fence_action_create(file_priv, fence,
+						    &event->base,
+						    NULL,
+						    NULL,
+						    true);
+
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Failed to attach event to fence.\n");
+		goto out_no_attach;
+	}
+
+	vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
+				    handle);
+	vmw_fence_obj_unreference(&fence);
+	return 0;
+out_no_attach:
+	kfree(event);
+out_no_event:
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+	file_priv->event_space += sizeof(*event);
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+out_no_event_space:
+	if (user_fence_rep != NULL)
+		ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+					  handle, TTM_REF_USAGE);
+out_no_ref_obj:
+	vmw_fence_obj_unreference(&fence);
+	return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index 93074064aaf3..0854a2096b55 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -37,8 +37,14 @@ struct vmw_fence_manager;
  *
  *
  */
+enum vmw_action_type {
+	VMW_ACTION_EVENT = 0,
+	VMW_ACTION_MAX
+};
+
 struct vmw_fence_action {
 	struct list_head head;
+	enum vmw_action_type type;
 	void (*seq_passed) (struct vmw_fence_action *action);
 	void (*cleanup) (struct vmw_fence_action *action);
 };
@@ -66,8 +72,7 @@ extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
 extern struct vmw_fence_obj *
 vmw_fence_obj_reference(struct vmw_fence_obj *fence);
 
-extern void vmw_fences_update(struct vmw_fence_manager *fman,
-			      u32 sequence);
+extern void vmw_fences_update(struct vmw_fence_manager *fman);
 
 extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
 				   uint32_t flags);
@@ -102,4 +107,7 @@ extern int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
 
 extern int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file_priv);
+extern int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
+
 #endif /* _VMWGFX_FENCE_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 3ba9cac579e0..03bbc2a6f9a7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -45,7 +45,11 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
 	if (hwversion == 0)
 		return false;
 
-	if (hwversion < SVGA3D_HWVERSION_WS65_B1)
+	if (hwversion < SVGA3D_HWVERSION_WS8_B1)
+		return false;
+
+	/* Non-Screen Object path does not support surfaces */
+	if (!dev_priv->sou_priv)
 		return false;
 
 	return true;
@@ -243,9 +247,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_FIFO_PROGRESS,
 		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) |
-			  SVGA_IRQFLAG_FIFO_PROGRESS);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_FIFO_PROGRESS;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -267,9 +270,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 	mutex_lock(&dev_priv->hw_mutex);
 	if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) {
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
-			  ~SVGA_IRQFLAG_FIFO_PROGRESS);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -277,6 +279,16 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 	return ret;
 }
 
+/**
+ * Reserve @bytes number of bytes in the fifo.
+ *
+ * This function will return NULL (error) on two conditions:
+ *  If it timeouts waiting for fifo space, or if @bytes is larger than the
+ *   available fifo space.
+ *
+ * Returns:
+ *   Pointer to the fifo, or null on error (possible hardware hang).
+ */
 void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
@@ -491,3 +503,60 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
 out_err:
 	return ret;
 }
+
+/**
+ * vmw_fifo_emit_dummy_query - emits a dummy query to the fifo.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context id used for the query.
+ *
+ * This function is used to emit a dummy occlusion query with
+ * no primitives rendered between query begin and query end.
+ * It's used to provide a query barrier, in order to know that when
+ * this query is finished, all preceding queries are also finished.
+ *
+ * A Query results structure should have been initialized at the start
+ * of the dev_priv->dummy_query_bo buffer object. And that buffer object
+ * must also be either reserved or pinned when this function is called.
+ *
+ * Returns -ENOMEM on failure to reserve fifo space.
+ */
+int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+			      uint32_t cid)
+{
+	/*
+	 * A query wait without a preceding query end will
+	 * actually finish all queries for this cid
+	 * without writing to the query result structure.
+	 */
+
+	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdWaitForQuery body;
+	} *cmd;
+
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Out of fifo space for dummy query.\n");
+		return -ENOMEM;
+	}
+
+	cmd->header.id = SVGA_3D_CMD_WAIT_FOR_QUERY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.cid = cid;
+	cmd->body.type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		cmd->body.guestResult.gmrId = SVGA_GMR_FRAMEBUFFER;
+		cmd->body.guestResult.offset = bo->offset;
+	} else {
+		cmd->body.guestResult.gmrId = bo->mem.start;
+		cmd->body.guestResult.offset = 0;
+	}
+
+	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 5ecf96660644..3f6343502d1f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -27,6 +27,7 @@
 
 #include "vmwgfx_drv.h"
 #include "vmwgfx_drm.h"
+#include "vmwgfx_kms.h"
 
 int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
@@ -110,3 +111,219 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 
 	return ret;
 }
+
+int vmw_present_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_present_arg *arg =
+		(struct drm_vmw_present_arg *)data;
+	struct vmw_surface *surface;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct drm_vmw_rect __user *clips_ptr;
+	struct drm_vmw_rect *clips = NULL;
+	struct drm_mode_object *obj;
+	struct vmw_framebuffer *vfb;
+	uint32_t num_clips;
+	int ret;
+
+	num_clips = arg->num_clips;
+	clips_ptr = (struct drm_vmw_rect *)(unsigned long)arg->clips_ptr;
+
+	if (unlikely(num_clips == 0))
+		return 0;
+
+	if (clips_ptr == NULL) {
+		DRM_ERROR("Variable clips_ptr must be specified.\n");
+		ret = -EINVAL;
+		goto out_clips;
+	}
+
+	clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
+	if (clips == NULL) {
+		DRM_ERROR("Failed to allocate clip rect list.\n");
+		ret = -ENOMEM;
+		goto out_clips;
+	}
+
+	ret = copy_from_user(clips, clips_ptr, num_clips * sizeof(*clips));
+	if (ret) {
+		DRM_ERROR("Failed to copy clip rects from userspace.\n");
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (unlikely(ret != 0)) {
+		ret = -ERESTARTSYS;
+		goto out_no_mode_mutex;
+	}
+
+	obj = drm_mode_object_find(dev, arg->fb_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("Invalid framebuffer id.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
+	if (!vfb->dmabuf) {
+		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_no_ttm_lock;
+
+	ret = vmw_user_surface_lookup_handle(dev_priv, tfile, arg->sid,
+					     &surface);
+	if (ret)
+		goto out_no_surface;
+
+	ret = vmw_kms_present(dev_priv, file_priv,
+			      vfb, surface, arg->sid,
+			      arg->dest_x, arg->dest_y,
+			      clips, num_clips);
+
+	/* vmw_user_surface_lookup takes one ref so does new_fb */
+	vmw_surface_unreference(&surface);
+
+out_no_surface:
+	ttm_read_unlock(&vmaster->lock);
+out_no_ttm_lock:
+out_no_fb:
+	mutex_unlock(&dev->mode_config.mutex);
+out_no_mode_mutex:
+out_no_copy:
+	kfree(clips);
+out_clips:
+	return ret;
+}
+
+int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_present_readback_arg *arg =
+		(struct drm_vmw_present_readback_arg *)data;
+	struct drm_vmw_fence_rep __user *user_fence_rep =
+		(struct drm_vmw_fence_rep __user *)
+		(unsigned long)arg->fence_rep;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct drm_vmw_rect __user *clips_ptr;
+	struct drm_vmw_rect *clips = NULL;
+	struct drm_mode_object *obj;
+	struct vmw_framebuffer *vfb;
+	uint32_t num_clips;
+	int ret;
+
+	num_clips = arg->num_clips;
+	clips_ptr = (struct drm_vmw_rect *)(unsigned long)arg->clips_ptr;
+
+	if (unlikely(num_clips == 0))
+		return 0;
+
+	if (clips_ptr == NULL) {
+		DRM_ERROR("Argument clips_ptr must be specified.\n");
+		ret = -EINVAL;
+		goto out_clips;
+	}
+
+	clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
+	if (clips == NULL) {
+		DRM_ERROR("Failed to allocate clip rect list.\n");
+		ret = -ENOMEM;
+		goto out_clips;
+	}
+
+	ret = copy_from_user(clips, clips_ptr, num_clips * sizeof(*clips));
+	if (ret) {
+		DRM_ERROR("Failed to copy clip rects from userspace.\n");
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (unlikely(ret != 0)) {
+		ret = -ERESTARTSYS;
+		goto out_no_mode_mutex;
+	}
+
+	obj = drm_mode_object_find(dev, arg->fb_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("Invalid framebuffer id.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
+	if (!vfb->dmabuf) {
+		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_no_ttm_lock;
+
+	ret = vmw_kms_readback(dev_priv, file_priv,
+			       vfb, user_fence_rep,
+			       clips, num_clips);
+
+	ttm_read_unlock(&vmaster->lock);
+out_no_ttm_lock:
+out_no_fb:
+	mutex_unlock(&dev->mode_config.mutex);
+out_no_mode_mutex:
+out_no_copy:
+	kfree(clips);
+out_clips:
+	return ret;
+}
+
+
+/**
+ * vmw_fops_poll - wrapper around the drm_poll function
+ *
+ * @filp: See the linux fops poll documentation.
+ * @wait: See the linux fops poll documentation.
+ *
+ * Wrapper around the drm_poll function that makes sure the device is
+ * processing the fifo if drm_poll decides to wait.
+ */
+unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct vmw_private *dev_priv =
+		vmw_priv(file_priv->minor->dev);
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	return drm_poll(filp, wait);
+}
+
+
+/**
+ * vmw_fops_read - wrapper around the drm_read function
+ *
+ * @filp: See the linux fops read documentation.
+ * @buffer: See the linux fops read documentation.
+ * @count: See the linux fops read documentation.
+ * offset: See the linux fops read documentation.
+ *
+ * Wrapper around the drm_read function that makes sure the device is
+ * processing the fifo if drm_read decides to wait.
+ */
+ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
+		      size_t count, loff_t *offset)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct vmw_private *dev_priv =
+		vmw_priv(file_priv->minor->dev);
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	return drm_read(filp, buffer, count, offset);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index a005292a8908..cabc95f7517e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -34,28 +34,30 @@ irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *)arg;
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	uint32_t status;
+	uint32_t status, masked_status;
 
 	spin_lock(&dev_priv->irq_lock);
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+	masked_status = status & dev_priv->irq_mask;
 	spin_unlock(&dev_priv->irq_lock);
 
-	if (status & SVGA_IRQFLAG_ANY_FENCE) {
-		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
-		uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	if (likely(status))
+		outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+
+	if (!masked_status)
+		return IRQ_NONE;
 
-		vmw_fences_update(dev_priv->fman, seqno);
+	if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
+			     SVGA_IRQFLAG_FENCE_GOAL)) {
+		vmw_fences_update(dev_priv->fman);
 		wake_up_all(&dev_priv->fence_queue);
 	}
-	if (status & SVGA_IRQFLAG_FIFO_PROGRESS)
+
+	if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS)
 		wake_up_all(&dev_priv->fifo_queue);
 
-	if (likely(status)) {
-		outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		return IRQ_HANDLED;
-	}
 
-	return IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
 static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
@@ -78,7 +80,7 @@ void vmw_update_seqno(struct vmw_private *dev_priv,
 	if (dev_priv->last_read_seqno != seqno) {
 		dev_priv->last_read_seqno = seqno;
 		vmw_marker_pull(&fifo_state->marker_queue, seqno);
-		vmw_fences_update(dev_priv->fman, seqno);
+		vmw_fences_update(dev_priv->fman);
 	}
 }
 
@@ -189,9 +191,8 @@ void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_ANY_FENCE,
 		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) |
-			  SVGA_IRQFLAG_ANY_FENCE);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_ANY_FENCE;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -204,9 +205,39 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
 		unsigned long irq_flags;
 
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
-			  ~SVGA_IRQFLAG_ANY_FENCE);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_ANY_FENCE;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+
+void vmw_goal_waiter_add(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (dev_priv->goal_queue_waiters++ == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		outl(SVGA_IRQFLAG_FENCE_GOAL,
+		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_FENCE_GOAL;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (--dev_priv->goal_queue_waiters == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FENCE_GOAL;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 1a4c84cecca7..8b14dfd513a1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -27,12 +27,10 @@
 
 #include "vmwgfx_kms.h"
 
+
 /* Might need a hrtimer here? */
 #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
 
-static int vmw_surface_dmabuf_pin(struct vmw_framebuffer *vfb);
-static int vmw_surface_dmabuf_unpin(struct vmw_framebuffer *vfb);
-
 void vmw_display_unit_cleanup(struct vmw_display_unit *du)
 {
 	if (du->cursor_surface)
@@ -329,41 +327,10 @@ struct vmw_framebuffer_surface {
 	struct vmw_framebuffer base;
 	struct vmw_surface *surface;
 	struct vmw_dma_buffer *buffer;
-	struct delayed_work d_work;
-	struct mutex work_lock;
-	bool present_fs;
 	struct list_head head;
 	struct drm_master *master;
 };
 
-/**
- * vmw_kms_idle_workqueues - Flush workqueues on this master
- *
- * @vmaster - Pointer identifying the master, for the surfaces of which
- * we idle the dirty work queues.
- *
- * This function should be called with the ttm lock held in exclusive mode
- * to idle all dirty work queues before the fifo is taken down.
- *
- * The work task may actually requeue itself, but after the flush returns we're
- * sure that there's nothing to present, since the ttm lock is held in
- * exclusive mode, so the fifo will never get used.
- */
-
-void vmw_kms_idle_workqueues(struct vmw_master *vmaster)
-{
-	struct vmw_framebuffer_surface *entry;
-
-	mutex_lock(&vmaster->fb_surf_mutex);
-	list_for_each_entry(entry, &vmaster->fb_surf, head) {
-		if (cancel_delayed_work_sync(&entry->d_work))
-			(void) entry->d_work.work.func(&entry->d_work.work);
-
-		(void) cancel_delayed_work_sync(&entry->d_work);
-	}
-	mutex_unlock(&vmaster->fb_surf_mutex);
-}
-
 void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
 {
 	struct vmw_framebuffer_surface *vfbs =
@@ -375,64 +342,127 @@ void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
 	list_del(&vfbs->head);
 	mutex_unlock(&vmaster->fb_surf_mutex);
 
-	cancel_delayed_work_sync(&vfbs->d_work);
 	drm_master_put(&vfbs->master);
 	drm_framebuffer_cleanup(framebuffer);
 	vmw_surface_unreference(&vfbs->surface);
+	ttm_base_object_unref(&vfbs->base.user_obj);
 
 	kfree(vfbs);
 }
 
-static void vmw_framebuffer_present_fs_callback(struct work_struct *work)
+static int do_surface_dirty_sou(struct vmw_private *dev_priv,
+				struct drm_file *file_priv,
+				struct vmw_framebuffer *framebuffer,
+				unsigned flags, unsigned color,
+				struct drm_clip_rect *clips,
+				unsigned num_clips, int inc)
 {
-	struct delayed_work *d_work =
-		container_of(work, struct delayed_work, work);
-	struct vmw_framebuffer_surface *vfbs =
-		container_of(d_work, struct vmw_framebuffer_surface, d_work);
-	struct vmw_surface *surf = vfbs->surface;
-	struct drm_framebuffer *framebuffer = &vfbs->base.base;
-	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
+	struct drm_clip_rect *clips_ptr;
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, num_units;
+	int ret = 0; /* silence warning */
+	int left, right, top, bottom;
 
 	struct {
 		SVGA3dCmdHeader header;
-		SVGA3dCmdPresent body;
-		SVGA3dCopyRect cr;
+		SVGA3dCmdBlitSurfaceToScreen body;
 	} *cmd;
+	SVGASignedRect *blits;
+
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list,
+			    head) {
+		if (crtc->fb != &framebuffer->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(!clips || !num_clips);
+
+	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
+	cmd = kzalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Temporary fifo memory alloc failed.\n");
+		return -ENOMEM;
+	}
+
+	left = clips->x1;
+	right = clips->x2;
+	top = clips->y1;
+	bottom = clips->y2;
+
+	clips_ptr = clips;
+	for (i = 1; i < num_clips; i++, clips_ptr += inc) {
+		left = min_t(int, left, (int)clips_ptr->x1);
+		right = max_t(int, right, (int)clips_ptr->x2);
+		top = min_t(int, top, (int)clips_ptr->y1);
+		bottom = max_t(int, bottom, (int)clips_ptr->y2);
+	}
+
+	/* only need to do this once */
+	memset(cmd, 0, fifo_size);
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
+	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
+
+	cmd->body.srcRect.left = left;
+	cmd->body.srcRect.right = right;
+	cmd->body.srcRect.top = top;
+	cmd->body.srcRect.bottom = bottom;
+
+	clips_ptr = clips;
+	blits = (SVGASignedRect *)&cmd[1];
+	for (i = 0; i < num_clips; i++, clips_ptr += inc) {
+		blits[i].left   = clips_ptr->x1 - left;
+		blits[i].right  = clips_ptr->x2 - left;
+		blits[i].top    = clips_ptr->y1 - top;
+		blits[i].bottom = clips_ptr->y2 - top;
+	}
+
+	/* do per unit writing, reuse fifo for each */
+	for (i = 0; i < num_units; i++) {
+		struct vmw_display_unit *unit = units[i];
+		int clip_x1 = left - unit->crtc.x;
+		int clip_y1 = top - unit->crtc.y;
+		int clip_x2 = right - unit->crtc.x;
+		int clip_y2 = bottom - unit->crtc.y;
+
+		/* skip any crtcs that misses the clip region */
+		if (clip_x1 >= unit->crtc.mode.hdisplay ||
+		    clip_y1 >= unit->crtc.mode.vdisplay ||
+		    clip_x2 <= 0 || clip_y2 <= 0)
+			continue;
+
+		/* need to reset sid as it is changed by execbuf */
+		cmd->body.srcImage.sid = cpu_to_le32(framebuffer->user_handle);
+
+		cmd->body.destScreenId = unit->unit;
+
+		/*
+		 * The blit command is a lot more resilient then the
+		 * readback command when it comes to clip rects. So its
+		 * okay to go out of bounds.
+		 */
+
+		cmd->body.destRect.left = clip_x1;
+		cmd->body.destRect.right = clip_x2;
+		cmd->body.destRect.top = clip_y1;
+		cmd->body.destRect.bottom = clip_y2;
 
-	/**
-	 * Strictly we should take the ttm_lock in read mode before accessing
-	 * the fifo, to make sure the fifo is present and up. However,
-	 * instead we flush all workqueues under the ttm lock in exclusive mode
-	 * before taking down the fifo.
-	 */
-	mutex_lock(&vfbs->work_lock);
-	if (!vfbs->present_fs)
-		goto out_unlock;
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
-	if (unlikely(cmd == NULL))
-		goto out_resched;
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body) + sizeof(cmd->cr));
-	cmd->body.sid = cpu_to_le32(surf->res.id);
-	cmd->cr.x = cpu_to_le32(0);
-	cmd->cr.y = cpu_to_le32(0);
-	cmd->cr.srcx = cmd->cr.x;
-	cmd->cr.srcy = cmd->cr.y;
-	cmd->cr.w = cpu_to_le32(framebuffer->width);
-	cmd->cr.h = cpu_to_le32(framebuffer->height);
-	vfbs->present_fs = false;
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-out_resched:
-	/**
-	 * Will not re-add if already pending.
-	 */
-	schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE);
-out_unlock:
-	mutex_unlock(&vfbs->work_lock);
-}
 
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
 
 int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 				  struct drm_file *file_priv,
@@ -444,44 +474,20 @@ int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	struct vmw_framebuffer_surface *vfbs =
 		vmw_framebuffer_to_vfbs(framebuffer);
-	struct vmw_surface *surf = vfbs->surface;
 	struct drm_clip_rect norect;
-	SVGA3dCopyRect *cr;
-	int i, inc = 1;
-	int ret;
-
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdPresent body;
-		SVGA3dCopyRect cr;
-	} *cmd;
+	int ret, inc = 1;
 
 	if (unlikely(vfbs->master != file_priv->master))
 		return -EINVAL;
 
+	/* Require ScreenObject support for 3D */
+	if (!dev_priv->sou_priv)
+		return -EINVAL;
+
 	ret = ttm_read_lock(&vmaster->lock, true);
 	if (unlikely(ret != 0))
 		return ret;
 
-	if (!num_clips ||
-	    !(dev_priv->fifo.capabilities &
-	      SVGA_FIFO_CAP_SCREEN_OBJECT)) {
-		int ret;
-
-		mutex_lock(&vfbs->work_lock);
-		vfbs->present_fs = true;
-		ret = schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE);
-		mutex_unlock(&vfbs->work_lock);
-		if (ret) {
-			/**
-			 * No work pending, Force immediate present.
-			 */
-			vmw_framebuffer_present_fs_callback(&vfbs->d_work.work);
-		}
-		ttm_read_unlock(&vmaster->lock);
-		return 0;
-	}
-
 	if (!num_clips) {
 		num_clips = 1;
 		clips = &norect;
@@ -493,29 +499,10 @@ int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 		inc = 2; /* skip source rects */
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr));
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed.\n");
-		ttm_read_unlock(&vmaster->lock);
-		return -ENOMEM;
-	}
-
-	memset(cmd, 0, sizeof(*cmd));
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body) + num_clips * sizeof(cmd->cr));
-	cmd->body.sid = cpu_to_le32(surf->res.id);
-
-	for (i = 0, cr = &cmd->cr; i < num_clips; i++, cr++, clips += inc) {
-		cr->x = cpu_to_le16(clips->x1);
-		cr->y = cpu_to_le16(clips->y1);
-		cr->srcx = cr->x;
-		cr->srcy = cr->y;
-		cr->w = cpu_to_le16(clips->x2 - clips->x1);
-		cr->h = cpu_to_le16(clips->y2 - clips->y1);
-	}
+	ret = do_surface_dirty_sou(dev_priv, file_priv, &vfbs->base,
+				   flags, color,
+				   clips, num_clips, inc);
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr));
 	ttm_read_unlock(&vmaster->lock);
 	return 0;
 }
@@ -540,6 +527,10 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
+	/* 3D is only supported on HWv8 hosts which supports screen objects */
+	if (!dev_priv->sou_priv)
+		return -ENOSYS;
+
 	/*
 	 * Sanity checks.
 	 */
@@ -602,14 +593,11 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	vfbs->base.base.depth = mode_cmd->depth;
 	vfbs->base.base.width = mode_cmd->width;
 	vfbs->base.base.height = mode_cmd->height;
-	vfbs->base.pin = &vmw_surface_dmabuf_pin;
-	vfbs->base.unpin = &vmw_surface_dmabuf_unpin;
 	vfbs->surface = surface;
+	vfbs->base.user_handle = mode_cmd->handle;
 	vfbs->master = drm_master_get(file_priv->master);
-	mutex_init(&vfbs->work_lock);
 
 	mutex_lock(&vmaster->fb_surf_mutex);
-	INIT_DELAYED_WORK(&vfbs->d_work, &vmw_framebuffer_present_fs_callback);
 	list_add_tail(&vfbs->head, &vmaster->fb_surf);
 	mutex_unlock(&vmaster->fb_surf_mutex);
 
@@ -644,48 +632,33 @@ void vmw_framebuffer_dmabuf_destroy(struct drm_framebuffer *framebuffer)
 
 	drm_framebuffer_cleanup(framebuffer);
 	vmw_dmabuf_unreference(&vfbd->buffer);
+	ttm_base_object_unref(&vfbd->base.user_obj);
 
 	kfree(vfbd);
 }
 
-int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
-				 struct drm_file *file_priv,
-				 unsigned flags, unsigned color,
-				 struct drm_clip_rect *clips,
-				 unsigned num_clips)
+static int do_dmabuf_dirty_ldu(struct vmw_private *dev_priv,
+			       struct vmw_framebuffer *framebuffer,
+			       unsigned flags, unsigned color,
+			       struct drm_clip_rect *clips,
+			       unsigned num_clips, int increment)
 {
-	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	struct drm_clip_rect norect;
-	int ret;
+	size_t fifo_size;
+	int i;
+
 	struct {
 		uint32_t header;
 		SVGAFifoCmdUpdate body;
 	} *cmd;
-	int i, increment = 1;
-
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
 
-	if (!num_clips) {
-		num_clips = 1;
-		clips = &norect;
-		norect.x1 = norect.y1 = 0;
-		norect.x2 = framebuffer->width;
-		norect.y2 = framebuffer->height;
-	} else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) {
-		num_clips /= 2;
-		increment = 2;
-	}
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) * num_clips);
+	fifo_size = sizeof(*cmd) * num_clips;
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Fifo reserve failed.\n");
-		ttm_read_unlock(&vmaster->lock);
 		return -ENOMEM;
 	}
 
+	memset(cmd, 0, fifo_size);
 	for (i = 0; i < num_clips; i++, clips += increment) {
 		cmd[i].header = cpu_to_le32(SVGA_CMD_UPDATE);
 		cmd[i].body.x = cpu_to_le32(clips->x1);
@@ -694,57 +667,186 @@ int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 		cmd[i].body.height = cpu_to_le32(clips->y2 - clips->y1);
 	}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd) * num_clips);
-	ttm_read_unlock(&vmaster->lock);
-
+	vmw_fifo_commit(dev_priv, fifo_size);
 	return 0;
 }
 
-static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
-	.destroy = vmw_framebuffer_dmabuf_destroy,
-	.dirty = vmw_framebuffer_dmabuf_dirty,
-	.create_handle = vmw_framebuffer_create_handle,
-};
-
-static int vmw_surface_dmabuf_pin(struct vmw_framebuffer *vfb)
+static int do_dmabuf_define_gmrfb(struct drm_file *file_priv,
+				  struct vmw_private *dev_priv,
+				  struct vmw_framebuffer *framebuffer)
 {
-	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
-	struct vmw_framebuffer_surface *vfbs =
-		vmw_framebuffer_to_vfbs(&vfb->base);
-	unsigned long size = vfbs->base.base.pitch * vfbs->base.base.height;
+	int depth = framebuffer->base.depth;
+	size_t fifo_size;
 	int ret;
 
-	vfbs->buffer = kzalloc(sizeof(*vfbs->buffer), GFP_KERNEL);
-	if (unlikely(vfbs->buffer == NULL))
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd;
+
+	/* Emulate RGBA support, contrary to svga_reg.h this is not
+	 * supported by hosts. This is only a problem if we are reading
+	 * this value later and expecting what we uploaded back.
+	 */
+	if (depth == 32)
+		depth = 24;
+
+	fifo_size = sizeof(*cmd);
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary cmd buffer.\n");
 		return -ENOMEM;
+	}
 
-	vmw_overlay_pause_all(dev_priv);
-	ret = vmw_dmabuf_init(dev_priv, vfbs->buffer, size,
-			       &vmw_vram_ne_placement,
-			       false, &vmw_dmabuf_bo_free);
-	vmw_overlay_resume_all(dev_priv);
+	memset(cmd, 0, fifo_size);
+	cmd->header = SVGA_CMD_DEFINE_GMRFB;
+	cmd->body.format.bitsPerPixel = framebuffer->base.bits_per_pixel;
+	cmd->body.format.colorDepth = depth;
+	cmd->body.format.reserved = 0;
+	cmd->body.bytesPerLine = framebuffer->base.pitch;
+	cmd->body.ptr.gmrId = framebuffer->user_handle;
+	cmd->body.ptr.offset = 0;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+				  fifo_size, 0, NULL);
+
+	kfree(cmd);
+
+	return ret;
+}
+
+static int do_dmabuf_dirty_sou(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       struct vmw_framebuffer *framebuffer,
+			       unsigned flags, unsigned color,
+			       struct drm_clip_rect *clips,
+			       unsigned num_clips, int increment)
+{
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_clip_rect *clips_ptr;
+	int i, k, num_units, ret;
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdBlitGMRFBToScreen body;
+	} *blits;
+
+	ret = do_dmabuf_define_gmrfb(file_priv, dev_priv, framebuffer);
 	if (unlikely(ret != 0))
-		vfbs->buffer = NULL;
+		return ret; /* define_gmrfb prints warnings */
+
+	fifo_size = sizeof(*blits) * num_clips;
+	blits = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(blits == NULL)) {
+		DRM_ERROR("Failed to allocate temporary cmd buffer.\n");
+		return -ENOMEM;
+	}
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &framebuffer->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	for (k = 0; k < num_units; k++) {
+		struct vmw_display_unit *unit = units[k];
+		int hit_num = 0;
+
+		clips_ptr = clips;
+		for (i = 0; i < num_clips; i++, clips_ptr += increment) {
+			int clip_x1 = clips_ptr->x1 - unit->crtc.x;
+			int clip_y1 = clips_ptr->y1 - unit->crtc.y;
+			int clip_x2 = clips_ptr->x2 - unit->crtc.x;
+			int clip_y2 = clips_ptr->y2 - unit->crtc.y;
+
+			/* skip any crtcs that misses the clip region */
+			if (clip_x1 >= unit->crtc.mode.hdisplay ||
+			    clip_y1 >= unit->crtc.mode.vdisplay ||
+			    clip_x2 <= 0 || clip_y2 <= 0)
+				continue;
+
+			blits[hit_num].header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN;
+			blits[hit_num].body.destScreenId = unit->unit;
+			blits[hit_num].body.srcOrigin.x = clips_ptr->x1;
+			blits[hit_num].body.srcOrigin.y = clips_ptr->y1;
+			blits[hit_num].body.destRect.left = clip_x1;
+			blits[hit_num].body.destRect.top = clip_y1;
+			blits[hit_num].body.destRect.right = clip_x2;
+			blits[hit_num].body.destRect.bottom = clip_y2;
+			hit_num++;
+		}
+
+		/* no clips hit the crtc */
+		if (hit_num == 0)
+			continue;
+
+		fifo_size = sizeof(*blits) * hit_num;
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, blits,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(blits);
 
 	return ret;
 }
 
-static int vmw_surface_dmabuf_unpin(struct vmw_framebuffer *vfb)
+int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
+				 struct drm_file *file_priv,
+				 unsigned flags, unsigned color,
+				 struct drm_clip_rect *clips,
+				 unsigned num_clips)
 {
-	struct ttm_buffer_object *bo;
-	struct vmw_framebuffer_surface *vfbs =
-		vmw_framebuffer_to_vfbs(&vfb->base);
+	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct vmw_framebuffer_dmabuf *vfbd =
+		vmw_framebuffer_to_vfbd(framebuffer);
+	struct drm_clip_rect norect;
+	int ret, increment = 1;
 
-	if (unlikely(vfbs->buffer == NULL))
-		return 0;
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
 
-	bo = &vfbs->buffer->base;
-	ttm_bo_unref(&bo);
-	vfbs->buffer = NULL;
+	if (!num_clips) {
+		num_clips = 1;
+		clips = &norect;
+		norect.x1 = norect.y1 = 0;
+		norect.x2 = framebuffer->width;
+		norect.y2 = framebuffer->height;
+	} else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) {
+		num_clips /= 2;
+		increment = 2;
+	}
 
-	return 0;
+	if (dev_priv->ldu_priv) {
+		ret = do_dmabuf_dirty_ldu(dev_priv, &vfbd->base,
+					  flags, color,
+					  clips, num_clips, increment);
+	} else {
+		ret = do_dmabuf_dirty_sou(file_priv, dev_priv, &vfbd->base,
+					  flags, color,
+					  clips, num_clips, increment);
+	}
+
+	ttm_read_unlock(&vmaster->lock);
+	return ret;
 }
 
+static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
+	.destroy = vmw_framebuffer_dmabuf_destroy,
+	.dirty = vmw_framebuffer_dmabuf_dirty,
+	.create_handle = vmw_framebuffer_create_handle,
+};
+
+/**
+ * Pin the dmabuffer to the start of vram.
+ */
 static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
@@ -752,10 +854,12 @@ static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 		vmw_framebuffer_to_vfbd(&vfb->base);
 	int ret;
 
+	/* This code should not be used with screen objects */
+	BUG_ON(dev_priv->sou_priv);
 
 	vmw_overlay_pause_all(dev_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer);
+	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer, true, false);
 
 	vmw_overlay_resume_all(dev_priv);
 
@@ -775,7 +879,7 @@ static int vmw_framebuffer_dmabuf_unpin(struct vmw_framebuffer *vfb)
 		return 0;
 	}
 
-	return vmw_dmabuf_from_vram(dev_priv, vfbd->buffer);
+	return vmw_dmabuf_unpin(dev_priv, vfbd->buffer, false);
 }
 
 static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
@@ -797,6 +901,33 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 		return -EINVAL;
 	}
 
+	/* Limited framebuffer color depth support for screen objects */
+	if (dev_priv->sou_priv) {
+		switch (mode_cmd->depth) {
+		case 32:
+		case 24:
+			/* Only support 32 bpp for 32 and 24 depth fbs */
+			if (mode_cmd->bpp == 32)
+				break;
+
+			DRM_ERROR("Invalid color depth/bbp: %d %d\n",
+				  mode_cmd->depth, mode_cmd->bpp);
+			return -EINVAL;
+		case 16:
+		case 15:
+			/* Only support 16 bpp for 16 and 15 depth fbs */
+			if (mode_cmd->bpp == 16)
+				break;
+
+			DRM_ERROR("Invalid color depth/bbp: %d %d\n",
+				  mode_cmd->depth, mode_cmd->bpp);
+			return -EINVAL;
+		default:
+			DRM_ERROR("Invalid color depth: %d\n", mode_cmd->depth);
+			return -EINVAL;
+		}
+	}
+
 	vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL);
 	if (!vfbd) {
 		ret = -ENOMEM;
@@ -818,9 +949,13 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 	vfbd->base.base.depth = mode_cmd->depth;
 	vfbd->base.base.width = mode_cmd->width;
 	vfbd->base.base.height = mode_cmd->height;
-	vfbd->base.pin = vmw_framebuffer_dmabuf_pin;
-	vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin;
+	if (!dev_priv->sou_priv) {
+		vfbd->base.pin = vmw_framebuffer_dmabuf_pin;
+		vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin;
+	}
+	vfbd->base.dmabuf = true;
 	vfbd->buffer = dmabuf;
+	vfbd->base.user_handle = mode_cmd->handle;
 	*out = &vfbd->base;
 
 	return 0;
@@ -846,6 +981,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	struct vmw_framebuffer *vfb = NULL;
 	struct vmw_surface *surface = NULL;
 	struct vmw_dma_buffer *bo = NULL;
+	struct ttm_base_object *user_obj;
 	u64 required_size;
 	int ret;
 
@@ -861,6 +997,21 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 		return NULL;
 	}
 
+	/*
+	 * Take a reference on the user object of the resource
+	 * backing the kms fb. This ensures that user-space handle
+	 * lookups on that resource will always work as long as
+	 * it's registered with a kms framebuffer. This is important,
+	 * since vmw_execbuf_process identifies resources in the
+	 * command stream using user-space handles.
+	 */
+
+	user_obj = ttm_base_object_lookup(tfile, mode_cmd->handle);
+	if (unlikely(user_obj == NULL)) {
+		DRM_ERROR("Could not locate requested kms frame buffer.\n");
+		return ERR_PTR(-ENOENT);
+	}
+
 	/**
 	 * End conditioned code.
 	 */
@@ -881,8 +1032,10 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
+		ttm_base_object_unref(&user_obj);
 		return ERR_PTR(ret);
-	}
+	} else
+		vfb->user_obj = user_obj;
 	return &vfb->base;
 
 try_dmabuf:
@@ -902,8 +1055,10 @@ try_dmabuf:
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
+		ttm_base_object_unref(&user_obj);
 		return ERR_PTR(ret);
-	}
+	} else
+		vfb->user_obj = user_obj;
 
 	return &vfb->base;
 
@@ -911,6 +1066,7 @@ err_not_scanout:
 	DRM_ERROR("surface not marked as scanout\n");
 	/* vmw_user_surface_lookup takes one ref */
 	vmw_surface_unreference(&surface);
+	ttm_base_object_unref(&user_obj);
 
 	return ERR_PTR(-EINVAL);
 }
@@ -919,6 +1075,210 @@ static struct drm_mode_config_funcs vmw_kms_funcs = {
 	.fb_create = vmw_kms_fb_create,
 };
 
+int vmw_kms_present(struct vmw_private *dev_priv,
+		    struct drm_file *file_priv,
+		    struct vmw_framebuffer *vfb,
+		    struct vmw_surface *surface,
+		    uint32_t sid,
+		    int32_t destX, int32_t destY,
+		    struct drm_vmw_rect *clips,
+		    uint32_t num_clips)
+{
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, k, num_units;
+	int ret = 0; /* silence warning */
+
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdBlitSurfaceToScreen body;
+	} *cmd;
+	SVGASignedRect *blits;
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &vfb->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(surface == NULL);
+	BUG_ON(!clips || !num_clips);
+
+	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
+		return -ENOMEM;
+	}
+
+	/* only need to do this once */
+	memset(cmd, 0, fifo_size);
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
+	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
+
+	cmd->body.srcRect.left = 0;
+	cmd->body.srcRect.right = surface->sizes[0].width;
+	cmd->body.srcRect.top = 0;
+	cmd->body.srcRect.bottom = surface->sizes[0].height;
+
+	blits = (SVGASignedRect *)&cmd[1];
+	for (i = 0; i < num_clips; i++) {
+		blits[i].left   = clips[i].x;
+		blits[i].right  = clips[i].x + clips[i].w;
+		blits[i].top    = clips[i].y;
+		blits[i].bottom = clips[i].y + clips[i].h;
+	}
+
+	for (k = 0; k < num_units; k++) {
+		struct vmw_display_unit *unit = units[k];
+		int clip_x1 = destX - unit->crtc.x;
+		int clip_y1 = destY - unit->crtc.y;
+		int clip_x2 = clip_x1 + surface->sizes[0].width;
+		int clip_y2 = clip_y1 + surface->sizes[0].height;
+
+		/* skip any crtcs that misses the clip region */
+		if (clip_x1 >= unit->crtc.mode.hdisplay ||
+		    clip_y1 >= unit->crtc.mode.vdisplay ||
+		    clip_x2 <= 0 || clip_y2 <= 0)
+			continue;
+
+		/* need to reset sid as it is changed by execbuf */
+		cmd->body.srcImage.sid = sid;
+
+		cmd->body.destScreenId = unit->unit;
+
+		/*
+		 * The blit command is a lot more resilient then the
+		 * readback command when it comes to clip rects. So its
+		 * okay to go out of bounds.
+		 */
+
+		cmd->body.destRect.left = clip_x1;
+		cmd->body.destRect.right = clip_x2;
+		cmd->body.destRect.top = clip_y1;
+		cmd->body.destRect.bottom = clip_y2;
+
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
+
+int vmw_kms_readback(struct vmw_private *dev_priv,
+		     struct drm_file *file_priv,
+		     struct vmw_framebuffer *vfb,
+		     struct drm_vmw_fence_rep __user *user_fence_rep,
+		     struct drm_vmw_rect *clips,
+		     uint32_t num_clips)
+{
+	struct vmw_framebuffer_dmabuf *vfbd =
+		vmw_framebuffer_to_vfbd(&vfb->base);
+	struct vmw_dma_buffer *dmabuf = vfbd->buffer;
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, k, ret, num_units, blits_pos;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd;
+	struct {
+		uint32_t header;
+		SVGAFifoCmdBlitScreenToGMRFB body;
+	} *blits;
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &vfb->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(dmabuf == NULL);
+	BUG_ON(!clips || !num_clips);
+
+	/* take a safe guess at fifo size */
+	fifo_size = sizeof(*cmd) + sizeof(*blits) * num_clips * num_units;
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header = SVGA_CMD_DEFINE_GMRFB;
+	cmd->body.format.bitsPerPixel = vfb->base.bits_per_pixel;
+	cmd->body.format.colorDepth = vfb->base.depth;
+	cmd->body.format.reserved = 0;
+	cmd->body.bytesPerLine = vfb->base.pitch;
+	cmd->body.ptr.gmrId = vfb->user_handle;
+	cmd->body.ptr.offset = 0;
+
+	blits = (void *)&cmd[1];
+	blits_pos = 0;
+	for (i = 0; i < num_units; i++) {
+		struct drm_vmw_rect *c = clips;
+		for (k = 0; k < num_clips; k++, c++) {
+			/* transform clip coords to crtc origin based coords */
+			int clip_x1 = c->x - units[i]->crtc.x;
+			int clip_x2 = c->x - units[i]->crtc.x + c->w;
+			int clip_y1 = c->y - units[i]->crtc.y;
+			int clip_y2 = c->y - units[i]->crtc.y + c->h;
+			int dest_x = c->x;
+			int dest_y = c->y;
+
+			/* compensate for clipping, we negate
+			 * a negative number and add that.
+			 */
+			if (clip_x1 < 0)
+				dest_x += -clip_x1;
+			if (clip_y1 < 0)
+				dest_y += -clip_y1;
+
+			/* clip */
+			clip_x1 = max(clip_x1, 0);
+			clip_y1 = max(clip_y1, 0);
+			clip_x2 = min(clip_x2, units[i]->crtc.mode.hdisplay);
+			clip_y2 = min(clip_y2, units[i]->crtc.mode.vdisplay);
+
+			/* and cull any rects that misses the crtc */
+			if (clip_x1 >= units[i]->crtc.mode.hdisplay ||
+			    clip_y1 >= units[i]->crtc.mode.vdisplay ||
+			    clip_x2 <= 0 || clip_y2 <= 0)
+				continue;
+
+			blits[blits_pos].header = SVGA_CMD_BLIT_SCREEN_TO_GMRFB;
+			blits[blits_pos].body.srcScreenId = units[i]->unit;
+			blits[blits_pos].body.destOrigin.x = dest_x;
+			blits[blits_pos].body.destOrigin.y = dest_y;
+
+			blits[blits_pos].body.srcRect.left = clip_x1;
+			blits[blits_pos].body.srcRect.top = clip_y1;
+			blits[blits_pos].body.srcRect.right = clip_x2;
+			blits[blits_pos].body.srcRect.bottom = clip_y2;
+			blits_pos++;
+		}
+	}
+	/* reset size here and use calculated exact size from loops */
+	fifo_size = sizeof(*cmd) + sizeof(*blits) * blits_pos;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd, fifo_size,
+				  0, user_fence_rep);
+
+	kfree(cmd);
+
+	return ret;
+}
+
 int vmw_kms_init(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -932,7 +1292,9 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 	dev->mode_config.max_width = 8192;
 	dev->mode_config.max_height = 8192;
 
-	ret = vmw_kms_init_legacy_display_system(dev_priv);
+	ret = vmw_kms_init_screen_object_display(dev_priv);
+	if (ret) /* Fallback */
+		(void)vmw_kms_init_legacy_display_system(dev_priv);
 
 	return 0;
 }
@@ -1099,7 +1461,265 @@ bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 	return ((u64) pitch * (u64) height) < (u64) dev_priv->vram_size;
 }
 
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
 u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc)
 {
 	return 0;
 }
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+int vmw_enable_vblank(struct drm_device *dev, int crtc)
+{
+	return -ENOSYS;
+}
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+void vmw_disable_vblank(struct drm_device *dev, int crtc)
+{
+}
+
+
+/*
+ * Small shared kms functions.
+ */
+
+int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
+			 struct drm_vmw_rect *rects)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct vmw_display_unit *du;
+	struct drm_connector *con;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+#if 0
+	{
+		unsigned int i;
+
+		DRM_INFO("%s: new layout ", __func__);
+		for (i = 0; i < num; i++)
+			DRM_INFO("(%i, %i %ux%u) ", rects[i].x, rects[i].y,
+				 rects[i].w, rects[i].h);
+		DRM_INFO("\n");
+	}
+#endif
+
+	list_for_each_entry(con, &dev->mode_config.connector_list, head) {
+		du = vmw_connector_to_du(con);
+		if (num > du->unit) {
+			du->pref_width = rects[du->unit].w;
+			du->pref_height = rects[du->unit].h;
+			du->pref_active = true;
+		} else {
+			du->pref_width = 800;
+			du->pref_height = 600;
+			du->pref_active = false;
+		}
+		con->status = vmw_du_connector_detect(con, true);
+	}
+
+	mutex_unlock(&dev->mode_config.mutex);
+
+	return 0;
+}
+
+void vmw_du_crtc_save(struct drm_crtc *crtc)
+{
+}
+
+void vmw_du_crtc_restore(struct drm_crtc *crtc)
+{
+}
+
+void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
+			   u16 *r, u16 *g, u16 *b,
+			   uint32_t start, uint32_t size)
+{
+	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
+	int i;
+
+	for (i = 0; i < size; i++) {
+		DRM_DEBUG("%d r/g/b = 0x%04x / 0x%04x / 0x%04x\n", i,
+			  r[i], g[i], b[i]);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 0, r[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 1, g[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 2, b[i] >> 8);
+	}
+}
+
+void vmw_du_connector_dpms(struct drm_connector *connector, int mode)
+{
+}
+
+void vmw_du_connector_save(struct drm_connector *connector)
+{
+}
+
+void vmw_du_connector_restore(struct drm_connector *connector)
+{
+}
+
+enum drm_connector_status
+vmw_du_connector_detect(struct drm_connector *connector, bool force)
+{
+	uint32_t num_displays;
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	mutex_lock(&dev_priv->hw_mutex);
+	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
+	mutex_unlock(&dev_priv->hw_mutex);
+
+	return ((vmw_connector_to_du(connector)->unit < num_displays) ?
+		connector_status_connected : connector_status_disconnected);
+}
+
+static struct drm_display_mode vmw_kms_connector_builtin[] = {
+	/* 640x480@60Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
+		   752, 800, 0, 480, 489, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 800x600@60Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
+		   968, 1056, 0, 600, 601, 605, 628, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1024x768@60Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
+		   1184, 1344, 0, 768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 1152x864@75Hz */
+	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
+		   1344, 1600, 0, 864, 865, 868, 900, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x768@60Hz */
+	{ DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
+		   1472, 1664, 0, 768, 771, 778, 798, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x800@60Hz */
+	{ DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
+		   1480, 1680, 0, 800, 803, 809, 831, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 1280x960@60Hz */
+	{ DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376,
+		   1488, 1800, 0, 960, 961, 964, 1000, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x1024@60Hz */
+	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328,
+		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1360x768@60Hz */
+	{ DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424,
+		   1536, 1792, 0, 768, 771, 777, 795, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1440x1050@60Hz */
+	{ DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
+		   1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1440x900@60Hz */
+	{ DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520,
+		   1672, 1904, 0, 900, 903, 909, 934, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1600x1200@60Hz */
+	{ DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664,
+		   1856, 2160, 0, 1200, 1201, 1204, 1250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1680x1050@60Hz */
+	{ DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
+		   1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1792x1344@60Hz */
+	{ DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920,
+		   2120, 2448, 0, 1344, 1345, 1348, 1394, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1853x1392@60Hz */
+	{ DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952,
+		   2176, 2528, 0, 1392, 1393, 1396, 1439, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1920x1200@60Hz */
+	{ DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
+		   2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1920x1440@60Hz */
+	{ DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048,
+		   2256, 2600, 0, 1440, 1441, 1444, 1500, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 2560x1600@60Hz */
+	{ DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752,
+		   3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* Terminate */
+	{ DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) },
+};
+
+int vmw_du_connector_fill_modes(struct drm_connector *connector,
+				uint32_t max_width, uint32_t max_height)
+{
+	struct vmw_display_unit *du = vmw_connector_to_du(connector);
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_display_mode *mode = NULL;
+	struct drm_display_mode *bmode;
+	struct drm_display_mode prefmode = { DRM_MODE("preferred",
+		DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
+	};
+	int i;
+
+	/* Add preferred mode */
+	{
+		mode = drm_mode_duplicate(dev, &prefmode);
+		if (!mode)
+			return 0;
+		mode->hdisplay = du->pref_width;
+		mode->vdisplay = du->pref_height;
+		mode->vrefresh = drm_mode_vrefresh(mode);
+		if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2,
+					       mode->vdisplay)) {
+			drm_mode_probed_add(connector, mode);
+
+			if (du->pref_mode) {
+				list_del_init(&du->pref_mode->head);
+				drm_mode_destroy(dev, du->pref_mode);
+			}
+
+			du->pref_mode = mode;
+		}
+	}
+
+	for (i = 0; vmw_kms_connector_builtin[i].type != 0; i++) {
+		bmode = &vmw_kms_connector_builtin[i];
+		if (bmode->hdisplay > max_width ||
+		    bmode->vdisplay > max_height)
+			continue;
+
+		if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2,
+						bmode->vdisplay))
+			continue;
+
+		mode = drm_mode_duplicate(dev, bmode);
+		if (!mode)
+			return 0;
+		mode->vrefresh = drm_mode_vrefresh(mode);
+
+		drm_mode_probed_add(connector, mode);
+	}
+
+	drm_mode_connector_list_update(connector);
+
+	return 1;
+}
+
+int vmw_du_connector_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t val)
+{
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 8a398a0339b6..db0b901f8c3f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -31,6 +31,8 @@
 #include "drmP.h"
 #include "vmwgfx_drv.h"
 
+#define VMWGFX_NUM_DISPLAY_UNITS 8
+
 
 #define vmw_framebuffer_to_vfb(x) \
 	container_of(x, struct vmw_framebuffer, base)
@@ -45,6 +47,9 @@ struct vmw_framebuffer {
 	struct drm_framebuffer base;
 	int (*pin)(struct vmw_framebuffer *fb);
 	int (*unpin)(struct vmw_framebuffer *fb);
+	bool dmabuf;
+	struct ttm_base_object *user_obj;
+	uint32_t user_handle;
 };
 
 
@@ -83,22 +88,59 @@ struct vmw_display_unit {
 	int hotspot_y;
 
 	unsigned unit;
+
+	/*
+	 * Prefered mode tracking.
+	 */
+	unsigned pref_width;
+	unsigned pref_height;
+	bool pref_active;
+	struct drm_display_mode *pref_mode;
 };
 
+#define vmw_crtc_to_du(x) \
+	container_of(x, struct vmw_display_unit, crtc)
+#define vmw_connector_to_du(x) \
+	container_of(x, struct vmw_display_unit, connector)
+
+
 /*
  * Shared display unit functions - vmwgfx_kms.c
  */
 void vmw_display_unit_cleanup(struct vmw_display_unit *du);
+void vmw_du_crtc_save(struct drm_crtc *crtc);
+void vmw_du_crtc_restore(struct drm_crtc *crtc);
+void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
+			   u16 *r, u16 *g, u16 *b,
+			   uint32_t start, uint32_t size);
 int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 			   uint32_t handle, uint32_t width, uint32_t height);
 int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
+void vmw_du_connector_dpms(struct drm_connector *connector, int mode);
+void vmw_du_connector_save(struct drm_connector *connector);
+void vmw_du_connector_restore(struct drm_connector *connector);
+enum drm_connector_status
+vmw_du_connector_detect(struct drm_connector *connector, bool force);
+int vmw_du_connector_fill_modes(struct drm_connector *connector,
+				uint32_t max_width, uint32_t max_height);
+int vmw_du_connector_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t val);
+int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
+			 struct drm_vmw_rect *rects);
 
 /*
  * Legacy display unit functions - vmwgfx_ldu.c
  */
 int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv);
 int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv);
-int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
+
+/*
+ * Screen Objects display functions - vmwgfx_scrn.c
+ */
+int vmw_kms_init_screen_object_display(struct vmw_private *dev_priv);
+int vmw_kms_close_screen_object_display(struct vmw_private *dev_priv);
+int vmw_kms_sou_update_layout(struct vmw_private *dev_priv, unsigned num,
 			      struct drm_vmw_rect *rects);
 
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 7e1901c4f065..92f56bc594eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -27,7 +27,6 @@
 
 #include "vmwgfx_kms.h"
 
-#define VMWGFX_LDU_NUM_DU 8
 
 #define vmw_crtc_to_ldu(x) \
 	container_of(x, struct vmw_legacy_display_unit, base.crtc)
@@ -51,11 +50,6 @@ struct vmw_legacy_display {
 struct vmw_legacy_display_unit {
 	struct vmw_display_unit base;
 
-	unsigned pref_width;
-	unsigned pref_height;
-	bool pref_active;
-	struct drm_display_mode *pref_mode;
-
 	struct list_head active;
 };
 
@@ -71,29 +65,6 @@ static void vmw_ldu_destroy(struct vmw_legacy_display_unit *ldu)
  * Legacy Display Unit CRTC functions
  */
 
-static void vmw_ldu_crtc_save(struct drm_crtc *crtc)
-{
-}
-
-static void vmw_ldu_crtc_restore(struct drm_crtc *crtc)
-{
-}
-
-static void vmw_ldu_crtc_gamma_set(struct drm_crtc *crtc,
-				   u16 *r, u16 *g, u16 *b,
-				   uint32_t start, uint32_t size)
-{
-	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
-	int i;
-
-	for (i = 0; i < size; i++) {
-		DRM_DEBUG("%d r/g/b = 0x%04x / 0x%04x / 0x%04x\n", i, r[i], g[i], b[i]);
-		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 0, r[i] >> 8);
-		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 1, g[i] >> 8);
-		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 2, b[i] >> 8);
-	}
-}
-
 static void vmw_ldu_crtc_destroy(struct drm_crtc *crtc)
 {
 	vmw_ldu_destroy(vmw_crtc_to_ldu(crtc));
@@ -301,15 +272,16 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 }
 
 static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
-	.save = vmw_ldu_crtc_save,
-	.restore = vmw_ldu_crtc_restore,
+	.save = vmw_du_crtc_save,
+	.restore = vmw_du_crtc_restore,
 	.cursor_set = vmw_du_crtc_cursor_set,
 	.cursor_move = vmw_du_crtc_cursor_move,
-	.gamma_set = vmw_ldu_crtc_gamma_set,
+	.gamma_set = vmw_du_crtc_gamma_set,
 	.destroy = vmw_ldu_crtc_destroy,
 	.set_config = vmw_ldu_crtc_set_config,
 };
 
+
 /*
  * Legacy Display Unit encoder functions
  */
@@ -327,190 +299,18 @@ static struct drm_encoder_funcs vmw_legacy_encoder_funcs = {
  * Legacy Display Unit connector functions
  */
 
-static void vmw_ldu_connector_dpms(struct drm_connector *connector, int mode)
-{
-}
-
-static void vmw_ldu_connector_save(struct drm_connector *connector)
-{
-}
-
-static void vmw_ldu_connector_restore(struct drm_connector *connector)
-{
-}
-
-static enum drm_connector_status
-	vmw_ldu_connector_detect(struct drm_connector *connector,
-				 bool force)
-{
-	uint32_t num_displays;
-	struct drm_device *dev = connector->dev;
-	struct vmw_private *dev_priv = vmw_priv(dev);
-
-	mutex_lock(&dev_priv->hw_mutex);
-	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
-	mutex_unlock(&dev_priv->hw_mutex);
-
-	return ((vmw_connector_to_ldu(connector)->base.unit < num_displays) ?
-		connector_status_connected : connector_status_disconnected);
-}
-
-static const struct drm_display_mode vmw_ldu_connector_builtin[] = {
-	/* 640x480@60Hz */
-	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
-		   752, 800, 0, 480, 489, 492, 525, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 800x600@60Hz */
-	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
-		   968, 1056, 0, 600, 601, 605, 628, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1024x768@60Hz */
-	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
-		   1184, 1344, 0, 768, 771, 777, 806, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 1152x864@75Hz */
-	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
-		   1344, 1600, 0, 864, 865, 868, 900, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x768@60Hz */
-	{ DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
-		   1472, 1664, 0, 768, 771, 778, 798, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x800@60Hz */
-	{ DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
-		   1480, 1680, 0, 800, 803, 809, 831, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 1280x960@60Hz */
-	{ DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376,
-		   1488, 1800, 0, 960, 961, 964, 1000, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x1024@60Hz */
-	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328,
-		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1360x768@60Hz */
-	{ DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424,
-		   1536, 1792, 0, 768, 771, 777, 795, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1440x1050@60Hz */
-	{ DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
-		   1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1440x900@60Hz */
-	{ DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520,
-		   1672, 1904, 0, 900, 903, 909, 934, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1600x1200@60Hz */
-	{ DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664,
-		   1856, 2160, 0, 1200, 1201, 1204, 1250, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1680x1050@60Hz */
-	{ DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
-		   1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1792x1344@60Hz */
-	{ DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920,
-		   2120, 2448, 0, 1344, 1345, 1348, 1394, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1853x1392@60Hz */
-	{ DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952,
-		   2176, 2528, 0, 1392, 1393, 1396, 1439, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1920x1200@60Hz */
-	{ DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
-		   2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1920x1440@60Hz */
-	{ DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048,
-		   2256, 2600, 0, 1440, 1441, 1444, 1500, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 2560x1600@60Hz */
-	{ DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752,
-		   3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* Terminate */
-	{ DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) },
-};
-
-static int vmw_ldu_connector_fill_modes(struct drm_connector *connector,
-					uint32_t max_width, uint32_t max_height)
-{
-	struct vmw_legacy_display_unit *ldu = vmw_connector_to_ldu(connector);
-	struct drm_device *dev = connector->dev;
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_display_mode *mode = NULL;
-	struct drm_display_mode prefmode = { DRM_MODE("preferred",
-		DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
-	};
-	int i;
-
-	/* Add preferred mode */
-	{
-		mode = drm_mode_duplicate(dev, &prefmode);
-		if (!mode)
-			return 0;
-		mode->hdisplay = ldu->pref_width;
-		mode->vdisplay = ldu->pref_height;
-		mode->vrefresh = drm_mode_vrefresh(mode);
-		if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2,
-					       mode->vdisplay)) {
-			drm_mode_probed_add(connector, mode);
-
-			if (ldu->pref_mode) {
-				list_del_init(&ldu->pref_mode->head);
-				drm_mode_destroy(dev, ldu->pref_mode);
-			}
-
-			ldu->pref_mode = mode;
-		}
-	}
-
-	for (i = 0; vmw_ldu_connector_builtin[i].type != 0; i++) {
-		const struct drm_display_mode *bmode;
-
-		bmode = &vmw_ldu_connector_builtin[i];
-		if (bmode->hdisplay > max_width ||
-		    bmode->vdisplay > max_height)
-			continue;
-
-		if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2,
-						bmode->vdisplay))
-			continue;
-
-		mode = drm_mode_duplicate(dev, bmode);
-		if (!mode)
-			return 0;
-		mode->vrefresh = drm_mode_vrefresh(mode);
-
-		drm_mode_probed_add(connector, mode);
-	}
-
-	drm_mode_connector_list_update(connector);
-
-	return 1;
-}
-
-static int vmw_ldu_connector_set_property(struct drm_connector *connector,
-					  struct drm_property *property,
-					  uint64_t val)
-{
-	return 0;
-}
-
 static void vmw_ldu_connector_destroy(struct drm_connector *connector)
 {
 	vmw_ldu_destroy(vmw_connector_to_ldu(connector));
 }
 
 static struct drm_connector_funcs vmw_legacy_connector_funcs = {
-	.dpms = vmw_ldu_connector_dpms,
-	.save = vmw_ldu_connector_save,
-	.restore = vmw_ldu_connector_restore,
-	.detect = vmw_ldu_connector_detect,
-	.fill_modes = vmw_ldu_connector_fill_modes,
-	.set_property = vmw_ldu_connector_set_property,
+	.dpms = vmw_du_connector_dpms,
+	.save = vmw_du_connector_save,
+	.restore = vmw_du_connector_restore,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
+	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_ldu_connector_destroy,
 };
 
@@ -533,14 +333,14 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 
 	INIT_LIST_HEAD(&ldu->active);
 
-	ldu->pref_active = (unit == 0);
-	ldu->pref_width = 800;
-	ldu->pref_height = 600;
-	ldu->pref_mode = NULL;
+	ldu->base.pref_active = (unit == 0);
+	ldu->base.pref_width = 800;
+	ldu->base.pref_height = 600;
+	ldu->base.pref_mode = NULL;
 
 	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
 			   DRM_MODE_CONNECTOR_LVDS);
-	connector->status = vmw_ldu_connector_detect(connector, true);
+	connector->status = vmw_du_connector_detect(connector, true);
 
 	drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs,
 			 DRM_MODE_ENCODER_LVDS);
@@ -562,8 +362,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
-	int i;
-	int ret;
+	int i, ret;
 
 	if (dev_priv->ldu_priv) {
 		DRM_INFO("ldu system already on\n");
@@ -571,7 +370,6 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 	}
 
 	dev_priv->ldu_priv = kmalloc(sizeof(*dev_priv->ldu_priv), GFP_KERNEL);
-
 	if (!dev_priv->ldu_priv)
 		return -ENOMEM;
 
@@ -580,18 +378,31 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 	dev_priv->ldu_priv->last_num_active = 0;
 	dev_priv->ldu_priv->fb = NULL;
 
-	drm_mode_create_dirty_info_property(dev_priv->dev);
+	/* for old hardware without multimon only enable one display */
+	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
+		ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
+	else
+		ret = drm_vblank_init(dev, 1);
+	if (ret != 0)
+		goto err_free;
+
+	ret = drm_mode_create_dirty_info_property(dev);
+	if (ret != 0)
+		goto err_vblank_cleanup;
 
-	if (dev_priv->capabilities & SVGA_CAP_MULTIMON) {
-		for (i = 0; i < VMWGFX_LDU_NUM_DU; ++i)
+	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
+		for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 			vmw_ldu_init(dev_priv, i);
-		ret = drm_vblank_init(dev, VMWGFX_LDU_NUM_DU);
-	} else {
-		/* for old hardware without multimon only enable one display */
+	else
 		vmw_ldu_init(dev_priv, 0);
-		ret = drm_vblank_init(dev, 1);
-	}
 
+	return 0;
+
+err_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+err_free:
+	kfree(dev_priv->ldu_priv);
+	dev_priv->ldu_priv = NULL;
 	return ret;
 }
 
@@ -599,52 +410,14 @@ int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	drm_vblank_cleanup(dev);
 	if (!dev_priv->ldu_priv)
 		return -ENOSYS;
 
+	drm_vblank_cleanup(dev);
+
 	BUG_ON(!list_empty(&dev_priv->ldu_priv->active));
 
 	kfree(dev_priv->ldu_priv);
 
 	return 0;
 }
-
-int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
-			      struct drm_vmw_rect *rects)
-{
-	struct drm_device *dev = dev_priv->dev;
-	struct vmw_legacy_display_unit *ldu;
-	struct drm_connector *con;
-	int i;
-
-	mutex_lock(&dev->mode_config.mutex);
-
-#if 0
-	DRM_INFO("%s: new layout ", __func__);
-	for (i = 0; i < (int)num; i++)
-		DRM_INFO("(%i, %i %ux%u) ", rects[i].x, rects[i].y,
-			 rects[i].w, rects[i].h);
-	DRM_INFO("\n");
-#else
-	(void)i;
-#endif
-
-	list_for_each_entry(con, &dev->mode_config.connector_list, head) {
-		ldu = vmw_connector_to_ldu(con);
-		if (num > ldu->base.unit) {
-			ldu->pref_width = rects[ldu->base.unit].w;
-			ldu->pref_height = rects[ldu->base.unit].h;
-			ldu->pref_active = true;
-		} else {
-			ldu->pref_width = 800;
-			ldu->pref_height = 600;
-			ldu->pref_active = false;
-		}
-		con->status = vmw_ldu_connector_detect(con, true);
-	}
-
-	mutex_unlock(&dev->mode_config.mutex);
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 07ce02da78a4..14399eec9c3c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -87,48 +87,6 @@ static inline void fill_flush(struct vmw_escape_video_flush *cmd,
 }
 
 /**
- * Pin or unpin a buffer in vram.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to pin or unpin.
- * @pin:  Pin buffer in vram if true.
- * @interruptible:  Use interruptible wait.
- *
- * Takes the current masters ttm lock in read.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
-				  bool pin, bool interruptible)
-{
-	struct ttm_buffer_object *bo = &buf->base;
-	struct ttm_placement *overlay_placement = &vmw_vram_placement;
-	int ret;
-
-	ret = ttm_read_lock(&dev_priv->active_master->lock, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err;
-
-	if (pin)
-		overlay_placement = &vmw_vram_ne_placement;
-
-	ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, false);
-
-	ttm_bo_unreserve(bo);
-
-err:
-	ttm_read_unlock(&dev_priv->active_master->lock);
-
-	return ret;
-}
-
-/**
  * Send put command to hw.
  *
  * Returns
@@ -139,68 +97,80 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv,
 				struct drm_vmw_control_stream_arg *arg,
 				bool interruptible)
 {
+	struct vmw_escape_video_flush *flush;
+	size_t fifo_size;
+	bool have_so = dev_priv->sou_priv ? true : false;
+	int i, num_items;
+	SVGAGuestPtr ptr;
+
 	struct {
 		struct vmw_escape_header escape;
 		struct {
-			struct {
-				uint32_t cmdType;
-				uint32_t streamId;
-			} header;
-			struct {
-				uint32_t registerId;
-				uint32_t value;
-			} items[SVGA_VIDEO_PITCH_3 + 1];
-		} body;
-		struct vmw_escape_video_flush flush;
+			uint32_t cmdType;
+			uint32_t streamId;
+		} header;
 	} *cmds;
-	uint32_t offset;
-	int i, ret;
+	struct {
+		uint32_t registerId;
+		uint32_t value;
+	} *items;
 
-	for (;;) {
-		cmds = vmw_fifo_reserve(dev_priv, sizeof(*cmds));
-		if (cmds)
-			break;
+	/* defines are a index needs + 1 */
+	if (have_so)
+		num_items = SVGA_VIDEO_DST_SCREEN_ID + 1;
+	else
+		num_items = SVGA_VIDEO_PITCH_3 + 1;
 
-		ret = vmw_fallback_wait(dev_priv, false, true, 0,
-					interruptible, 3*HZ);
-		if (interruptible && ret == -ERESTARTSYS)
-			return ret;
-		else
-			BUG_ON(ret != 0);
+	fifo_size = sizeof(*cmds) + sizeof(*flush) + sizeof(*items) * num_items;
+
+	cmds = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* hardware has hung, can't do anything here */
+	if (!cmds)
+		return -ENOMEM;
+
+	items = (typeof(items))&cmds[1];
+	flush = (struct vmw_escape_video_flush *)&items[num_items];
+
+	/* the size is header + number of items */
+	fill_escape(&cmds->escape, sizeof(*items) * (num_items + 1));
+
+	cmds->header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS;
+	cmds->header.streamId = arg->stream_id;
+
+	/* the IDs are neatly numbered */
+	for (i = 0; i < num_items; i++)
+		items[i].registerId = i;
+
+	vmw_bo_get_guest_ptr(&buf->base, &ptr);
+	ptr.offset += arg->offset;
+
+	items[SVGA_VIDEO_ENABLED].value     = true;
+	items[SVGA_VIDEO_FLAGS].value       = arg->flags;
+	items[SVGA_VIDEO_DATA_OFFSET].value = ptr.offset;
+	items[SVGA_VIDEO_FORMAT].value      = arg->format;
+	items[SVGA_VIDEO_COLORKEY].value    = arg->color_key;
+	items[SVGA_VIDEO_SIZE].value        = arg->size;
+	items[SVGA_VIDEO_WIDTH].value       = arg->width;
+	items[SVGA_VIDEO_HEIGHT].value      = arg->height;
+	items[SVGA_VIDEO_SRC_X].value       = arg->src.x;
+	items[SVGA_VIDEO_SRC_Y].value       = arg->src.y;
+	items[SVGA_VIDEO_SRC_WIDTH].value   = arg->src.w;
+	items[SVGA_VIDEO_SRC_HEIGHT].value  = arg->src.h;
+	items[SVGA_VIDEO_DST_X].value       = arg->dst.x;
+	items[SVGA_VIDEO_DST_Y].value       = arg->dst.y;
+	items[SVGA_VIDEO_DST_WIDTH].value   = arg->dst.w;
+	items[SVGA_VIDEO_DST_HEIGHT].value  = arg->dst.h;
+	items[SVGA_VIDEO_PITCH_1].value     = arg->pitch[0];
+	items[SVGA_VIDEO_PITCH_2].value     = arg->pitch[1];
+	items[SVGA_VIDEO_PITCH_3].value     = arg->pitch[2];
+	if (have_so) {
+		items[SVGA_VIDEO_DATA_GMRID].value    = ptr.gmrId;
+		items[SVGA_VIDEO_DST_SCREEN_ID].value = SVGA_ID_INVALID;
 	}
 
-	fill_escape(&cmds->escape, sizeof(cmds->body));
-	cmds->body.header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS;
-	cmds->body.header.streamId = arg->stream_id;
-
-	for (i = 0; i <= SVGA_VIDEO_PITCH_3; i++)
-		cmds->body.items[i].registerId = i;
-
-	offset = buf->base.offset + arg->offset;
-
-	cmds->body.items[SVGA_VIDEO_ENABLED].value     = true;
-	cmds->body.items[SVGA_VIDEO_FLAGS].value       = arg->flags;
-	cmds->body.items[SVGA_VIDEO_DATA_OFFSET].value = offset;
-	cmds->body.items[SVGA_VIDEO_FORMAT].value      = arg->format;
-	cmds->body.items[SVGA_VIDEO_COLORKEY].value    = arg->color_key;
-	cmds->body.items[SVGA_VIDEO_SIZE].value        = arg->size;
-	cmds->body.items[SVGA_VIDEO_WIDTH].value       = arg->width;
-	cmds->body.items[SVGA_VIDEO_HEIGHT].value      = arg->height;
-	cmds->body.items[SVGA_VIDEO_SRC_X].value       = arg->src.x;
-	cmds->body.items[SVGA_VIDEO_SRC_Y].value       = arg->src.y;
-	cmds->body.items[SVGA_VIDEO_SRC_WIDTH].value   = arg->src.w;
-	cmds->body.items[SVGA_VIDEO_SRC_HEIGHT].value  = arg->src.h;
-	cmds->body.items[SVGA_VIDEO_DST_X].value       = arg->dst.x;
-	cmds->body.items[SVGA_VIDEO_DST_Y].value       = arg->dst.y;
-	cmds->body.items[SVGA_VIDEO_DST_WIDTH].value   = arg->dst.w;
-	cmds->body.items[SVGA_VIDEO_DST_HEIGHT].value  = arg->dst.h;
-	cmds->body.items[SVGA_VIDEO_PITCH_1].value     = arg->pitch[0];
-	cmds->body.items[SVGA_VIDEO_PITCH_2].value     = arg->pitch[1];
-	cmds->body.items[SVGA_VIDEO_PITCH_3].value     = arg->pitch[2];
-
-	fill_flush(&cmds->flush, arg->stream_id);
+	fill_flush(flush, arg->stream_id);
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmds));
+	vmw_fifo_commit(dev_priv, fifo_size);
 
 	return 0;
 }
@@ -248,6 +218,25 @@ static int vmw_overlay_send_stop(struct vmw_private *dev_priv,
 }
 
 /**
+ * Move a buffer to vram or gmr if @pin is set, else unpin the buffer.
+ *
+ * With the introduction of screen objects buffers could now be
+ * used with GMRs instead of being locked to vram.
+ */
+static int vmw_overlay_move_buffer(struct vmw_private *dev_priv,
+				   struct vmw_dma_buffer *buf,
+				   bool pin, bool inter)
+{
+	if (!pin)
+		return vmw_dmabuf_unpin(dev_priv, buf, inter);
+
+	if (!dev_priv->sou_priv)
+		return vmw_dmabuf_to_vram(dev_priv, buf, true, inter);
+
+	return vmw_dmabuf_to_vram_or_gmr(dev_priv, buf, true, inter);
+}
+
+/**
  * Stop or pause a stream.
  *
  * If the stream is paused the no evict flag is removed from the buffer
@@ -279,8 +268,8 @@ static int vmw_overlay_stop(struct vmw_private *dev_priv,
 			return ret;
 
 		/* We just remove the NO_EVICT flag so no -ENOMEM */
-		ret = vmw_dmabuf_pin_in_vram(dev_priv, stream->buf, false,
-					     interruptible);
+		ret = vmw_overlay_move_buffer(dev_priv, stream->buf, false,
+					      interruptible);
 		if (interruptible && ret == -ERESTARTSYS)
 			return ret;
 		else
@@ -342,7 +331,7 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 	/* We don't start the old stream if we are interrupted.
 	 * Might return -ENOMEM if it can't fit the buffer in vram.
 	 */
-	ret = vmw_dmabuf_pin_in_vram(dev_priv, buf, true, interruptible);
+	ret = vmw_overlay_move_buffer(dev_priv, buf, true, interruptible);
 	if (ret)
 		return ret;
 
@@ -351,7 +340,8 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 		/* This one needs to happen no matter what. We only remove
 		 * the NO_EVICT flag so this is safe from -ENOMEM.
 		 */
-		BUG_ON(vmw_dmabuf_pin_in_vram(dev_priv, buf, false, false) != 0);
+		BUG_ON(vmw_overlay_move_buffer(dev_priv, buf, false, false)
+		       != 0);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index c1b6ffd4ce7b..86c5e4cceb31 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -39,6 +39,7 @@ struct vmw_user_context {
 struct vmw_user_surface {
 	struct ttm_base_object base;
 	struct vmw_surface srf;
+	uint32_t size;
 };
 
 struct vmw_user_dma_buffer {
@@ -61,6 +62,17 @@ struct vmw_user_stream {
 	struct vmw_stream stream;
 };
 
+struct vmw_surface_offset {
+	uint32_t face;
+	uint32_t mip;
+	uint32_t bo_offset;
+};
+
+
+static uint64_t vmw_user_context_size;
+static uint64_t vmw_user_surface_size;
+static uint64_t vmw_user_stream_size;
+
 static inline struct vmw_dma_buffer *
 vmw_dma_buffer(struct ttm_buffer_object *bo)
 {
@@ -80,13 +92,36 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
 	return res;
 }
 
+
+/**
+ * vmw_resource_release_id - release a resource id to the id manager.
+ *
+ * @res: Pointer to the resource.
+ *
+ * Release the resource id to the resource id manager and set it to -1
+ */
+static void vmw_resource_release_id(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	write_lock(&dev_priv->resource_lock);
+	if (res->id != -1)
+		idr_remove(res->idr, res->id);
+	res->id = -1;
+	write_unlock(&dev_priv->resource_lock);
+}
+
 static void vmw_resource_release(struct kref *kref)
 {
 	struct vmw_resource *res =
 	    container_of(kref, struct vmw_resource, kref);
 	struct vmw_private *dev_priv = res->dev_priv;
+	int id = res->id;
+	struct idr *idr = res->idr;
 
-	idr_remove(res->idr, res->id);
+	res->avail = false;
+	if (res->remove_from_lists != NULL)
+		res->remove_from_lists(res);
 	write_unlock(&dev_priv->resource_lock);
 
 	if (likely(res->hw_destroy != NULL))
@@ -98,6 +133,9 @@ static void vmw_resource_release(struct kref *kref)
 		kfree(res);
 
 	write_lock(&dev_priv->resource_lock);
+
+	if (id != -1)
+		idr_remove(idr, id);
 }
 
 void vmw_resource_unreference(struct vmw_resource **p_res)
@@ -111,28 +149,29 @@ void vmw_resource_unreference(struct vmw_resource **p_res)
 	write_unlock(&dev_priv->resource_lock);
 }
 
-static int vmw_resource_init(struct vmw_private *dev_priv,
-			     struct vmw_resource *res,
-			     struct idr *idr,
-			     enum ttm_object_type obj_type,
-			     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_resource_alloc_id - release a resource id to the id manager.
+ *
+ * @dev_priv: Pointer to the device private structure.
+ * @res: Pointer to the resource.
+ *
+ * Allocate the lowest free resource from the resource manager, and set
+ * @res->id to that id. Returns 0 on success and -ENOMEM on failure.
+ */
+static int vmw_resource_alloc_id(struct vmw_private *dev_priv,
+				 struct vmw_resource *res)
 {
 	int ret;
 
-	kref_init(&res->kref);
-	res->hw_destroy = NULL;
-	res->res_free = res_free;
-	res->res_type = obj_type;
-	res->idr = idr;
-	res->avail = false;
-	res->dev_priv = dev_priv;
+	BUG_ON(res->id != -1);
 
 	do {
-		if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
+		if (unlikely(idr_pre_get(res->idr, GFP_KERNEL) == 0))
 			return -ENOMEM;
 
 		write_lock(&dev_priv->resource_lock);
-		ret = idr_get_new_above(idr, res, 1, &res->id);
+		ret = idr_get_new_above(res->idr, res, 1, &res->id);
 		write_unlock(&dev_priv->resource_lock);
 
 	} while (ret == -EAGAIN);
@@ -140,6 +179,33 @@ static int vmw_resource_init(struct vmw_private *dev_priv,
 	return ret;
 }
 
+
+static int vmw_resource_init(struct vmw_private *dev_priv,
+			     struct vmw_resource *res,
+			     struct idr *idr,
+			     enum ttm_object_type obj_type,
+			     bool delay_id,
+			     void (*res_free) (struct vmw_resource *res),
+			     void (*remove_from_lists)
+			     (struct vmw_resource *res))
+{
+	kref_init(&res->kref);
+	res->hw_destroy = NULL;
+	res->res_free = res_free;
+	res->remove_from_lists = remove_from_lists;
+	res->res_type = obj_type;
+	res->idr = idr;
+	res->avail = false;
+	res->dev_priv = dev_priv;
+	INIT_LIST_HEAD(&res->query_head);
+	INIT_LIST_HEAD(&res->validate_head);
+	res->id = -1;
+	if (delay_id)
+		return 0;
+	else
+		return vmw_resource_alloc_id(dev_priv, res);
+}
+
 /**
  * vmw_resource_activate
  *
@@ -194,8 +260,12 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
 	struct {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdDestroyContext body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	} *cmd;
+
 
+	vmw_execbuf_release_pinned_bo(dev_priv, true, res->id);
+
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving FIFO space for surface "
 			  "destruction.\n");
@@ -222,14 +292,17 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	} *cmd;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->context_idr,
-				VMW_RES_CONTEXT, res_free);
+				VMW_RES_CONTEXT, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
-		if (res_free == NULL)
-			kfree(res);
-		else
-			res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a resource id.\n");
+		goto out_early;
+	}
+
+	if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) {
+		DRM_ERROR("Out of hw context ids.\n");
+		vmw_resource_unreference(&res);
+		return -ENOMEM;
 	}
 
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
@@ -247,6 +320,13 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_context_destroy);
 	return 0;
+
+out_early:
+	if (res_free == NULL)
+		kfree(res);
+	else
+		res_free(res);
+	return ret;
 }
 
 struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv)
@@ -269,8 +349,11 @@ static void vmw_user_context_free(struct vmw_resource *res)
 {
 	struct vmw_user_context *ctx =
 	    container_of(res, struct vmw_user_context, res);
+	struct vmw_private *dev_priv = res->dev_priv;
 
 	kfree(ctx);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_context_size);
 }
 
 /**
@@ -324,23 +407,56 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_context *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	struct vmw_user_context *ctx;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	if (unlikely(ctx == NULL))
-		return -ENOMEM;
+
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of contexts anyway.
+	 */
+
+	if (unlikely(vmw_user_context_size == 0))
+		vmw_user_context_size = ttm_round_pot(sizeof(*ctx)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_context_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for context"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (unlikely(ctx == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_context_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	res = &ctx->res;
 	ctx->base.shareable = false;
 	ctx->base.tfile = NULL;
 
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
 	ret = vmw_context_init(dev_priv, res, vmw_user_context_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(&ctx->res);
 	ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT,
@@ -354,6 +470,8 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 	arg->cid = res->id;
 out_err:
 	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 
 }
@@ -382,31 +500,285 @@ int vmw_context_check(struct vmw_private *dev_priv,
 	return ret;
 }
 
+struct vmw_bpp {
+	uint8_t bpp;
+	uint8_t s_bpp;
+};
+
+/*
+ * Size table for the supported SVGA3D surface formats. It consists of
+ * two values. The bpp value and the s_bpp value which is short for
+ * "stride bits per pixel" The values are given in such a way that the
+ * minimum stride for the image is calculated using
+ *
+ * min_stride = w*s_bpp
+ *
+ * and the total memory requirement for the image is
+ *
+ * h*min_stride*bpp/s_bpp
+ *
+ */
+static const struct vmw_bpp vmw_sf_bpp[] = {
+	[SVGA3D_FORMAT_INVALID] = {0, 0},
+	[SVGA3D_X8R8G8B8] = {32, 32},
+	[SVGA3D_A8R8G8B8] = {32, 32},
+	[SVGA3D_R5G6B5] = {16, 16},
+	[SVGA3D_X1R5G5B5] = {16, 16},
+	[SVGA3D_A1R5G5B5] = {16, 16},
+	[SVGA3D_A4R4G4B4] = {16, 16},
+	[SVGA3D_Z_D32] = {32, 32},
+	[SVGA3D_Z_D16] = {16, 16},
+	[SVGA3D_Z_D24S8] = {32, 32},
+	[SVGA3D_Z_D15S1] = {16, 16},
+	[SVGA3D_LUMINANCE8] = {8, 8},
+	[SVGA3D_LUMINANCE4_ALPHA4] = {8, 8},
+	[SVGA3D_LUMINANCE16] = {16, 16},
+	[SVGA3D_LUMINANCE8_ALPHA8] = {16, 16},
+	[SVGA3D_DXT1] = {4, 16},
+	[SVGA3D_DXT2] = {8, 32},
+	[SVGA3D_DXT3] = {8, 32},
+	[SVGA3D_DXT4] = {8, 32},
+	[SVGA3D_DXT5] = {8, 32},
+	[SVGA3D_BUMPU8V8] = {16, 16},
+	[SVGA3D_BUMPL6V5U5] = {16, 16},
+	[SVGA3D_BUMPX8L8V8U8] = {32, 32},
+	[SVGA3D_ARGB_S10E5] = {16, 16},
+	[SVGA3D_ARGB_S23E8] = {32, 32},
+	[SVGA3D_A2R10G10B10] = {32, 32},
+	[SVGA3D_V8U8] = {16, 16},
+	[SVGA3D_Q8W8V8U8] = {32, 32},
+	[SVGA3D_CxV8U8] = {16, 16},
+	[SVGA3D_X8L8V8U8] = {32, 32},
+	[SVGA3D_A2W10V10U10] = {32, 32},
+	[SVGA3D_ALPHA8] = {8, 8},
+	[SVGA3D_R_S10E5] = {16, 16},
+	[SVGA3D_R_S23E8] = {32, 32},
+	[SVGA3D_RG_S10E5] = {16, 16},
+	[SVGA3D_RG_S23E8] = {32, 32},
+	[SVGA3D_BUFFER] = {8, 8},
+	[SVGA3D_Z_D24X8] = {32, 32},
+	[SVGA3D_V16U16] = {32, 32},
+	[SVGA3D_G16R16] = {32, 32},
+	[SVGA3D_A16B16G16R16] = {64,  64},
+	[SVGA3D_UYVY] = {12, 12},
+	[SVGA3D_YUY2] = {12, 12},
+	[SVGA3D_NV12] = {12, 8},
+	[SVGA3D_AYUV] = {32, 32},
+	[SVGA3D_BC4_UNORM] = {4,  16},
+	[SVGA3D_BC5_UNORM] = {8,  32},
+	[SVGA3D_Z_DF16] = {16,  16},
+	[SVGA3D_Z_DF24] = {24,  24},
+	[SVGA3D_Z_D24S8_INT] = {32,  32}
+};
+
 
 /**
  * Surface management.
  */
 
+struct vmw_surface_dma {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdSurfaceDMA body;
+	SVGA3dCopyBox cb;
+	SVGA3dCmdSurfaceDMASuffix suffix;
+};
+
+struct vmw_surface_define {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDefineSurface body;
+};
+
+struct vmw_surface_destroy {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDestroySurface body;
+};
+
+
+/**
+ * vmw_surface_dma_size - Compute fifo size for a dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface dma command for backup or
+ * restoration of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_dma_size(const struct vmw_surface *srf)
+{
+	return srf->num_sizes * sizeof(struct vmw_surface_dma);
+}
+
+
+/**
+ * vmw_surface_define_size - Compute fifo size for a surface define command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface define command for the definition
+ * of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_define_size(const struct vmw_surface *srf)
+{
+	return sizeof(struct vmw_surface_define) + srf->num_sizes *
+		sizeof(SVGA3dSize);
+}
+
+
+/**
+ * vmw_surface_destroy_size - Compute fifo size for a surface destroy command.
+ *
+ * Computes the required size for a surface destroy command for the destruction
+ * of a hw surface.
+ */
+static inline uint32_t vmw_surface_destroy_size(void)
+{
+	return sizeof(struct vmw_surface_destroy);
+}
+
+/**
+ * vmw_surface_destroy_encode - Encode a surface_destroy command.
+ *
+ * @id: The surface id
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_destroy_encode(uint32_t id,
+				       void *cmd_space)
+{
+	struct vmw_surface_destroy *cmd = (struct vmw_surface_destroy *)
+		cmd_space;
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DESTROY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.sid = id;
+}
+
+/**
+ * vmw_surface_define_encode - Encode a surface_define command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_define_encode(const struct vmw_surface *srf,
+				      void *cmd_space)
+{
+	struct vmw_surface_define *cmd = (struct vmw_surface_define *)
+		cmd_space;
+	struct drm_vmw_size *src_size;
+	SVGA3dSize *cmd_size;
+	uint32_t cmd_len;
+	int i;
+
+	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DEFINE;
+	cmd->header.size = cmd_len;
+	cmd->body.sid = srf->res.id;
+	cmd->body.surfaceFlags = srf->flags;
+	cmd->body.format = cpu_to_le32(srf->format);
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		cmd->body.face[i].numMipLevels = srf->mip_levels[i];
+
+	cmd += 1;
+	cmd_size = (SVGA3dSize *) cmd;
+	src_size = srf->sizes;
+
+	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
+		cmd_size->width = src_size->width;
+		cmd_size->height = src_size->height;
+		cmd_size->depth = src_size->depth;
+	}
+}
+
+
+/**
+ * vmw_surface_dma_encode - Encode a surface_dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ * @ptr: Pointer to an SVGAGuestPtr indicating where the surface contents
+ * should be placed or read from.
+ * @to_surface: Boolean whether to DMA to the surface or from the surface.
+ */
+static void vmw_surface_dma_encode(struct vmw_surface *srf,
+				   void *cmd_space,
+				   const SVGAGuestPtr *ptr,
+				   bool to_surface)
+{
+	uint32_t i;
+	uint32_t bpp = vmw_sf_bpp[srf->format].bpp;
+	uint32_t stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+	struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space;
+
+	for (i = 0; i < srf->num_sizes; ++i) {
+		SVGA3dCmdHeader *header = &cmd->header;
+		SVGA3dCmdSurfaceDMA *body = &cmd->body;
+		SVGA3dCopyBox *cb = &cmd->cb;
+		SVGA3dCmdSurfaceDMASuffix *suffix = &cmd->suffix;
+		const struct vmw_surface_offset *cur_offset = &srf->offsets[i];
+		const struct drm_vmw_size *cur_size = &srf->sizes[i];
+
+		header->id = SVGA_3D_CMD_SURFACE_DMA;
+		header->size = sizeof(*body) + sizeof(*cb) + sizeof(*suffix);
+
+		body->guest.ptr = *ptr;
+		body->guest.ptr.offset += cur_offset->bo_offset;
+		body->guest.pitch = (cur_size->width * stride_bpp + 7) >> 3;
+		body->host.sid = srf->res.id;
+		body->host.face = cur_offset->face;
+		body->host.mipmap = cur_offset->mip;
+		body->transfer = ((to_surface) ?  SVGA3D_WRITE_HOST_VRAM :
+				  SVGA3D_READ_HOST_VRAM);
+		cb->x = 0;
+		cb->y = 0;
+		cb->z = 0;
+		cb->srcx = 0;
+		cb->srcy = 0;
+		cb->srcz = 0;
+		cb->w = cur_size->width;
+		cb->h = cur_size->height;
+		cb->d = cur_size->depth;
+
+		suffix->suffixSize = sizeof(*suffix);
+		suffix->maximumOffset = body->guest.pitch*cur_size->height*
+			cur_size->depth*bpp / stride_bpp;
+		suffix->flags.discard = 0;
+		suffix->flags.unsynchronized = 0;
+		suffix->flags.reserved = 0;
+		++cmd;
+	}
+};
+
+
 static void vmw_hw_surface_destroy(struct vmw_resource *res)
 {
 
 	struct vmw_private *dev_priv = res->dev_priv;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDestroySurface body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	struct vmw_surface *srf;
+	void *cmd;
 
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "destruction.\n");
-		return;
-	}
+	if (res->id != -1) {
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DESTROY);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
-	cmd->body.sid = cpu_to_le32(res->id);
+		cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
+		if (unlikely(cmd == NULL)) {
+			DRM_ERROR("Failed reserving FIFO space for surface "
+				  "destruction.\n");
+			return;
+		}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
+		vmw_surface_destroy_encode(res->id, cmd);
+		vmw_fifo_commit(dev_priv, vmw_surface_destroy_size());
+
+		/*
+		 * used_memory_size_atomic, or separate lock
+		 * to avoid taking dev_priv::cmdbuf_mutex in
+		 * the destroy path.
+		 */
+
+		mutex_lock(&dev_priv->cmdbuf_mutex);
+		srf = container_of(res, struct vmw_surface, res);
+		dev_priv->used_memory_size -= srf->backup_size;
+		mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	}
 	vmw_3d_resource_dec(dev_priv, false);
 }
 
@@ -414,70 +786,352 @@ void vmw_surface_res_free(struct vmw_resource *res)
 {
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(srf);
 }
 
-int vmw_surface_init(struct vmw_private *dev_priv,
-		     struct vmw_surface *srf,
-		     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_surface_do_validate - make a surface available to the device.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * If the surface doesn't have a hw id, allocate one, and optionally
+ * DMA the backed up surface contents to the device.
+ *
+ * Returns -EBUSY if there wasn't sufficient device resources to
+ * complete the validation. Retry after freeing up resources.
+ *
+ * May return other errors if the kernel is out of guest resources.
+ */
+int vmw_surface_do_validate(struct vmw_private *dev_priv,
+			    struct vmw_surface *srf)
 {
-	int ret;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDefineSurface body;
-	} *cmd;
-	SVGA3dSize *cmd_size;
 	struct vmw_resource *res = &srf->res;
-	struct drm_vmw_size *src_size;
-	size_t submit_size;
-	uint32_t cmd_len;
-	int i;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
 
-	BUG_ON(res_free == NULL);
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
-				VMW_RES_SURFACE, res_free);
+	if (likely(res->id != -1))
+		return 0;
+
+	if (unlikely(dev_priv->used_memory_size + srf->backup_size >=
+		     dev_priv->memory_size))
+		return -EBUSY;
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	if (srf->backup) {
+		INIT_LIST_HEAD(&val_list);
+		val_buf.bo = ttm_bo_reference(srf->backup);
+		val_buf.new_sync_obj_arg = (void *)((unsigned long)
+						    DRM_VMW_FENCE_FLAG_EXEC);
+		list_add_tail(&val_buf.head, &val_list);
+		ret = ttm_eu_reserve_buffers(&val_list);
+		if (unlikely(ret != 0))
+			goto out_no_reserve;
+
+		ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+				      true, false, false);
+		if (unlikely(ret != 0))
+			goto out_no_validate;
+	}
+
+	/*
+	 * Alloc id for the resource.
+	 */
 
+	ret = vmw_resource_alloc_id(dev_priv, res);
 	if (unlikely(ret != 0)) {
-		res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a surface id.\n");
+		goto out_no_id;
+	}
+	if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) {
+		ret = -EBUSY;
+		goto out_no_fifo;
 	}
 
-	submit_size = sizeof(*cmd) + srf->num_sizes * sizeof(SVGA3dSize);
-	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	/*
+	 * Encode surface define- and dma commands.
+	 */
+
+	submit_size = vmw_surface_define_size(srf);
+	if (srf->backup)
+		submit_size += vmw_surface_dma_size(srf);
 
 	cmd = vmw_fifo_reserve(dev_priv, submit_size);
 	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed for create surface.\n");
-		vmw_resource_unreference(&res);
-		return -ENOMEM;
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "validation.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DEFINE);
-	cmd->header.size = cpu_to_le32(cmd_len);
-	cmd->body.sid = cpu_to_le32(res->id);
-	cmd->body.surfaceFlags = cpu_to_le32(srf->flags);
-	cmd->body.format = cpu_to_le32(srf->format);
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
-		cmd->body.face[i].numMipLevels =
-		    cpu_to_le32(srf->mip_levels[i]);
+	vmw_surface_define_encode(srf, cmd);
+	if (srf->backup) {
+		SVGAGuestPtr ptr;
+
+		cmd += vmw_surface_define_size(srf);
+		vmw_bo_get_guest_ptr(srf->backup, &ptr);
+		vmw_surface_dma_encode(srf, cmd, &ptr, true);
 	}
 
-	cmd += 1;
-	cmd_size = (SVGA3dSize *) cmd;
-	src_size = srf->sizes;
+	vmw_fifo_commit(dev_priv, submit_size);
 
-	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
-		cmd_size->width = cpu_to_le32(src_size->width);
-		cmd_size->height = cpu_to_le32(src_size->height);
-		cmd_size->depth = cpu_to_le32(src_size->depth);
+	/*
+	 * Create a fence object and fence the backup buffer.
+	 */
+
+	if (srf->backup) {
+		struct vmw_fence_obj *fence;
+
+		(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+						  &fence, NULL);
+		ttm_eu_fence_buffer_objects(&val_list, fence);
+		if (likely(fence != NULL))
+			vmw_fence_obj_unreference(&fence);
+		ttm_bo_unref(&val_buf.bo);
+		ttm_bo_unref(&srf->backup);
+	}
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size += srf->backup_size;
+
+	return 0;
+
+out_no_fifo:
+	vmw_resource_release_id(res);
+out_no_id:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	if (srf->backup)
+		ttm_bo_unref(&val_buf.bo);
+	return ret;
+}
+
+/**
+ * vmw_surface_evict - Evict a hw surface.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * DMA the contents of a hw surface to a backup guest buffer object,
+ * and destroy the hw surface, releasing its id.
+ */
+int vmw_surface_evict(struct vmw_private *dev_priv,
+		      struct vmw_surface *srf)
+{
+	struct vmw_resource *res = &srf->res;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
+	struct vmw_fence_obj *fence;
+	SVGAGuestPtr ptr;
+
+	BUG_ON(res->id == -1);
+
+	/*
+	 * Create a surface backup buffer object.
+	 */
+
+	if (!srf->backup) {
+		ret = ttm_bo_create(&dev_priv->bdev, srf->backup_size,
+				    ttm_bo_type_device,
+				    &vmw_srf_placement, 0, 0, true,
+				    NULL, &srf->backup);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	INIT_LIST_HEAD(&val_list);
+	val_buf.bo = ttm_bo_reference(srf->backup);
+	val_buf.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	list_add_tail(&val_buf.head, &val_list);
+	ret = ttm_eu_reserve_buffers(&val_list);
+	if (unlikely(ret != 0))
+		goto out_no_reserve;
+
+	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+			      true, false, false);
+	if (unlikely(ret != 0))
+		goto out_no_validate;
+
+
+	/*
+	 * Encode the dma- and surface destroy commands.
+	 */
+
+	submit_size = vmw_surface_dma_size(srf) + vmw_surface_destroy_size();
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "eviction.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
+	vmw_bo_get_guest_ptr(srf->backup, &ptr);
+	vmw_surface_dma_encode(srf, cmd, &ptr, false);
+	cmd += vmw_surface_dma_size(srf);
+	vmw_surface_destroy_encode(res->id, cmd);
 	vmw_fifo_commit(dev_priv, submit_size);
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size -= srf->backup_size;
+
+	/*
+	 * Create a fence object and fence the DMA buffer.
+	 */
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+					  &fence, NULL);
+	ttm_eu_fence_buffer_objects(&val_list, fence);
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
+	ttm_bo_unref(&val_buf.bo);
+
+	/*
+	 * Release the surface ID.
+	 */
+
+	vmw_resource_release_id(res);
+
+	return 0;
+
+out_no_fifo:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	ttm_bo_unref(&val_buf.bo);
+	ttm_bo_unref(&srf->backup);
+	return ret;
+}
+
+
+/**
+ * vmw_surface_validate - make a surface available to the device, evicting
+ * other surfaces if needed.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * Try to validate a surface and if it fails due to limited device resources,
+ * repeatedly try to evict other surfaces until the request can be
+ * acommodated.
+ *
+ * May return errors if out of resources.
+ */
+int vmw_surface_validate(struct vmw_private *dev_priv,
+			 struct vmw_surface *srf)
+{
+	int ret;
+	struct vmw_surface *evict_srf;
+
+	do {
+		write_lock(&dev_priv->resource_lock);
+		list_del_init(&srf->lru_head);
+		write_unlock(&dev_priv->resource_lock);
+
+		ret = vmw_surface_do_validate(dev_priv, srf);
+		if (likely(ret != -EBUSY))
+			break;
+
+		write_lock(&dev_priv->resource_lock);
+		if (list_empty(&dev_priv->surface_lru)) {
+			DRM_ERROR("Out of device memory for surfaces.\n");
+			ret = -EBUSY;
+			write_unlock(&dev_priv->resource_lock);
+			break;
+		}
+
+		evict_srf = vmw_surface_reference
+			(list_first_entry(&dev_priv->surface_lru,
+					  struct vmw_surface,
+					  lru_head));
+		list_del_init(&evict_srf->lru_head);
+
+		write_unlock(&dev_priv->resource_lock);
+		(void) vmw_surface_evict(dev_priv, evict_srf);
+
+		vmw_surface_unreference(&evict_srf);
+
+	} while (1);
+
+	if (unlikely(ret != 0 && srf->res.id != -1)) {
+		write_lock(&dev_priv->resource_lock);
+		list_add_tail(&srf->lru_head, &dev_priv->surface_lru);
+		write_unlock(&dev_priv->resource_lock);
+	}
+
+	return ret;
+}
+
+
+/**
+ * vmw_surface_remove_from_lists - Remove surface resources from lookup lists
+ *
+ * @res: Pointer to a struct vmw_resource embedded in a struct vmw_surface
+ *
+ * As part of the resource destruction, remove the surface from any
+ * lookup lists.
+ */
+static void vmw_surface_remove_from_lists(struct vmw_resource *res)
+{
+	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
+
+	list_del_init(&srf->lru_head);
+}
+
+int vmw_surface_init(struct vmw_private *dev_priv,
+		     struct vmw_surface *srf,
+		     void (*res_free) (struct vmw_resource *res))
+{
+	int ret;
+	struct vmw_resource *res = &srf->res;
+
+	BUG_ON(res_free == NULL);
+	INIT_LIST_HEAD(&srf->lru_head);
+	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
+				VMW_RES_SURFACE, true, res_free,
+				vmw_surface_remove_from_lists);
+
+	if (unlikely(ret != 0))
+		res_free(res);
+
+	/*
+	 * The surface won't be visible to hardware until a
+	 * surface validate.
+	 */
+
 	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_surface_destroy);
-	return 0;
+	return ret;
 }
 
 static void vmw_user_surface_free(struct vmw_resource *res)
@@ -485,12 +1139,58 @@ static void vmw_user_surface_free(struct vmw_resource *res)
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 	struct vmw_user_surface *user_srf =
 	    container_of(srf, struct vmw_user_surface, srf);
+	struct vmw_private *dev_priv = srf->res.dev_priv;
+	uint32_t size = user_srf->size;
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(user_srf);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+}
+
+/**
+ * vmw_resource_unreserve - unreserve resources previously reserved for
+ * command submission.
+ *
+ * @list_head: list of resources to unreserve.
+ *
+ * Currently only surfaces are considered, and unreserving a surface
+ * means putting it back on the device's surface lru list,
+ * so that it can be evicted if necessary.
+ * This function traverses the resource list and
+ * checks whether resources are surfaces, and in that case puts them back
+ * on the device's surface LRU list.
+ */
+void vmw_resource_unreserve(struct list_head *list)
+{
+	struct vmw_resource *res;
+	struct vmw_surface *srf;
+	rwlock_t *lock = NULL;
+
+	list_for_each_entry(res, list, validate_head) {
+
+		if (res->res_free != &vmw_surface_res_free &&
+		    res->res_free != &vmw_user_surface_free)
+			continue;
+
+		if (unlikely(lock == NULL)) {
+			lock = &res->dev_priv->resource_lock;
+			write_lock(lock);
+		}
+
+		srf = container_of(res, struct vmw_surface, res);
+		list_del_init(&srf->lru_head);
+		list_add_tail(&srf->lru_head, &res->dev_priv->surface_lru);
+	}
+
+	if (lock != NULL)
+		write_unlock(lock);
 }
 
+
 int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
 				   struct ttm_object_file *tfile,
 				   uint32_t handle, struct vmw_surface **out)
@@ -555,8 +1255,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_surface *user_srf =
-	    kmalloc(sizeof(*user_srf), GFP_KERNEL);
+	struct vmw_user_surface *user_srf;
 	struct vmw_surface *srf;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
@@ -567,10 +1266,51 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct drm_vmw_size __user *user_sizes;
 	int ret;
-	int i;
+	int i, j;
+	uint32_t cur_bo_offset;
+	struct drm_vmw_size *cur_size;
+	struct vmw_surface_offset *cur_offset;
+	uint32_t stride_bpp;
+	uint32_t bpp;
+	uint32_t num_sizes;
+	uint32_t size;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 
-	if (unlikely(user_srf == NULL))
-		return -ENOMEM;
+	if (unlikely(vmw_user_surface_size == 0))
+		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
+			128;
+
+	num_sizes = 0;
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		num_sizes += req->mip_levels[i];
+
+	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
+	    DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
+	size = vmw_user_surface_size + 128 +
+		ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) +
+		ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset));
+
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   size, false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for surface"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	user_srf = kmalloc(sizeof(*user_srf), GFP_KERNEL);
+	if (unlikely(user_srf == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_user_srf;
+	}
 
 	srf = &user_srf->srf;
 	res = &srf->res;
@@ -578,21 +1318,22 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	srf->flags = req->flags;
 	srf->format = req->format;
 	srf->scanout = req->scanout;
-	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
-	srf->num_sizes = 0;
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
-		srf->num_sizes += srf->mip_levels[i];
+	srf->backup = NULL;
 
-	if (srf->num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-	    DRM_VMW_MAX_MIP_LEVELS) {
-		ret = -EINVAL;
-		goto out_err0;
-	}
+	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
+	srf->num_sizes = num_sizes;
+	user_srf->size = size;
 
 	srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL);
 	if (unlikely(srf->sizes == NULL)) {
 		ret = -ENOMEM;
-		goto out_err0;
+		goto out_no_sizes;
+	}
+	srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
+			       GFP_KERNEL);
+	if (unlikely(srf->sizes == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_offsets;
 	}
 
 	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
@@ -602,9 +1343,32 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			     srf->num_sizes * sizeof(*srf->sizes));
 	if (unlikely(ret != 0)) {
 		ret = -EFAULT;
-		goto out_err1;
+		goto out_no_copy;
 	}
 
+	cur_bo_offset = 0;
+	cur_offset = srf->offsets;
+	cur_size = srf->sizes;
+
+	bpp = vmw_sf_bpp[srf->format].bpp;
+	stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		for (j = 0; j < srf->mip_levels[i]; ++j) {
+			uint32_t stride =
+				(cur_size->width * stride_bpp + 7) >> 3;
+
+			cur_offset->face = i;
+			cur_offset->mip = j;
+			cur_offset->bo_offset = cur_bo_offset;
+			cur_bo_offset += stride * cur_size->height *
+				cur_size->depth * bpp / stride_bpp;
+			++cur_offset;
+			++cur_size;
+		}
+	}
+	srf->backup_size = cur_bo_offset;
+
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
 	    srf->sizes[0].width == 64 &&
@@ -616,7 +1380,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		if (!srf->snooper.image) {
 			DRM_ERROR("Failed to allocate cursor_image\n");
 			ret = -ENOMEM;
-			goto out_err1;
+			goto out_no_copy;
 		}
 	} else {
 		srf->snooper.image = NULL;
@@ -633,7 +1397,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 
 	ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(&srf->res);
 	ret = ttm_base_object_init(tfile, &user_srf->base,
@@ -643,7 +1407,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0)) {
 		vmw_resource_unreference(&tmp);
 		vmw_resource_unreference(&res);
-		return ret;
+		goto out_unlock;
 	}
 
 	rep->sid = user_srf->base.hash.key;
@@ -651,11 +1415,19 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		DRM_ERROR("Created bad Surface ID.\n");
 
 	vmw_resource_unreference(&res);
+
+	ttm_read_unlock(&vmaster->lock);
 	return 0;
-out_err1:
+out_no_copy:
+	kfree(srf->offsets);
+out_no_offsets:
 	kfree(srf->sizes);
-out_err0:
+out_no_sizes:
 	kfree(user_srf);
+out_no_user_srf:
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
 
@@ -969,7 +1741,7 @@ static int vmw_stream_init(struct vmw_private *dev_priv,
 	int ret;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->stream_idr,
-				VMW_RES_STREAM, res_free);
+				VMW_RES_STREAM, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
 		if (res_free == NULL)
@@ -999,8 +1771,11 @@ static void vmw_user_stream_free(struct vmw_resource *res)
 {
 	struct vmw_user_stream *stream =
 	    container_of(res, struct vmw_user_stream, stream.res);
+	struct vmw_private *dev_priv = res->dev_priv;
 
 	kfree(stream);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_stream_size);
 }
 
 /**
@@ -1054,23 +1829,56 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_stream *stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	struct vmw_user_stream *stream;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	if (unlikely(stream == NULL))
-		return -ENOMEM;
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of streams anyway?
+	 */
+
+	if (unlikely(vmw_user_stream_size == 0))
+		vmw_user_stream_size = ttm_round_pot(sizeof(*stream)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_stream_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for stream"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+
+	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	if (unlikely(stream == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_stream_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	res = &stream->stream.res;
 	stream->base.shareable = false;
 	stream->base.tfile = NULL;
 
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
 	ret = vmw_stream_init(dev_priv, &stream->stream, vmw_user_stream_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(res);
 	ret = ttm_base_object_init(tfile, &stream->base, false, VMW_RES_STREAM,
@@ -1084,6 +1892,8 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 	arg->stream_id = res->id;
 out_err:
 	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
new file mode 100644
index 000000000000..477b2a9eb3c2
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -0,0 +1,567 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_kms.h"
+
+
+#define vmw_crtc_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.crtc)
+#define vmw_encoder_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.encoder)
+#define vmw_connector_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.connector)
+
+struct vmw_screen_object_display {
+	struct list_head active;
+
+	unsigned num_active;
+	unsigned last_num_active;
+
+	struct vmw_framebuffer *fb;
+};
+
+/**
+ * Display unit using screen objects.
+ */
+struct vmw_screen_object_unit {
+	struct vmw_display_unit base;
+
+	unsigned long buffer_size; /**< Size of allocated buffer */
+	struct vmw_dma_buffer *buffer; /**< Backing store buffer */
+
+	bool defined;
+
+	struct list_head active;
+};
+
+static void vmw_sou_destroy(struct vmw_screen_object_unit *sou)
+{
+	list_del_init(&sou->active);
+	vmw_display_unit_cleanup(&sou->base);
+	kfree(sou);
+}
+
+
+/*
+ * Screen Object Display Unit CRTC functions
+ */
+
+static void vmw_sou_crtc_destroy(struct drm_crtc *crtc)
+{
+	vmw_sou_destroy(vmw_crtc_to_sou(crtc));
+}
+
+static int vmw_sou_del_active(struct vmw_private *vmw_priv,
+			      struct vmw_screen_object_unit *sou)
+{
+	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
+	if (list_empty(&sou->active))
+		return 0;
+
+	/* Must init otherwise list_empty(&sou->active) will not work. */
+	list_del_init(&sou->active);
+	if (--(ld->num_active) == 0) {
+		BUG_ON(!ld->fb);
+		if (ld->fb->unpin)
+			ld->fb->unpin(ld->fb);
+		ld->fb = NULL;
+	}
+
+	return 0;
+}
+
+static int vmw_sou_add_active(struct vmw_private *vmw_priv,
+			      struct vmw_screen_object_unit *sou,
+			      struct vmw_framebuffer *vfb)
+{
+	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
+	struct vmw_screen_object_unit *entry;
+	struct list_head *at;
+
+	BUG_ON(!ld->num_active && ld->fb);
+	if (vfb != ld->fb) {
+		if (ld->fb && ld->fb->unpin)
+			ld->fb->unpin(ld->fb);
+		if (vfb->pin)
+			vfb->pin(vfb);
+		ld->fb = vfb;
+	}
+
+	if (!list_empty(&sou->active))
+		return 0;
+
+	at = &ld->active;
+	list_for_each_entry(entry, &ld->active, active) {
+		if (entry->base.unit > sou->base.unit)
+			break;
+
+		at = &entry->active;
+	}
+
+	list_add(&sou->active, at);
+
+	ld->num_active++;
+
+	return 0;
+}
+
+/**
+ * Send the fifo command to create a screen.
+ */
+static int vmw_sou_fifo_create(struct vmw_private *dev_priv,
+			       struct vmw_screen_object_unit *sou,
+			       uint32_t x, uint32_t y,
+			       struct drm_display_mode *mode)
+{
+	size_t fifo_size;
+
+	struct {
+		struct {
+			uint32_t cmdType;
+		} header;
+		SVGAScreenObject obj;
+	} *cmd;
+
+	BUG_ON(!sou->buffer);
+
+	fifo_size = sizeof(*cmd);
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* The hardware has hung, nothing we can do about it here. */
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header.cmdType = SVGA_CMD_DEFINE_SCREEN;
+	cmd->obj.structSize = sizeof(SVGAScreenObject);
+	cmd->obj.id = sou->base.unit;
+	cmd->obj.flags = SVGA_SCREEN_HAS_ROOT |
+		(sou->base.unit == 0 ? SVGA_SCREEN_IS_PRIMARY : 0);
+	cmd->obj.size.width = mode->hdisplay;
+	cmd->obj.size.height = mode->vdisplay;
+	cmd->obj.root.x = x;
+	cmd->obj.root.y = y;
+
+	/* Ok to assume that buffer is pinned in vram */
+	vmw_bo_get_guest_ptr(&sou->buffer->base, &cmd->obj.backingStore.ptr);
+	cmd->obj.backingStore.pitch = mode->hdisplay * 4;
+
+	vmw_fifo_commit(dev_priv, fifo_size);
+
+	sou->defined = true;
+
+	return 0;
+}
+
+/**
+ * Send the fifo command to destroy a screen.
+ */
+static int vmw_sou_fifo_destroy(struct vmw_private *dev_priv,
+				struct vmw_screen_object_unit *sou)
+{
+	size_t fifo_size;
+	int ret;
+
+	struct {
+		struct {
+			uint32_t cmdType;
+		} header;
+		SVGAFifoCmdDestroyScreen body;
+	} *cmd;
+
+	/* no need to do anything */
+	if (unlikely(!sou->defined))
+		return 0;
+
+	fifo_size = sizeof(*cmd);
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* the hardware has hung, nothing we can do about it here */
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header.cmdType = SVGA_CMD_DESTROY_SCREEN;
+	cmd->body.screenId = sou->base.unit;
+
+	vmw_fifo_commit(dev_priv, fifo_size);
+
+	/* Force sync */
+	ret = vmw_fallback_wait(dev_priv, false, true, 0, false, 3*HZ);
+	if (unlikely(ret != 0))
+		DRM_ERROR("Failed to sync with HW");
+	else
+		sou->defined = false;
+
+	return ret;
+}
+
+/**
+ * Free the backing store.
+ */
+static void vmw_sou_backing_free(struct vmw_private *dev_priv,
+				 struct vmw_screen_object_unit *sou)
+{
+	struct ttm_buffer_object *bo;
+
+	if (unlikely(sou->buffer == NULL))
+		return;
+
+	bo = &sou->buffer->base;
+	ttm_bo_unref(&bo);
+	sou->buffer = NULL;
+	sou->buffer_size = 0;
+}
+
+/**
+ * Allocate the backing store for the buffer.
+ */
+static int vmw_sou_backing_alloc(struct vmw_private *dev_priv,
+				 struct vmw_screen_object_unit *sou,
+				 unsigned long size)
+{
+	int ret;
+
+	if (sou->buffer_size == size)
+		return 0;
+
+	if (sou->buffer)
+		vmw_sou_backing_free(dev_priv, sou);
+
+	sou->buffer = kzalloc(sizeof(*sou->buffer), GFP_KERNEL);
+	if (unlikely(sou->buffer == NULL))
+		return -ENOMEM;
+
+	/* After we have alloced the backing store might not be able to
+	 * resume the overlays, this is preferred to failing to alloc.
+	 */
+	vmw_overlay_pause_all(dev_priv);
+	ret = vmw_dmabuf_init(dev_priv, sou->buffer, size,
+			      &vmw_vram_ne_placement,
+			      false, &vmw_dmabuf_bo_free);
+	vmw_overlay_resume_all(dev_priv);
+
+	if (unlikely(ret != 0))
+		sou->buffer = NULL; /* vmw_dmabuf_init frees on error */
+	else
+		sou->buffer_size = size;
+
+	return ret;
+}
+
+static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
+{
+	struct vmw_private *dev_priv;
+	struct vmw_screen_object_unit *sou;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	struct drm_encoder *encoder;
+	struct vmw_framebuffer *vfb;
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	if (!set)
+		return -EINVAL;
+
+	if (!set->crtc)
+		return -EINVAL;
+
+	/* get the sou */
+	crtc = set->crtc;
+	sou = vmw_crtc_to_sou(crtc);
+	vfb = set->fb ? vmw_framebuffer_to_vfb(set->fb) : NULL;
+	dev_priv = vmw_priv(crtc->dev);
+
+	if (set->num_connectors > 1) {
+		DRM_ERROR("to many connectors\n");
+		return -EINVAL;
+	}
+
+	if (set->num_connectors == 1 &&
+	    set->connectors[0] != &sou->base.connector) {
+		DRM_ERROR("connector doesn't match %p %p\n",
+			set->connectors[0], &sou->base.connector);
+		return -EINVAL;
+	}
+
+	/* sou only supports one fb active at the time */
+	if (dev_priv->sou_priv->fb && vfb &&
+	    !(dev_priv->sou_priv->num_active == 1 &&
+	      !list_empty(&sou->active)) &&
+	    dev_priv->sou_priv->fb != vfb) {
+		DRM_ERROR("Multiple framebuffers not supported\n");
+		return -EINVAL;
+	}
+
+	/* since they always map one to one these are safe */
+	connector = &sou->base.connector;
+	encoder = &sou->base.encoder;
+
+	/* should we turn the crtc off */
+	if (set->num_connectors == 0 || !set->mode || !set->fb) {
+		ret = vmw_sou_fifo_destroy(dev_priv, sou);
+		/* the hardware has hung don't do anything more */
+		if (unlikely(ret != 0))
+			return ret;
+
+		connector->encoder = NULL;
+		encoder->crtc = NULL;
+		crtc->fb = NULL;
+		crtc->x = 0;
+		crtc->y = 0;
+
+		vmw_sou_del_active(dev_priv, sou);
+
+		vmw_sou_backing_free(dev_priv, sou);
+
+		return 0;
+	}
+
+
+	/* we now know we want to set a mode */
+	mode = set->mode;
+	fb = set->fb;
+
+	if (set->x + mode->hdisplay > fb->width ||
+	    set->y + mode->vdisplay > fb->height) {
+		DRM_ERROR("set outside of framebuffer\n");
+		return -EINVAL;
+	}
+
+	vmw_fb_off(dev_priv);
+
+	if (mode->hdisplay != crtc->mode.hdisplay ||
+	    mode->vdisplay != crtc->mode.vdisplay) {
+		/* no need to check if depth is different, because backing
+		 * store depth is forced to 4 by the device.
+		 */
+
+		ret = vmw_sou_fifo_destroy(dev_priv, sou);
+		/* the hardware has hung don't do anything more */
+		if (unlikely(ret != 0))
+			return ret;
+
+		vmw_sou_backing_free(dev_priv, sou);
+	}
+
+	if (!sou->buffer) {
+		/* forced to depth 4 by the device */
+		size_t size = mode->hdisplay * mode->vdisplay * 4;
+		ret = vmw_sou_backing_alloc(dev_priv, sou, size);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	ret = vmw_sou_fifo_create(dev_priv, sou, set->x, set->y, mode);
+	if (unlikely(ret != 0)) {
+		/*
+		 * We are in a bit of a situation here, the hardware has
+		 * hung and we may or may not have a buffer hanging of
+		 * the screen object, best thing to do is not do anything
+		 * if we where defined, if not just turn the crtc of.
+		 * Not what userspace wants but it needs to htfu.
+		 */
+		if (sou->defined)
+			return ret;
+
+		connector->encoder = NULL;
+		encoder->crtc = NULL;
+		crtc->fb = NULL;
+		crtc->x = 0;
+		crtc->y = 0;
+
+		return ret;
+	}
+
+	vmw_sou_add_active(dev_priv, sou, vfb);
+
+	connector->encoder = encoder;
+	encoder->crtc = crtc;
+	crtc->mode = *mode;
+	crtc->fb = fb;
+	crtc->x = set->x;
+	crtc->y = set->y;
+
+	return 0;
+}
+
+static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
+	.save = vmw_du_crtc_save,
+	.restore = vmw_du_crtc_restore,
+	.cursor_set = vmw_du_crtc_cursor_set,
+	.cursor_move = vmw_du_crtc_cursor_move,
+	.gamma_set = vmw_du_crtc_gamma_set,
+	.destroy = vmw_sou_crtc_destroy,
+	.set_config = vmw_sou_crtc_set_config,
+};
+
+/*
+ * Screen Object Display Unit encoder functions
+ */
+
+static void vmw_sou_encoder_destroy(struct drm_encoder *encoder)
+{
+	vmw_sou_destroy(vmw_encoder_to_sou(encoder));
+}
+
+static struct drm_encoder_funcs vmw_screen_object_encoder_funcs = {
+	.destroy = vmw_sou_encoder_destroy,
+};
+
+/*
+ * Screen Object Display Unit connector functions
+ */
+
+static void vmw_sou_connector_destroy(struct drm_connector *connector)
+{
+	vmw_sou_destroy(vmw_connector_to_sou(connector));
+}
+
+static struct drm_connector_funcs vmw_legacy_connector_funcs = {
+	.dpms = vmw_du_connector_dpms,
+	.save = vmw_du_connector_save,
+	.restore = vmw_du_connector_restore,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
+	.set_property = vmw_du_connector_set_property,
+	.destroy = vmw_sou_connector_destroy,
+};
+
+static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
+{
+	struct vmw_screen_object_unit *sou;
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_connector *connector;
+	struct drm_encoder *encoder;
+	struct drm_crtc *crtc;
+
+	sou = kzalloc(sizeof(*sou), GFP_KERNEL);
+	if (!sou)
+		return -ENOMEM;
+
+	sou->base.unit = unit;
+	crtc = &sou->base.crtc;
+	encoder = &sou->base.encoder;
+	connector = &sou->base.connector;
+
+	INIT_LIST_HEAD(&sou->active);
+
+	sou->base.pref_active = (unit == 0);
+	sou->base.pref_width = 800;
+	sou->base.pref_height = 600;
+	sou->base.pref_mode = NULL;
+
+	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
+			   DRM_MODE_CONNECTOR_LVDS);
+	connector->status = vmw_du_connector_detect(connector, true);
+
+	drm_encoder_init(dev, encoder, &vmw_screen_object_encoder_funcs,
+			 DRM_MODE_ENCODER_LVDS);
+	drm_mode_connector_attach_encoder(connector, encoder);
+	encoder->possible_crtcs = (1 << unit);
+	encoder->possible_clones = 0;
+
+	drm_crtc_init(dev, crtc, &vmw_screen_object_crtc_funcs);
+
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.dirty_info_property,
+				      1);
+
+	return 0;
+}
+
+int vmw_kms_init_screen_object_display(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	int i, ret;
+
+	if (dev_priv->sou_priv) {
+		DRM_INFO("sou system already on\n");
+		return -EINVAL;
+	}
+
+	if (!(dev_priv->fifo.capabilities & SVGA_FIFO_CAP_SCREEN_OBJECT_2)) {
+		DRM_INFO("Not using screen objects,"
+			 " missing cap SCREEN_OBJECT_2\n");
+		return -ENOSYS;
+	}
+
+	ret = -ENOMEM;
+	dev_priv->sou_priv = kmalloc(sizeof(*dev_priv->sou_priv), GFP_KERNEL);
+	if (unlikely(!dev_priv->sou_priv))
+		goto err_no_mem;
+
+	INIT_LIST_HEAD(&dev_priv->sou_priv->active);
+	dev_priv->sou_priv->num_active = 0;
+	dev_priv->sou_priv->last_num_active = 0;
+	dev_priv->sou_priv->fb = NULL;
+
+	ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
+	if (unlikely(ret != 0))
+		goto err_free;
+
+	ret = drm_mode_create_dirty_info_property(dev);
+	if (unlikely(ret != 0))
+		goto err_vblank_cleanup;
+
+	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
+		vmw_sou_init(dev_priv, i);
+
+	DRM_INFO("Screen objects system initialized\n");
+
+	return 0;
+
+err_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+err_free:
+	kfree(dev_priv->sou_priv);
+	dev_priv->sou_priv = NULL;
+err_no_mem:
+	return ret;
+}
+
+int vmw_kms_close_screen_object_display(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	if (!dev_priv->sou_priv)
+		return -ENOSYS;
+
+	drm_vblank_cleanup(dev);
+
+	if (!list_empty(&dev_priv->sou_priv->active))
+		DRM_ERROR("Still have active outputs when unloading driver");
+
+	kfree(dev_priv->sou_priv);
+
+	return 0;
+}