From 8b169b5f1f46da8ece1ce7304cda7155fffe3892 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 24 Jun 2009 16:31:50 +1000 Subject: drm: remove unused #include 's Remove unused #include ('s) in drivers/gpu/drm/ttm/ttm_bo_util.c drivers/gpu/drm/ttm/ttm_bo_vm.c drivers/gpu/drm/ttm/ttm_tt.c Signed-off-by: Huang Weiyi Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 - drivers/gpu/drm/ttm/ttm_bo_vm.c | 1 - drivers/gpu/drm/ttm/ttm_tt.c | 1 - 3 files changed, 3 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 517c84559633..bdec583901eb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -34,7 +34,6 @@ #include #include #include -#include #include void ttm_bo_free_old_node(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 27b146c54fbc..40b75032ea47 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 0331fa74cd3f..75dc8bd24592 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -28,7 +28,6 @@ * Authors: Thomas Hellstrom */ -#include #include #include #include -- cgit v1.2.3 From 916635bfcae5fec170ccd36f4b451cf7c5d23b9d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 15 Jul 2009 16:00:37 +1000 Subject: drm/ttm: fix misplaced parentheses Signed-off-by: Roel Kluin Signed-off-by: Andrew Morton Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 40b75032ea47..fe949a12fe40 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -327,7 +327,7 @@ ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp, goto out_unref; kmap_offset = dev_offset - bo->vm_node->start; - if (unlikely(kmap_offset) >= bo->num_pages) { + if (unlikely(kmap_offset >= bo->num_pages)) { ret = -EFBIG; goto out_unref; } @@ -401,7 +401,7 @@ ssize_t ttm_bo_fbdev_io(struct ttm_buffer_object *bo, const char __user *wbuf, bool dummy; kmap_offset = (*f_pos >> PAGE_SHIFT); - if (unlikely(kmap_offset) >= bo->num_pages) + if (unlikely(kmap_offset >= bo->num_pages)) return -EFBIG; page_offset = *f_pos & ~PAGE_MASK; -- cgit v1.2.3 From ae3e8122cbf8f9301369f276f4179aa6ec1b5b9c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 24 Jun 2009 19:57:34 +0200 Subject: ttm: Fix caching mode selection. A bug caused a new caching state to be selected on each buffer object validation regardless of the current caching state. Moreover, a caching state could be selected that wasn't supported by the memory type. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 51 +++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c1c407f7cca3..a753598a5e35 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -655,31 +655,52 @@ retry_pre_get: return 0; } +static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man, + uint32_t cur_placement, + uint32_t proposed_placement) +{ + uint32_t caching = proposed_placement & TTM_PL_MASK_CACHING; + uint32_t result = proposed_placement & ~TTM_PL_MASK_CACHING; + + /** + * Keep current caching if possible. + */ + + if ((cur_placement & caching) != 0) + result |= (cur_placement & caching); + else if ((man->default_caching & caching) != 0) + result |= man->default_caching; + else if ((TTM_PL_FLAG_CACHED & caching) != 0) + result |= TTM_PL_FLAG_CACHED; + else if ((TTM_PL_FLAG_WC & caching) != 0) + result |= TTM_PL_FLAG_WC; + else if ((TTM_PL_FLAG_UNCACHED & caching) != 0) + result |= TTM_PL_FLAG_UNCACHED; + + return result; +} + + static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man, bool disallow_fixed, uint32_t mem_type, - uint32_t mask, uint32_t *res_mask) + uint32_t proposed_placement, + uint32_t *masked_placement) { uint32_t cur_flags = ttm_bo_type_flags(mem_type); if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && disallow_fixed) return false; - if ((cur_flags & mask & TTM_PL_MASK_MEM) == 0) + if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0) return false; - if ((mask & man->available_caching) == 0) + if ((proposed_placement & man->available_caching) == 0) return false; - if (mask & man->default_caching) - cur_flags |= man->default_caching; - else if (mask & TTM_PL_FLAG_CACHED) - cur_flags |= TTM_PL_FLAG_CACHED; - else if (mask & TTM_PL_FLAG_WC) - cur_flags |= TTM_PL_FLAG_WC; - else - cur_flags |= TTM_PL_FLAG_UNCACHED; - *res_mask = cur_flags; + cur_flags |= (proposed_placement & man->available_caching); + + *masked_placement = cur_flags; return true; } @@ -723,6 +744,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (!type_ok) continue; + cur_flags = ttm_bo_select_caching(man, bo->mem.placement, + cur_flags); + if (mem_type == TTM_PL_SYSTEM) break; @@ -779,6 +803,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, proposed_placement, &cur_flags)) continue; + cur_flags = ttm_bo_select_caching(man, bo->mem.placement, + cur_flags); + ret = ttm_bo_mem_force_space(bdev, mem, mem_type, interruptible, no_wait); -- cgit v1.2.3 From ad49f501867cba87e1e45e5ebae0b12435d68bf1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Jul 2009 22:36:26 +1000 Subject: drm/ttm/radeon: add dma32 support. This add support for using dma32 memory on gpus that really need it. Currently IGPs are left without DMA32 but we might need to change that unless we can fix rs690. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_device.c | 17 +++++++++++++++-- drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo.c | 7 ++++++- drivers/gpu/drm/ttm/ttm_tt.c | 9 +++++++-- include/drm/ttm/ttm_bo_driver.h | 5 ++++- 6 files changed, 35 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e7662ba9abfb..3060ce14071e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -624,6 +624,7 @@ struct radeon_device { bool gpu_lockup; bool shutdown; bool suspend; + bool need_dma32; }; int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 332911267ebe..27a5ac969953 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -450,6 +450,7 @@ int radeon_device_init(struct radeon_device *rdev, uint32_t flags) { int r, ret; + int dma_bits; DRM_INFO("radeon: Initializing kernel modesetting.\n"); rdev->shutdown = false; @@ -492,8 +493,20 @@ int radeon_device_init(struct radeon_device *rdev, return r; } - /* Report DMA addressing limitation */ - r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(32)); + /* set DMA mask + need_dma32 flags. + * PCIE - can handle 40-bits. + * IGP - can handle 40-bits (in theory) + * AGP - generally dma32 is safest + * PCI - only dma32 + */ + rdev->need_dma32 = false; + if (rdev->flags & RADEON_IS_AGP) + rdev->need_dma32 = true; + if (rdev->flags & RADEON_IS_PCI) + rdev->need_dma32 = true; + + dma_bits = rdev->need_dma32 ? 32 : 40; + r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { printk(KERN_WARNING "radeon: No suitable DMA available.\n"); } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1227a97f5169..4ca9aa9203d0 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -442,7 +442,8 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&rdev->mman.bdev, rdev->mman.mem_global_ref.object, - &radeon_bo_driver, DRM_FILE_PAGE_OFFSET); + &radeon_bo_driver, DRM_FILE_PAGE_OFFSET, + rdev->need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a753598a5e35..e55e7972c897 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -224,6 +224,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) TTM_ASSERT_LOCKED(&bo->mutex); bo->ttm = NULL; + if (bdev->need_dma32) + page_flags |= TTM_PAGE_FLAG_DMA32; + switch (bo->type) { case ttm_bo_type_device: if (zero_alloc) @@ -1332,7 +1335,8 @@ EXPORT_SYMBOL(ttm_bo_device_release); int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_mem_global *mem_glob, - struct ttm_bo_driver *driver, uint64_t file_page_offset) + struct ttm_bo_driver *driver, uint64_t file_page_offset, + bool need_dma32) { int ret = -EINVAL; @@ -1369,6 +1373,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bdev->ddestroy); INIT_LIST_HEAD(&bdev->swap_lru); bdev->dev_mapping = NULL; + bdev->need_dma32 = need_dma32; ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout); ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 75dc8bd24592..81ab81f030a3 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -131,10 +131,15 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm) static struct page *ttm_tt_alloc_page(unsigned page_flags) { + gfp_t gfp_flags = GFP_HIGHUSER; + if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) - return alloc_page(GFP_HIGHUSER | __GFP_ZERO); + gfp_flags |= __GFP_ZERO; + + if (page_flags & TTM_PAGE_FLAG_DMA32) + gfp_flags |= __GFP_DMA32; - return alloc_page(GFP_HIGHUSER); + return alloc_page(gfp_flags); } static void ttm_tt_free_user_pages(struct ttm_tt *ttm) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 62ed733c52a2..ea83dd23a4d7 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -121,6 +121,7 @@ struct ttm_backend { #define TTM_PAGE_FLAG_SWAPPED (1 << 4) #define TTM_PAGE_FLAG_PERSISTANT_SWAP (1 << 5) #define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) +#define TTM_PAGE_FLAG_DMA32 (1 << 7) enum ttm_caching_state { tt_uncached, @@ -429,6 +430,8 @@ struct ttm_bo_device { */ struct delayed_work wq; + + bool need_dma32; }; /** @@ -648,7 +651,7 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev); extern int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_mem_global *mem_glob, struct ttm_bo_driver *driver, - uint64_t file_page_offset); + uint64_t file_page_offset, bool need_dma32); /** * ttm_bo_reserve: -- cgit v1.2.3 From e024e11070a0a0dc7163ce1ec2da354a638bdbed Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Jun 2009 09:48:08 +1000 Subject: drm/radeon/kms: add initial colortiling support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds new set/get tiling interfaces where the pitch and macro/micro tiling enables can be set. Along with a flag to decide if this object should have a surface when mapped. The only thing we need to allocate with a mapped surface should be the frontbuffer. Note rotate scanout shouldn't require one, and back/depth shouldn't either, though mesa needs some fixes. It fixes the TTM interfaces along Thomas's suggestions, and I've tested the surface stealing code with two X servers and not seen any lockdep issues. I've stopped tiling the fbcon frontbuffer, as I don't see there being any advantage other than testing, I've left the testing commands in there, just flip the fb_tiled to true in radeon_fb.c Open: Can we integrate endian swapping in with this? Future features: texture tiling - need to relocate texture registers TXOFFSET* with tiling info. This also merges Michel's cleanup surfaces regs at init time patch even though it makes sense on its own, this patch really relies on it. Some PowerMac firmwares set up a tiling surface at the beginning of VRAM which messes us up otherwise. that patch is: Signed-off-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_crtc.c | 11 ++- drivers/gpu/drm/radeon/r100.c | 79 ++++++++++++++++- drivers/gpu/drm/radeon/r300.c | 51 ++++++++++- drivers/gpu/drm/radeon/r300_reg.h | 4 +- drivers/gpu/drm/radeon/radeon.h | 32 ++++++- drivers/gpu/drm/radeon/radeon_asic.h | 19 ++++ drivers/gpu/drm/radeon/radeon_device.c | 2 + drivers/gpu/drm/radeon/radeon_fb.c | 12 ++- drivers/gpu/drm/radeon/radeon_gem.c | 41 +++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 2 + drivers/gpu/drm/radeon/radeon_legacy_crtc.c | 15 ++-- drivers/gpu/drm/radeon/radeon_object.c | 130 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_ttm.c | 2 + drivers/gpu/drm/ttm/ttm_bo.c | 5 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 + include/drm/radeon_drm.h | 23 ++++- include/drm/ttm/ttm_bo_driver.h | 15 ++++ 17 files changed, 427 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index e64a199b5ee1..eac26cdb5dae 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -327,7 +327,7 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_gem_object *obj; struct drm_radeon_gem_object *obj_priv; uint64_t fb_location; - uint32_t fb_format, fb_pitch_pixels; + uint32_t fb_format, fb_pitch_pixels, tiling_flags; if (!crtc->fb) return -EINVAL; @@ -364,7 +364,14 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, return -EINVAL; } - /* TODO tiling */ + radeon_object_get_tiling_flags(obj->driver_private, + &tiling_flags, NULL); + if (tiling_flags & RADEON_TILING_MACRO) + fb_format |= AVIVO_D1GRPH_MACRO_ADDRESS_MODE; + + if (tiling_flags & RADEON_TILING_MICRO) + fb_format |= AVIVO_D1GRPH_TILED; + if (radeon_crtc->crtc_id == 0) WREG32(AVIVO_D1VGA_CONTROL, 0); else diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 0d05909f03f6..69bd7cb59972 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -909,6 +909,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, unsigned idx; bool onereg; int r; + u32 tile_flags = 0; ib = p->ib->ptr; ib_chunk = &p->chunks[p->chunk_ib_idx]; @@ -942,7 +943,20 @@ static int r100_packet0_check(struct radeon_cs_parser *p, } tmp = ib_chunk->kdata[idx] & 0x003fffff; tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp; + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; break; case RADEON_RB3D_DEPTHOFFSET: case RADEON_RB3D_COLOROFFSET: @@ -987,6 +1001,25 @@ static int r100_packet0_check(struct radeon_cs_parser *p, } ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); break; + case R300_RB3D_COLORPITCH0: + case RADEON_RB3D_COLORPITCH: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_COLOR_TILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; + + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; + break; default: /* FIXME: we don't want to allow anyothers packet */ break; @@ -1707,3 +1740,47 @@ int r100_debugfs_mc_info_init(struct radeon_device *rdev) return 0; #endif } + +int r100_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size) +{ + int surf_index = reg * 16; + int flags = 0; + + /* r100/r200 divide by 16 */ + if (rdev->family < CHIP_R300) + flags = pitch / 16; + else + flags = pitch / 8; + + if (rdev->family <= CHIP_RS200) { + if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) + == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) + flags |= RADEON_SURF_TILE_COLOR_BOTH; + if (tiling_flags & RADEON_TILING_MACRO) + flags |= RADEON_SURF_TILE_COLOR_MACRO; + } else if (rdev->family <= CHIP_RV280) { + if (tiling_flags & (RADEON_TILING_MACRO)) + flags |= R200_SURF_TILE_COLOR_MACRO; + if (tiling_flags & RADEON_TILING_MICRO) + flags |= R200_SURF_TILE_COLOR_MICRO; + } else { + if (tiling_flags & RADEON_TILING_MACRO) + flags |= R300_SURF_TILE_MACRO; + if (tiling_flags & RADEON_TILING_MICRO) + flags |= R300_SURF_TILE_MICRO; + } + + DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); + WREG32(RADEON_SURFACE0_INFO + surf_index, flags); + WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); + WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); + return 0; +} + +void r100_clear_surface_reg(struct radeon_device *rdev, int reg) +{ + int surf_index = reg * 16; + WREG32(RADEON_SURFACE0_INFO + surf_index, 0); +} diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 0e0e094da503..28e5777658be 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -30,6 +30,7 @@ #include "drm.h" #include "radeon_reg.h" #include "radeon.h" +#include "radeon_drm.h" /* r300,r350,rv350,rv370,rv380 depends on : */ void r100_hdp_reset(struct radeon_device *rdev); @@ -1023,7 +1024,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_reloc *reloc; struct r300_cs_track *track; volatile uint32_t *ib; - uint32_t tmp; + uint32_t tmp, tile_flags = 0; unsigned i; int r; @@ -1052,7 +1053,19 @@ static int r300_packet0_check(struct radeon_cs_parser *p, } tmp = ib_chunk->kdata[idx] & 0x003fffff; tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp; + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + tmp |= tile_flags; + ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; break; case R300_RB3D_COLOROFFSET0: case R300_RB3D_COLOROFFSET1: @@ -1141,6 +1154,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p, /* RB3D_COLORPITCH1 */ /* RB3D_COLORPITCH2 */ /* RB3D_COLORPITCH3 */ + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= R300_COLOR_TILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= R300_COLOR_MICROTILE_ENABLE; + + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; + i = (reg - 0x4E38) >> 2; track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE; switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) { @@ -1196,6 +1226,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case 0x4F24: /* ZB_DEPTHPITCH */ + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= R300_DEPTHMACROTILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= R300_DEPTHMICROTILE_TILED;; + + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; + track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC; break; case 0x4104: diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h index 70f48609515e..4b7afef35a65 100644 --- a/drivers/gpu/drm/radeon/r300_reg.h +++ b/drivers/gpu/drm/radeon/r300_reg.h @@ -27,7 +27,9 @@ #ifndef _R300_REG_H_ #define _R300_REG_H_ - +#define R300_SURF_TILE_MACRO (1<<16) +#define R300_SURF_TILE_MICRO (2<<16) +#define R300_SURF_TILE_BOTH (3<<16) #define R300_MC_INIT_MISC_LAT_TIMER 0x180 diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7f007185e7f7..af12a2fe3221 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -201,6 +201,14 @@ int radeon_fence_wait_last(struct radeon_device *rdev); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); +/* + * Tiling registers + */ +struct radeon_surface_reg { + struct radeon_object *robj; +}; + +#define RADEON_GEM_MAX_SURFACES 8 /* * Radeon buffer. @@ -213,6 +221,7 @@ struct radeon_object_list { uint64_t gpu_offset; unsigned rdomain; unsigned wdomain; + uint32_t tiling_flags; }; int radeon_object_init(struct radeon_device *rdev); @@ -242,8 +251,15 @@ void radeon_object_list_clean(struct list_head *head); int radeon_object_fbdev_mmap(struct radeon_object *robj, struct vm_area_struct *vma); unsigned long radeon_object_size(struct radeon_object *robj); - - +void radeon_object_clear_surface_reg(struct radeon_object *robj); +int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved, + bool force_drop); +void radeon_object_set_tiling_flags(struct radeon_object *robj, + uint32_t tiling_flags, uint32_t pitch); +void radeon_object_get_tiling_flags(struct radeon_object *robj, uint32_t *tiling_flags, uint32_t *pitch); +void radeon_bo_move_notify(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem); +void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); /* * GEM objects. */ @@ -535,6 +551,11 @@ struct radeon_asic { void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock); void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); void (*set_clock_gating)(struct radeon_device *rdev, int enable); + + int (*set_surface_reg)(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); + int (*clear_surface_reg)(struct radeon_device *rdev, int reg); }; union radeon_asic_config { @@ -568,6 +589,10 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); /* @@ -627,6 +652,7 @@ struct radeon_device { bool shutdown; bool suspend; bool need_dma32; + struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; }; int radeon_device_init(struct radeon_device *rdev, @@ -801,5 +827,7 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_engine_clock((rdev), (e)) #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l)) #define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e)) +#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s))) +#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r))) #endif diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e2e567395df8..dd903d329406 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -71,6 +71,10 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t dst_offset, unsigned num_pages, struct radeon_fence *fence); +int r100_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); +int r100_clear_surface_reg(struct radeon_device *rdev, int reg); static struct radeon_asic r100_asic = { .init = &r100_init, @@ -100,6 +104,8 @@ static struct radeon_asic r100_asic = { .set_memory_clock = NULL, .set_pcie_lanes = NULL, .set_clock_gating = &radeon_legacy_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -128,6 +134,7 @@ int r300_copy_dma(struct radeon_device *rdev, uint64_t dst_offset, unsigned num_pages, struct radeon_fence *fence); + static struct radeon_asic r300_asic = { .init = &r300_init, .errata = &r300_errata, @@ -156,6 +163,8 @@ static struct radeon_asic r300_asic = { .set_memory_clock = NULL, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_legacy_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; /* @@ -193,6 +202,8 @@ static struct radeon_asic r420_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -237,6 +248,8 @@ static struct radeon_asic rs400_asic = { .set_memory_clock = NULL, .set_pcie_lanes = NULL, .set_clock_gating = &radeon_legacy_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -322,6 +335,8 @@ static struct radeon_asic rs690_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = NULL, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -367,6 +382,8 @@ static struct radeon_asic rv515_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -405,6 +422,8 @@ static struct radeon_asic r520_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; /* diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index cdef6eb01baf..f23083bbba3f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -48,6 +48,8 @@ static void radeon_surface_init(struct radeon_device *rdev) i * (RADEON_SURFACE1_INFO - RADEON_SURFACE0_INFO), 0); } + /* enable surfaces */ + WREG32(RADEON_SURFACE_CNTL, 0); } } diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 260870a29d83..36d2f5588f20 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -471,10 +471,10 @@ static struct notifier_block paniced = { .notifier_call = radeonfb_panic, }; -static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp) +static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled) { int aligned = width; - int align_large = (ASIC_IS_AVIVO(rdev)); + int align_large = (ASIC_IS_AVIVO(rdev)) || tiled; int pitch_mask = 0; switch (bpp / 8) { @@ -512,12 +512,13 @@ int radeonfb_create(struct radeon_device *rdev, u64 fb_gpuaddr; void *fbptr = NULL; unsigned long tmp; + bool fb_tiled = false; /* useful for testing */ mode_cmd.width = surface_width; mode_cmd.height = surface_height; mode_cmd.bpp = 32; /* need to align pitch with crtc limits */ - mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp) * ((mode_cmd.bpp + 1) / 8); + mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp, fb_tiled) * ((mode_cmd.bpp + 1) / 8); mode_cmd.depth = 24; size = mode_cmd.pitch * mode_cmd.height; @@ -535,6 +536,8 @@ int radeonfb_create(struct radeon_device *rdev, } robj = gobj->driver_private; + if (fb_tiled) + radeon_object_set_tiling_flags(robj, RADEON_TILING_MACRO|RADEON_TILING_SURFACE, mode_cmd.pitch); mutex_lock(&rdev->ddev->struct_mutex); fb = radeon_framebuffer_create(rdev->ddev, &mode_cmd, gobj); if (fb == NULL) { @@ -563,6 +566,9 @@ int radeonfb_create(struct radeon_device *rdev, } rfbdev = info->par; + if (fb_tiled) + radeon_object_check_tiling(robj, 0, 0); + ret = radeon_object_kmap(robj, &fbptr); if (ret) { goto out_unref; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index eb516034235d..12542087b298 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -285,3 +285,44 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); return r; } + +int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_set_tiling *args = data; + struct drm_gem_object *gobj; + struct radeon_object *robj; + int r = 0; + + DRM_DEBUG("%d \n", args->handle); + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) + return -EINVAL; + robj = gobj->driver_private; + radeon_object_set_tiling_flags(robj, args->tiling_flags, args->pitch); + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(gobj); + mutex_unlock(&dev->struct_mutex); + return r; +} + +int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_get_tiling *args = data; + struct drm_gem_object *gobj; + struct radeon_object *robj; + int r = 0; + + DRM_DEBUG("\n"); + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) + return -EINVAL; + robj = gobj->driver_private; + radeon_object_get_tiling_flags(robj, &args->tiling_flags, + &args->pitch); + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(gobj); + mutex_unlock(&dev->struct_mutex); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 4612a7c146d1..937a2f1cdb46 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -291,5 +291,7 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 14c1a5107fc9..0613790e2a5b 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -235,6 +235,7 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, uint64_t base; uint32_t crtc_offset, crtc_offset_cntl, crtc_tile_x0_y0 = 0; uint32_t crtc_pitch, pitch_pixels; + uint32_t tiling_flags; DRM_DEBUG("\n"); @@ -258,8 +259,12 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, (crtc->fb->bits_per_pixel * 8)); crtc_pitch |= crtc_pitch << 16; - /* TODO tiling */ - if (0) { + radeon_object_get_tiling_flags(obj->driver_private, + &tiling_flags, NULL); + if (tiling_flags & RADEON_TILING_MICRO) + DRM_ERROR("trying to scanout microtiled buffer\n"); + + if (tiling_flags & RADEON_TILING_MACRO) { if (ASIC_IS_R300(rdev)) crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN | R300_CRTC_MICRO_TILE_BUFFER_DIS | @@ -275,15 +280,13 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, crtc_offset_cntl &= ~RADEON_CRTC_TILE_EN; } - - /* TODO more tiling */ - if (0) { + if (tiling_flags & RADEON_TILING_MACRO) { if (ASIC_IS_R300(rdev)) { crtc_tile_x0_y0 = x | (y << 16); base &= ~0x7ff; } else { int byteshift = crtc->fb->bits_per_pixel >> 4; - int tile_addr = (((y >> 3) * crtc->fb->width + x) >> (8 - byteshift)) << 11; + int tile_addr = (((y >> 3) * pitch_pixels + x) >> (8 - byteshift)) << 11; base += tile_addr + ((x << byteshift) % 256) + ((y % 8) << 8); crtc_offset_cntl |= (y % 16); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bac0d06c52ac..d5b1fd562d88 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -44,6 +44,9 @@ struct radeon_object { uint64_t gpu_addr; void *kptr; bool is_iomem; + uint32_t tiling_flags; + uint32_t pitch; + int surface_reg; }; int radeon_ttm_init(struct radeon_device *rdev); @@ -70,6 +73,7 @@ static void radeon_ttm_object_object_destroy(struct ttm_buffer_object *tobj) robj = container_of(tobj, struct radeon_object, tobj); list_del_init(&robj->list); + radeon_object_clear_surface_reg(robj); kfree(robj); } @@ -141,6 +145,7 @@ int radeon_object_create(struct radeon_device *rdev, } robj->rdev = rdev; robj->gobj = gobj; + robj->surface_reg = -1; INIT_LIST_HEAD(&robj->list); flags = radeon_object_flags_from_domain(domain); @@ -435,6 +440,7 @@ int radeon_object_list_validate(struct list_head *head, void *fence) radeon_object_gpu_addr(robj); } lobj->gpu_offset = robj->gpu_addr; + lobj->tiling_flags = robj->tiling_flags; if (fence) { old_fence = (struct radeon_fence *)robj->tobj.sync_obj; robj->tobj.sync_obj = radeon_fence_ref(fence); @@ -479,3 +485,127 @@ unsigned long radeon_object_size(struct radeon_object *robj) { return robj->tobj.num_pages << PAGE_SHIFT; } + +int radeon_object_get_surface_reg(struct radeon_object *robj) +{ + struct radeon_device *rdev = robj->rdev; + struct radeon_surface_reg *reg; + struct radeon_object *old_object; + int steal; + int i; + + if (!robj->tiling_flags) + return 0; + + if (robj->surface_reg >= 0) { + reg = &rdev->surface_regs[robj->surface_reg]; + i = robj->surface_reg; + goto out; + } + + steal = -1; + for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) { + + reg = &rdev->surface_regs[i]; + if (!reg->robj) + break; + + old_object = reg->robj; + if (old_object->pin_count == 0) + steal = i; + } + + /* if we are all out */ + if (i == RADEON_GEM_MAX_SURFACES) { + if (steal == -1) + return -ENOMEM; + /* find someone with a surface reg and nuke their BO */ + reg = &rdev->surface_regs[steal]; + old_object = reg->robj; + /* blow away the mapping */ + DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object); + ttm_bo_unmap_virtual(&old_object->tobj); + old_object->surface_reg = -1; + i = steal; + } + + robj->surface_reg = i; + reg->robj = robj; + +out: + radeon_set_surface_reg(rdev, i, robj->tiling_flags, robj->pitch, + robj->tobj.mem.mm_node->start << PAGE_SHIFT, + robj->tobj.num_pages << PAGE_SHIFT); + return 0; +} + +void radeon_object_clear_surface_reg(struct radeon_object *robj) +{ + struct radeon_device *rdev = robj->rdev; + struct radeon_surface_reg *reg; + + if (robj->surface_reg == -1) + return; + + reg = &rdev->surface_regs[robj->surface_reg]; + radeon_clear_surface_reg(rdev, robj->surface_reg); + + reg->robj = NULL; + robj->surface_reg = -1; +} + +void radeon_object_set_tiling_flags(struct radeon_object *robj, + uint32_t tiling_flags, uint32_t pitch) +{ + robj->tiling_flags = tiling_flags; + robj->pitch = pitch; +} + +void radeon_object_get_tiling_flags(struct radeon_object *robj, + uint32_t *tiling_flags, + uint32_t *pitch) +{ + if (tiling_flags) + *tiling_flags = robj->tiling_flags; + if (pitch) + *pitch = robj->pitch; +} + +int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved, + bool force_drop) +{ + if (!(robj->tiling_flags & RADEON_TILING_SURFACE)) + return 0; + + if (force_drop) { + radeon_object_clear_surface_reg(robj); + return 0; + } + + if (robj->tobj.mem.mem_type != TTM_PL_VRAM) { + if (!has_moved) + return 0; + + if (robj->surface_reg >= 0) + radeon_object_clear_surface_reg(robj); + return 0; + } + + if ((robj->surface_reg >= 0) && !has_moved) + return 0; + + return radeon_object_get_surface_reg(robj); +} + +void radeon_bo_move_notify(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_object *robj = container_of(bo, struct radeon_object, tobj); + radeon_object_check_tiling(robj, 0, 1); +} + +void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) +{ + struct radeon_object *robj = container_of(bo, struct radeon_object, tobj); + radeon_object_check_tiling(robj, 0, 0); +} diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 4ca9aa9203d0..37e1cbcce3a9 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -429,6 +429,8 @@ static struct ttm_bo_driver radeon_bo_driver = { .sync_obj_flush = &radeon_sync_obj_flush, .sync_obj_unref = &radeon_sync_obj_unref, .sync_obj_ref = &radeon_sync_obj_ref, + .move_notify = &radeon_bo_move_notify, + .fault_reserve_notify = &radeon_bo_fault_reserve_notify, }; int radeon_ttm_init(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e55e7972c897..6538d4236989 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -43,7 +43,6 @@ #define TTM_BO_HASH_ORDER 13 static int ttm_bo_setup_vm(struct ttm_buffer_object *bo); -static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); static int ttm_bo_swapout(struct ttm_mem_shrink *shrink); static inline uint32_t ttm_bo_type_flags(unsigned type) @@ -307,6 +306,9 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, } + if (bdev->driver->move_notify) + bdev->driver->move_notify(bo, mem); + if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) ret = ttm_bo_move_ttm(bo, evict, no_wait, mem); @@ -1451,6 +1453,7 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1); } +EXPORT_SYMBOL(ttm_bo_unmap_virtual); static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 40b75032ea47..41c907f6c560 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -101,6 +101,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + if (bdev->driver->fault_reserve_notify) + bdev->driver->fault_reserve_notify(bo); + /* * Wait for buffer data in transit, due to a pipelined * move. diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 41862e9a4c20..af4b4826997e 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -506,6 +506,8 @@ typedef struct { #define DRM_RADEON_GEM_WAIT_IDLE 0x24 #define DRM_RADEON_CS 0x26 #define DRM_RADEON_INFO 0x27 +#define DRM_RADEON_GEM_SET_TILING 0x28 +#define DRM_RADEON_GEM_GET_TILING 0x29 #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -544,7 +546,8 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) #define DRM_IOCTL_RADEON_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs) #define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) - +#define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) +#define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) typedef struct drm_radeon_init { enum { @@ -796,6 +799,24 @@ struct drm_radeon_gem_create { uint32_t flags; }; +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP 0x4 +#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface + * when mapped - i.e. front buffer */ + +struct drm_radeon_gem_set_tiling { + uint32_t handle; + uint32_t tiling_flags; + uint32_t pitch; +}; + +struct drm_radeon_gem_get_tiling { + uint32_t handle; + uint32_t tiling_flags; + uint32_t pitch; +}; + struct drm_radeon_gem_mmap { uint32_t handle; uint32_t pad; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index ea83dd23a4d7..a68829db381a 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -354,6 +354,14 @@ struct ttm_bo_driver { int (*sync_obj_flush) (void *sync_obj, void *sync_arg); void (*sync_obj_unref) (void **sync_obj); void *(*sync_obj_ref) (void *sync_obj); + + /* hook to notify driver about a driver move so it + * can do tiling things */ + void (*move_notify)(struct ttm_buffer_object *bo, + struct ttm_mem_reg *new_mem); + /* notify the driver we are taking a fault on this BO + * and have reserved it */ + void (*fault_reserve_notify)(struct ttm_buffer_object *bo); }; #define TTM_NUM_MEM_TYPES 8 @@ -653,6 +661,13 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_bo_driver *driver, uint64_t file_page_offset, bool need_dma32); +/** + * ttm_bo_unmap_virtual + * + * @bo: tear down the virtual mappings for this BO + */ +extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); + /** * ttm_bo_reserve: * -- cgit v1.2.3 From 4677f15c60421d48566c48c3149474e64977f071 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 21 Jul 2009 17:45:13 +0200 Subject: drm/ttm: Fix an oops and sync object leak. The code was potentially dereferencig a NULL sync object pointer. At the same time a sync object reference was potentially leaked. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bdec583901eb..3e5d0c4ad85c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -509,8 +509,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, if (evict) { ret = ttm_bo_wait(bo, false, false, false); spin_unlock(&bo->lock); - driver->sync_obj_unref(&bo->sync_obj); - + if (tmp_obj) + driver->sync_obj_unref(&tmp_obj); if (ret) return ret; @@ -532,6 +532,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); spin_unlock(&bo->lock); + if (tmp_obj) + driver->sync_obj_unref(&tmp_obj); ret = ttm_buffer_object_transfer(bo, &ghost_obj); if (ret) -- cgit v1.2.3 From 542c6f6df51327dbb180cf4d9b34827e147efe17 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 24 Jul 2009 09:57:34 +0200 Subject: drm/ttm: Fix ttm in-kernel copying of pages with non-standard caching attributes. For x86 this affected highmem pages only, since they were always kmapped cache-coherent, and this is fixed using kmap_atomic_prot(). For other architectures that may not modify the linear kernel map we resort to vmap() for now, since kmap_atomic_prot() generally uses the linear kernel map for lowmem pages. This of course comes with a performance impact and should be optimized when possible. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 63 ++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 3e5d0c4ad85c..ce2e6f38ea01 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -136,7 +136,8 @@ static int ttm_copy_io_page(void *dst, void *src, unsigned long page) } static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, - unsigned long page) + unsigned long page, + pgprot_t prot) { struct page *d = ttm_tt_get_page(ttm, page); void *dst; @@ -145,17 +146,35 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, return -ENOMEM; src = (void *)((unsigned long)src + (page << PAGE_SHIFT)); - dst = kmap(d); + +#ifdef CONFIG_X86 + dst = kmap_atomic_prot(d, KM_USER0, prot); +#else + if (prot != PAGE_KERNEL) + dst = vmap(&d, 1, 0, prot); + else + dst = kmap(d); +#endif if (!dst) return -ENOMEM; memcpy_fromio(dst, src, PAGE_SIZE); - kunmap(d); + +#ifdef CONFIG_X86 + kunmap_atomic(dst, KM_USER0); +#else + if (prot != PAGE_KERNEL) + vunmap(dst); + else + kunmap(d); +#endif + return 0; } static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, - unsigned long page) + unsigned long page, + pgprot_t prot) { struct page *s = ttm_tt_get_page(ttm, page); void *src; @@ -164,12 +183,28 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, return -ENOMEM; dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT)); - src = kmap(s); +#ifdef CONFIG_X86 + src = kmap_atomic_prot(s, KM_USER0, prot); +#else + if (prot != PAGE_KERNEL) + src = vmap(&s, 1, 0, prot); + else + src = kmap(s); +#endif if (!src) return -ENOMEM; memcpy_toio(dst, src, PAGE_SIZE); - kunmap(s); + +#ifdef CONFIG_X86 + kunmap_atomic(src, KM_USER0); +#else + if (prot != PAGE_KERNEL) + vunmap(src); + else + kunmap(s); +#endif + return 0; } @@ -214,11 +249,17 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, for (i = 0; i < new_mem->num_pages; ++i) { page = i * dir + add; - if (old_iomap == NULL) - ret = ttm_copy_ttm_io_page(ttm, new_iomap, page); - else if (new_iomap == NULL) - ret = ttm_copy_io_ttm_page(ttm, old_iomap, page); - else + if (old_iomap == NULL) { + pgprot_t prot = ttm_io_prot(old_mem->placement, + PAGE_KERNEL); + ret = ttm_copy_ttm_io_page(ttm, new_iomap, page, + prot); + } else if (new_iomap == NULL) { + pgprot_t prot = ttm_io_prot(new_mem->placement, + PAGE_KERNEL); + ret = ttm_copy_io_ttm_page(ttm, old_iomap, page, + prot); + } else ret = ttm_copy_io_page(new_iomap, old_iomap, page); if (ret) goto out1; -- cgit v1.2.3 From f121ecfebbea1452a17d57c656def7d1537440f7 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 24 Jul 2009 10:22:36 +0200 Subject: drm/ttm: powerpc: Fix Highmem cache flushing. Temporarily maps highmem pages while flushing to get a valid virtual address to flush. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_tt.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 81ab81f030a3..9b2248a80cff 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -86,10 +86,16 @@ void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages) unsigned long i; for (i = 0; i < num_pages; ++i) { - if (pages[i]) { - unsigned long start = (unsigned long)page_address(pages[i]); - flush_dcache_range(start, start + PAGE_SIZE); - } + struct page *page = pages[i]; + void *page_virtual; + + if (unlikely(page == NULL)) + continue; + + page_virtual = kmap_atomic(page, KM_USER0); + flush_dcache_range((unsigned long) page_virtual, + (unsigned long) page_virtual + PAGE_SIZE); + kunmap_atomic(page_virtual, KM_USER0); } #else if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0) -- cgit v1.2.3 From b42db2b12df7b4f7b2ace581a7726cb5bcb2d658 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 29 Jul 2009 16:56:52 +1000 Subject: drm/ttm: fix highuser vs dma32 confusion. DMA32 and highmem are sort of exclusive. Noticed by AndrewR on #radeon. Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_tt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 9b2248a80cff..b8b6c4a5f983 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -137,13 +137,15 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm) static struct page *ttm_tt_alloc_page(unsigned page_flags) { - gfp_t gfp_flags = GFP_HIGHUSER; + gfp_t gfp_flags = GFP_USER; if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) gfp_flags |= __GFP_ZERO; if (page_flags & TTM_PAGE_FLAG_DMA32) gfp_flags |= __GFP_DMA32; + else + gfp_flags |= __GFP_HIGHMEM; return alloc_page(gfp_flags); } -- cgit v1.2.3 From 6d0897ba58139523d37e97855ee0fe2d78629da6 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 31 Jul 2009 10:47:51 +0200 Subject: drm/ttm: Fix a potential comparison of structs. On some architectures the comparison may cause a compilation failure. Original partial fix Signed-off-by: Thomas Hellstrom Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ce2e6f38ea01..ad4ada07c6cf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -150,7 +150,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, #ifdef CONFIG_X86 dst = kmap_atomic_prot(d, KM_USER0, prot); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) dst = vmap(&d, 1, 0, prot); else dst = kmap(d); @@ -163,7 +163,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src, #ifdef CONFIG_X86 kunmap_atomic(dst, KM_USER0); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) vunmap(dst); else kunmap(d); @@ -186,7 +186,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, #ifdef CONFIG_X86 src = kmap_atomic_prot(s, KM_USER0, prot); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) src = vmap(&s, 1, 0, prot); else src = kmap(s); @@ -199,7 +199,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, #ifdef CONFIG_X86 kunmap_atomic(src, KM_USER0); #else - if (prot != PAGE_KERNEL) + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) vunmap(src); else kunmap(s); -- cgit v1.2.3 From fee280d3fd9bc5247bef9f4ab35a4693bfffdcfd Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 3 Aug 2009 12:39:06 +0200 Subject: drm/ttm: Fix a sync object leak. If there are multiple simultaneous waiters for the same buffer object, a temporary reference to its sync object may be leaked. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6538d4236989..aa82d5370c38 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1575,6 +1575,10 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, driver->sync_obj_unref(&sync_obj); driver->sync_obj_unref(&tmp_obj); spin_lock(&bo->lock); + } else { + spin_unlock(&bo->lock); + driver->sync_obj_unref(&sync_obj); + spin_lock(&bo->lock); } } return 0; -- cgit v1.2.3 From c96e7c7a3a79931446ecf9494a8415e4d164ebd8 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 3 Aug 2009 14:22:53 +0200 Subject: drm/ttm: Read buffer overflow Check whether index is within bounds before grabbing the element. Signed-off-by: Roel Kluin Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/ttm') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index aa82d5370c38..c2b0d710d10f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1182,13 +1182,14 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) { - struct ttm_mem_type_manager *man = &bdev->man[mem_type]; + struct ttm_mem_type_manager *man; int ret = -EINVAL; if (mem_type >= TTM_NUM_MEM_TYPES) { printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type); return ret; } + man = &bdev->man[mem_type]; if (!man->has_type) { printk(KERN_ERR TTM_PFX "Trying to take down uninitialized " -- cgit v1.2.3