diff options
author | Ben Skeggs <bskeggs@redhat.com> | 2010-02-11 02:31:44 +0100 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2010-02-23 04:50:03 +0100 |
commit | 531e77139f26e8da32ee694b9ee5e6f4c764f1db (patch) | |
tree | 36b279e8fd6ec24be06287fe2f92cfc0505e4e97 /drivers | |
parent | drm/nv50: more efficient clearing of gpu page table entries (diff) | |
download | linux-531e77139f26e8da32ee694b9ee5e6f4c764f1db.tar.xz linux-531e77139f26e8da32ee694b9ee5e6f4c764f1db.zip |
drm/nv50: improve vram page table construction
This commit changes nouveau to construct PTEs which look very much like
the ones the binary driver creates.
I presume that filling multiple PTEs identically with length flags and
the physical address of the start of a block of VRAM is a hint to the
memory controller that it need not perform additional page table lookups
for that range of addresses.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_mem.c | 44 |
1 files changed, 33 insertions, 11 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 6832c4c969a3..134fedbb7669 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -285,23 +285,45 @@ nv50_mem_vm_bind_linear(struct drm_device *dev, uint64_t virt, uint32_t size, uint32_t flags, uint64_t phys) { struct drm_nouveau_private *dev_priv = dev->dev_private; - unsigned pages; + struct nouveau_gpuobj *pgt; + unsigned block; + int i; - virt -= dev_priv->vm_vram_base; - pages = size >> 16; + virt = ((virt - dev_priv->vm_vram_base) >> 16) << 1; + size = (size >> 16) << 1; + phys |= ((uint64_t)flags << 32) | 1; dev_priv->engine.instmem.prepare_access(dev, true); - while (pages--) { - struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt[virt >> 29]; - unsigned pte = ((virt & 0x1fffffffULL) >> 16) << 1; - unsigned offset_h = upper_32_bits(phys) & 0xff; + while (size) { + unsigned offset_h = upper_32_bits(phys); unsigned offset_l = lower_32_bits(phys); + unsigned pte, end; + + for (i = 7; i >= 0; i--) { + block = 1 << (i + 1); + if (size >= block && !(virt & (block - 1))) + break; + } + offset_l |= (i << 7); + + phys += block << 15; + size -= block; - nv_wo32(dev, pt, pte++, offset_l | 1); - nv_wo32(dev, pt, pte++, offset_h | flags); + while (block) { + pgt = dev_priv->vm_vram_pt[virt >> 14]; + pte = virt & 0x3ffe; - phys += (1 << 16); - virt += (1 << 16); + end = pte + block; + if (end > 16384) + end = 16384; + block -= (end - pte); + virt += (end - pte); + + while (pte < end) { + nv_wo32(dev, pgt, pte++, offset_l); + nv_wo32(dev, pgt, pte++, offset_h); + } + } } dev_priv->engine.instmem.finish_access(dev); |