Diffstat (limited to 'arch/sparc64/kernel/iommu.c')
-rw-r--r-- | arch/sparc64/kernel/iommu.c | 452
1 files changed, 242 insertions, 210 deletions
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 4b9115a4d92e..d3276ebcfb47 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -1,6 +1,6 @@
 /* iommu.c: Generic sparc64 IOMMU support.
  *
- * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
  */
 
@@ -10,6 +10,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
+#include <linux/iommu-helper.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
@@ -41,7 +42,7 @@
                                "i" (ASI_PHYS_BYPASS_EC_E))
 
 /* Must be invoked under the IOMMU lock. */
-static void __iommu_flushall(struct iommu *iommu)
+static void iommu_flushall(struct iommu *iommu)
 {
         if (iommu->iommu_flushinv) {
                 iommu_write(iommu->iommu_flushinv, ~(u64)0);
@@ -83,54 +84,91 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
         iopte_val(*iopte) = val;
 }
 
-/* Based largely upon the ppc64 iommu allocator. */
-static long arena_alloc(struct iommu *iommu, unsigned long npages)
+/* Based almost entirely upon the ppc64 iommu allocator.  If you use the 'handle'
+ * facility it must all be done in one pass while under the iommu lock.
+ *
+ * On sun4u platforms, we only flush the IOMMU once every time we've passed
+ * over the entire page table doing allocations.  Therefore we only ever advance
+ * the hint and cannot backtrack it.
+ */
+unsigned long iommu_range_alloc(struct device *dev,
+                                struct iommu *iommu,
+                                unsigned long npages,
+                                unsigned long *handle)
 {
+        unsigned long n, end, start, limit, boundary_size;
         struct iommu_arena *arena = &iommu->arena;
-        unsigned long n, i, start, end, limit;
-        int pass;
+        int pass = 0;
+
+        /* This allocator was derived from x86_64's bit string search */
+
+        /* Sanity check */
+        if (unlikely(npages == 0)) {
+                if (printk_ratelimit())
+                        WARN_ON(1);
+                return DMA_ERROR_CODE;
+        }
+
+        if (handle && *handle)
+                start = *handle;
+        else
+                start = arena->hint;
 
         limit = arena->limit;
-        start = arena->hint;
-        pass = 0;
 
-again:
-        n = find_next_zero_bit(arena->map, limit, start);
-        end = n + npages;
-        if (unlikely(end >= limit)) {
+        /* The case below can happen if we have a small segment appended
+         * to a large, or when the previous alloc was at the very end of
+         * the available space. If so, go back to the beginning and flush.
+         */
+        if (start >= limit) {
+                start = 0;
+                if (iommu->flush_all)
+                        iommu->flush_all(iommu);
+        }
+
+ again:
+
+        if (dev)
+                boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+                                      1 << IO_PAGE_SHIFT);
+        else
+                boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
+
+        n = iommu_area_alloc(arena->map, limit, start, npages, 0,
+                             boundary_size >> IO_PAGE_SHIFT, 0);
+        if (n == -1) {
                 if (likely(pass < 1)) {
-                        limit = start;
+                        /* First failure, rescan from the beginning.  */
                         start = 0;
-                        __iommu_flushall(iommu);
+                        if (iommu->flush_all)
+                                iommu->flush_all(iommu);
                         pass++;
                         goto again;
                 } else {
-                        /* Scanned the whole thing, give up. */
-                        return -1;
+                        /* Second failure, give up */
+                        return DMA_ERROR_CODE;
                 }
         }
 
-        for (i = n; i < end; i++) {
-                if (test_bit(i, arena->map)) {
-                        start = i + 1;
-                        goto again;
-                }
-        }
-
-        for (i = n; i < end; i++)
-                __set_bit(i, arena->map);
+        end = n + npages;
 
         arena->hint = end;
 
+        /* Update handle for SG allocations */
+        if (handle)
+                *handle = end;
+
         return n;
 }
 
-static void arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
+void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
 {
-        unsigned long i;
+        struct iommu_arena *arena = &iommu->arena;
+        unsigned long entry;
 
-        for (i = base; i < (base + npages); i++)
-                __clear_bit(i, arena->map);
+        entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+
+        iommu_area_free(arena->map, entry, npages);
 }
 
 int iommu_table_init(struct iommu *iommu, int tsbsize,
@@ -156,6 +194,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
         }
         iommu->arena.limit = num_tsb_entries;
 
+        if (tlb_type != hypervisor)
+                iommu->flush_all = iommu_flushall;
+
         /* Allocate and initialize the dummy page which we
          * set inactive IO PTEs to point to.
          */
@@ -192,22 +233,18 @@ out_free_map:
         return -ENOMEM;
 }
 
-static inline iopte_t *alloc_npages(struct iommu *iommu, unsigned long npages)
+static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+                                    unsigned long npages)
 {
-        long entry;
+        unsigned long entry;
 
-        entry = arena_alloc(iommu, npages);
-        if (unlikely(entry < 0))
+        entry = iommu_range_alloc(dev, iommu, npages, NULL);
+        if (unlikely(entry == DMA_ERROR_CODE))
                 return NULL;
 
         return iommu->page_table + entry;
 }
 
-static inline void free_npages(struct iommu *iommu, dma_addr_t base, unsigned long npages)
-{
-        arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
-}
-
 static int iommu_alloc_ctx(struct iommu *iommu)
 {
         int lowest = iommu->ctx_lowest_free;
@@ -258,7 +295,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
         iommu = dev->archdata.iommu;
 
         spin_lock_irqsave(&iommu->lock, flags);
-        iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
+        iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
         spin_unlock_irqrestore(&iommu->lock, flags);
 
         if (unlikely(iopte == NULL)) {
@@ -296,7 +333,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 
         spin_lock_irqsave(&iommu->lock, flags);
 
-        free_npages(iommu, dvma - iommu->page_table_map_base, npages);
+        iommu_range_free(iommu, dvma, npages);
 
         spin_unlock_irqrestore(&iommu->lock, flags);
 
@@ -327,7 +364,7 @@ static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
         npages >>= IO_PAGE_SHIFT;
 
         spin_lock_irqsave(&iommu->lock, flags);
-        base = alloc_npages(iommu, npages);
+        base = alloc_npages(dev, iommu, npages);
         ctx = 0;
         if (iommu->iommu_ctxflush)
                 ctx = iommu_alloc_ctx(iommu);
@@ -465,224 +502,219 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
         for (i = 0; i < npages; i++)
                 iopte_make_dummy(iommu, base + i);
 
-        free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+        iommu_range_free(iommu, bus_addr, npages);
 
         iommu_free_ctx(iommu, ctx);
 
         spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-#define SG_ENT_PHYS_ADDRESS(SG)        (__pa(sg_virt((SG))))
-
-static void fill_sg(iopte_t *iopte, struct scatterlist *sg,
-                    int nused, int nelems,
-                    unsigned long iopte_protection)
-{
-        struct scatterlist *dma_sg = sg;
-        int i;
-
-        for (i = 0; i < nused; i++) {
-                unsigned long pteval = ~0UL;
-                u32 dma_npages;
-
-                dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
-                              dma_sg->dma_length +
-                              ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
-                do {
-                        unsigned long offset;
-                        signed int len;
-
-                        /* If we are here, we know we have at least one
-                         * more page to map.  So walk forward until we
-                         * hit a page crossing, and begin creating new
-                         * mappings from that spot.
-                         */
-                        for (;;) {
-                                unsigned long tmp;
-
-                                tmp = SG_ENT_PHYS_ADDRESS(sg);
-                                len = sg->length;
-                                if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
-                                        pteval = tmp & IO_PAGE_MASK;
-                                        offset = tmp & (IO_PAGE_SIZE - 1UL);
-                                        break;
-                                }
-                                if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
-                                        pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
-                                        offset = 0UL;
-                                        len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
-                                        break;
-                                }
-                                sg = sg_next(sg);
-                                nelems--;
-                        }
-
-                        pteval = iopte_protection | (pteval & IOPTE_PAGE);
-                        while (len > 0) {
-                                *iopte++ = __iopte(pteval);
-                                pteval += IO_PAGE_SIZE;
-                                len -= (IO_PAGE_SIZE - offset);
-                                offset = 0;
-                                dma_npages--;
-                        }
-
-                        pteval = (pteval & IOPTE_PAGE) + len;
-                        sg = sg_next(sg);
-                        nelems--;
-
-                        /* Skip over any tail mappings we've fully mapped,
-                         * adjusting pteval along the way.  Stop when we
-                         * detect a page crossing event.
-                         */
-                        while (nelems &&
-                               (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
-                               (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
-                               ((pteval ^
-                                 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
-                                pteval += sg->length;
-                                sg = sg_next(sg);
-                                nelems--;
-                        }
-                        if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
-                                pteval = ~0UL;
-                } while (dma_npages != 0);
-                dma_sg = sg_next(dma_sg);
-        }
-}
-
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
                          int nelems, enum dma_data_direction direction)
 {
-        struct iommu *iommu;
+        struct scatterlist *s, *outs, *segstart;
+        unsigned long flags, handle, prot, ctx;
+        dma_addr_t dma_next = 0, dma_addr;
+        unsigned int max_seg_size;
+        int outcount, incount, i;
         struct strbuf *strbuf;
-        unsigned long flags, ctx, npages, iopte_protection;
-        iopte_t *base;
-        u32 dma_base;
-        struct scatterlist *sgtmp;
-        int used;
-
-        /* Fast path single entry scatterlists. */
-        if (nelems == 1) {
-                sglist->dma_address =
-                        dma_4u_map_single(dev, sg_virt(sglist),
-                                          sglist->length, direction);
-                if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
-                        return 0;
-                sglist->dma_length = sglist->length;
-                return 1;
-        }
+        struct iommu *iommu;
+
+        BUG_ON(direction == DMA_NONE);
 
         iommu = dev->archdata.iommu;
         strbuf = dev->archdata.stc;
-
-        if (unlikely(direction == DMA_NONE))
-                goto bad_no_ctx;
-
-        /* Step 1: Prepare scatter list. */
-
-        npages = prepare_sg(dev, sglist, nelems);
-
-        /* Step 2: Allocate a cluster and context, if necessary. */
+        if (nelems == 0 || !iommu)
+                return 0;
 
         spin_lock_irqsave(&iommu->lock, flags);
 
-        base = alloc_npages(iommu, npages);
         ctx = 0;
         if (iommu->iommu_ctxflush)
                 ctx = iommu_alloc_ctx(iommu);
 
-        spin_unlock_irqrestore(&iommu->lock, flags);
+        if (strbuf->strbuf_enabled)
+                prot = IOPTE_STREAMING(ctx);
+        else
+                prot = IOPTE_CONSISTENT(ctx);
+        if (direction != DMA_TO_DEVICE)
+                prot |= IOPTE_WRITE;
+
+        outs = s = segstart = &sglist[0];
+        outcount = 1;
+        incount = nelems;
+        handle = 0;
+
+        /* Init first segment length for backout at failure */
+        outs->dma_length = 0;
+
+        max_seg_size = dma_get_max_seg_size(dev);
+        for_each_sg(sglist, s, nelems, i) {
+                unsigned long paddr, npages, entry, slen;
+                iopte_t *base;
+
+                slen = s->length;
+                /* Sanity check */
+                if (slen == 0) {
+                        dma_next = 0;
+                        continue;
+                }
+                /* Allocate iommu entries for that segment */
+                paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+                npages = iommu_num_pages(paddr, slen);
+                entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+                /* Handle failure */
+                if (unlikely(entry == DMA_ERROR_CODE)) {
+                        if (printk_ratelimit())
+                                printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+                                       " npages %lx\n", iommu, paddr, npages);
+                        goto iommu_map_failed;
+                }
 
-        if (base == NULL)
-                goto bad;
+                base = iommu->page_table + entry;
 
-        dma_base = iommu->page_table_map_base +
-                ((base - iommu->page_table) << IO_PAGE_SHIFT);
+                /* Convert entry to a dma_addr_t */
+                dma_addr = iommu->page_table_map_base +
+                        (entry << IO_PAGE_SHIFT);
+                dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
-        /* Step 3: Normalize DMA addresses. */
-        used = nelems;
+                /* Insert into HW table */
+                paddr &= IO_PAGE_MASK;
+                while (npages--) {
+                        iopte_val(*base) = prot | paddr;
+                        base++;
+                        paddr += IO_PAGE_SIZE;
+                }
 
-        sgtmp = sglist;
-        while (used && sgtmp->dma_length) {
-                sgtmp->dma_address += dma_base;
-                sgtmp = sg_next(sgtmp);
-                used--;
+                /* If we are in an open segment, try merging */
+                if (segstart != s) {
+                        /* We cannot merge if:
+                         * - allocated dma_addr isn't contiguous to previous allocation
+                         */
+                        if ((dma_addr != dma_next) ||
+                            (outs->dma_length + s->length > max_seg_size)) {
+                                /* Can't merge: create a new segment */
+                                segstart = s;
+                                outcount++;
+                                outs = sg_next(outs);
+                        } else {
+                                outs->dma_length += s->length;
+                        }
+                }
+
+                if (segstart == s) {
+                        /* This is a new segment, fill entries */
+                        outs->dma_address = dma_addr;
+                        outs->dma_length = slen;
+                }
+
+                /* Calculate next page pointer for contiguous check */
+                dma_next = dma_addr + slen;
         }
-        used = nelems - used;
 
-        /* Step 4: Create the mappings. */
-        if (strbuf->strbuf_enabled)
-                iopte_protection = IOPTE_STREAMING(ctx);
-        else
-                iopte_protection = IOPTE_CONSISTENT(ctx);
-        if (direction != DMA_TO_DEVICE)
-                iopte_protection |= IOPTE_WRITE;
+        spin_unlock_irqrestore(&iommu->lock, flags);
 
-        fill_sg(base, sglist, used, nelems, iopte_protection);
+        if (outcount < incount) {
+                outs = sg_next(outs);
+                outs->dma_address = DMA_ERROR_CODE;
+                outs->dma_length = 0;
+        }
 
-#ifdef VERIFY_SG
-        verify_sglist(sglist, nelems, base, npages);
-#endif
+        return outcount;
 
-        return used;
+iommu_map_failed:
+        for_each_sg(sglist, s, nelems, i) {
+                if (s->dma_length != 0) {
+                        unsigned long vaddr, npages, entry, i;
+                        iopte_t *base;
+
+                        vaddr = s->dma_address & IO_PAGE_MASK;
+                        npages = iommu_num_pages(s->dma_address, s->dma_length);
+                        iommu_range_free(iommu, vaddr, npages);
+
+                        entry = (vaddr - iommu->page_table_map_base)
+                                >> IO_PAGE_SHIFT;
+                        base = iommu->page_table + entry;
+
+                        for (i = 0; i < npages; i++)
+                                iopte_make_dummy(iommu, base + i);
+
+                        s->dma_address = DMA_ERROR_CODE;
+                        s->dma_length = 0;
+                }
+                if (s == outs)
+                        break;
+        }
+        spin_unlock_irqrestore(&iommu->lock, flags);
 
-bad:
-        iommu_free_ctx(iommu, ctx);
-bad_no_ctx:
-        if (printk_ratelimit())
-                WARN_ON(1);
         return 0;
 }
 
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+        unsigned long ctx = 0;
+
+        if (iommu->iommu_ctxflush) {
+                iopte_t *base;
+                u32 bus_addr;
+
+                bus_addr = sg->dma_address & IO_PAGE_MASK;
+                base = iommu->page_table +
+                        ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+                ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+        }
+        return ctx;
+}
+
 static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
                             int nelems, enum dma_data_direction direction)
 {
-        struct iommu *iommu;
+        unsigned long flags, ctx;
+        struct scatterlist *sg;
         struct strbuf *strbuf;
-        iopte_t *base;
-        unsigned long flags, ctx, i, npages;
-        struct scatterlist *sg, *sgprv;
-        u32 bus_addr;
+        struct iommu *iommu;
 
-        if (unlikely(direction == DMA_NONE)) {
-                if (printk_ratelimit())
-                        WARN_ON(1);
-        }
+        BUG_ON(direction == DMA_NONE);
 
         iommu = dev->archdata.iommu;
         strbuf = dev->archdata.stc;
 
-        bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
-        sgprv = NULL;
-        for_each_sg(sglist, sg, nelems, i) {
-                if (sg->dma_length == 0)
-                        break;
-                sgprv = sg;
-        }
+        ctx = fetch_sg_ctx(iommu, sglist);
 
-        npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) -
-                  bus_addr) >> IO_PAGE_SHIFT;
+        spin_lock_irqsave(&iommu->lock, flags);
 
-        base = iommu->page_table +
-                ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+        sg = sglist;
+        while (nelems--) {
+                dma_addr_t dma_handle = sg->dma_address;
+                unsigned int len = sg->dma_length;
+                unsigned long npages, entry;
+                iopte_t *base;
+                int i;
 
-        spin_lock_irqsave(&iommu->lock, flags);
+                if (!len)
+                        break;
+                npages = iommu_num_pages(dma_handle, len);
+                iommu_range_free(iommu, dma_handle, npages);
 
-        /* Record the context, if any. */
-        ctx = 0;
-        if (iommu->iommu_ctxflush)
-                ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+                entry = ((dma_handle - iommu->page_table_map_base)
+                         >> IO_PAGE_SHIFT);
+                base = iommu->page_table + entry;
 
-        /* Step 1: Kick data out of streaming buffers if necessary. */
-        if (strbuf->strbuf_enabled)
-                strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+                dma_handle &= IO_PAGE_MASK;
+                if (strbuf->strbuf_enabled)
+                        strbuf_flush(strbuf, iommu, dma_handle, ctx,
+                                     npages, direction);
 
-        /* Step 2: Clear out the TSB entries. */
-        for (i = 0; i < npages; i++)
-                iopte_make_dummy(iommu, base + i);
+                for (i = 0; i < npages; i++)
+                        iopte_make_dummy(iommu, base + i);
 
-        free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+                sg = sg_next(sg);
+        }
 
         iommu_free_ctx(iommu, ctx);
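
For readers skimming the change: the heart of the patch is that the old open-coded bitmap scan (find_next_zero_bit() plus a test_bit() loop) is replaced by iommu_area_alloc() from the new <linux/iommu-helper.h> include, which also refuses to hand back a range that crosses the device's DMA segment boundary (dma_get_seg_boundary(dev)). The user-space sketch below only illustrates that idea under simplified assumptions: fixed bitmap size, fixed boundary, no locking, no IOMMU flush. The names map, area_alloc and range_alloc are invented for the example and are not kernel interfaces.

/* Illustrative sketch only: a user-space model of a boundary-respecting
 * bitmap range allocator in the spirit of iommu_range_alloc() above.
 */
#include <stdio.h>
#include <string.h>

#define MAP_BITS      64          /* total IO pages tracked by the bitmap */
#define BOUNDARY_BITS 16          /* allocations may not cross this boundary */

static unsigned char map[MAP_BITS];    /* 0 = free page, 1 = used page */
static unsigned long hint;             /* next-fit hint, only ever advances */

/* Find 'npages' consecutive free bits at or after 'start' that do not
 * cross a BOUNDARY_BITS boundary.  Returns the first bit index or -1. */
static long area_alloc(unsigned long start, unsigned long npages)
{
        unsigned long n = start;

        while (n + npages <= MAP_BITS) {
                unsigned long end = n + npages, i;

                /* Reject candidates that straddle a boundary. */
                if ((n / BOUNDARY_BITS) != ((end - 1) / BOUNDARY_BITS)) {
                        n = ((n / BOUNDARY_BITS) + 1) * BOUNDARY_BITS;
                        continue;
                }
                for (i = n; i < end; i++)
                        if (map[i])
                                break;
                if (i == end) {
                        memset(&map[n], 1, npages);
                        return n;
                }
                n = i + 1;      /* skip past the used page we hit */
        }
        return -1;
}

/* Two-pass wrapper mirroring the patch: try from the hint, then once more
 * from 0 (where the real code also calls iommu->flush_all on sun4u). */
static long range_alloc(unsigned long npages)
{
        long n = area_alloc(hint, npages);

        if (n < 0)
                n = area_alloc(0, npages);      /* "flush and rescan" pass */
        if (n >= 0)
                hint = n + npages;
        return n;
}

int main(void)
{
        printf("got %ld\n", range_alloc(8));
        printf("got %ld\n", range_alloc(12));   /* won't cross a 16-page boundary */
        return 0;
}

The two-pass retry in the sketch mirrors the sun4u policy the new comment describes: on the first failure the hint wraps to zero and, in the kernel, iommu->flush_all() is invoked before rescanning; a second failure returns DMA_ERROR_CODE.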
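The rewritten dma_4u_map_sg() also drops the old prepare_sg()/fill_sg() two-step: each scatterlist entry gets its own IOMMU range (allocated in one pass while the lock is held, threading the 'handle' through iommu_range_alloc()), and an entry is merged into the previous output segment only when its freshly allocated dma_addr equals dma_next and the combined length stays within dma_get_max_seg_size(dev). Below is a stand-alone sketch of just that merge rule; struct seg, MAX_SEG_SIZE and the sample addresses are hypothetical stand-ins for the kernel's scatterlist handling.

/* Illustrative sketch only: the coalescing test dma_4u_map_sg() applies
 * when walking the scatterlist. */
#include <stdio.h>

struct seg {
        unsigned long dma_addr;
        unsigned long len;
};

#define MAX_SEG_SIZE 0x10000UL  /* stand-in for dma_get_max_seg_size(dev) */

/* Merge in[] into out[]; a segment is folded into the previous one only if
 * it starts exactly where the previous one ended (dma_addr == dma_next) and
 * the combined length still fits in MAX_SEG_SIZE.  Returns the new count. */
static int merge_segments(const struct seg *in, int n, struct seg *out)
{
        unsigned long dma_next = 0;
        int outcount = 0;

        for (int i = 0; i < n; i++) {
                if (outcount &&
                    in[i].dma_addr == dma_next &&
                    out[outcount - 1].len + in[i].len <= MAX_SEG_SIZE) {
                        out[outcount - 1].len += in[i].len;     /* merge */
                } else {
                        out[outcount++] = in[i];                /* new segment */
                }
                dma_next = in[i].dma_addr + in[i].len;
        }
        return outcount;
}

int main(void)
{
        struct seg in[] = {
                { 0x1000, 0x2000 },     /* contiguous with the next one ... */
                { 0x3000, 0x1000 },     /* ... so these two merge           */
                { 0x8000, 0x1000 },     /* gap: stays a separate segment    */
        };
        struct seg out[3];
        int n = merge_segments(in, 3, out);

        for (int i = 0; i < n; i++)
                printf("seg %d: addr=0x%lx len=0x%lx\n", i, out[i].dma_addr, out[i].len);
        return 0;
}

On an allocation failure the new iommu_map_failed: path walks the list again, freeing each range with iommu_range_free(), pointing the IOPTEs back at the dummy page and zeroing dma_address/dma_length, so callers simply see a return value of 0.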