diff options
author | Robin Murphy <robin.murphy@arm.com> | 2017-09-21 17:52:44 +0200 |
---|---|---|
committer | Joerg Roedel <jroedel@suse.de> | 2017-09-27 17:09:57 +0200 |
commit | e60aa7b53845a261dd419652f12ab9f89e668843 (patch) | |
tree | 4fd8f76916b5ad7b393f6e9c1382cd36443ab4e5 /drivers/iommu/iova.c | |
parent | iommu/iova: Optimise the padding calculation (diff) | |
download | linux-e60aa7b53845a261dd419652f12ab9f89e668843.tar.xz linux-e60aa7b53845a261dd419652f12ab9f89e668843.zip |
iommu/iova: Extend rbtree node caching
The cached node mechanism provides a significant performance benefit for
allocations using a 32-bit DMA mask, but in the case of non-PCI devices
or where the 32-bit space is full, the loss of this benefit can be
significant - on large systems there can be many thousands of entries in
the tree, such that walking all the way down to find free space every
time becomes increasingly awful.
Maintain a similar cached node for the whole IOVA space as a superset of
the 32-bit space so that performance can remain much more consistent.
Inspired by work by Zhen Lei <thunder.leizhen@huawei.com>.
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Zhen Lei <thunder.leizhen@huawei.com>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Diffstat (limited to 'drivers/iommu/iova.c')
-rw-r--r-- | drivers/iommu/iova.c | 60 |
1 files changed, 28 insertions, 32 deletions
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 20be9a8b3188..c6f5a22f8d20 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -48,6 +48,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; + iovad->cached_node = NULL; iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; @@ -110,48 +111,44 @@ EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { - if ((*limit_pfn > iovad->dma_32bit_pfn) || - (iovad->cached32_node == NULL)) + struct rb_node *cached_node = NULL; + struct iova *curr_iova; + + if (*limit_pfn <= iovad->dma_32bit_pfn) + cached_node = iovad->cached32_node; + if (!cached_node) + cached_node = iovad->cached_node; + if (!cached_node) return rb_last(&iovad->rbroot); - else { - struct rb_node *prev_node = rb_prev(iovad->cached32_node); - struct iova *curr_iova = - rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo; - return prev_node; - } + + curr_iova = rb_entry(cached_node, struct iova, node); + *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); + + return rb_prev(cached_node); } static void -__cached_rbnode_insert_update(struct iova_domain *iovad, - unsigned long limit_pfn, struct iova *new) +__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) { - if (limit_pfn != iovad->dma_32bit_pfn) - return; - iovad->cached32_node = &new->node; + if (new->pfn_hi < iovad->dma_32bit_pfn) + iovad->cached32_node = &new->node; + else + iovad->cached_node = &new->node; } static void __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - struct rb_node *curr; - if (!iovad->cached32_node) - return; - curr = iovad->cached32_node; - cached_iova = rb_entry(curr, struct iova, node); + cached_iova = rb_entry(iovad->cached32_node, struct iova, node); + if (free->pfn_hi < iovad->dma_32bit_pfn && + iovad->cached32_node && free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached32_node = rb_next(&free->node); - if (free->pfn_lo >= cached_iova->pfn_lo) { - struct rb_node *node = rb_next(&free->node); - struct iova *iova = rb_entry(node, struct iova, node); - - /* only cache if it's below 32bit pfn */ - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) - iovad->cached32_node = node; - else - iovad->cached32_node = NULL; - } + cached_iova = rb_entry(iovad->cached_node, struct iova, node); + if (iovad->cached_node && free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached_node = rb_next(&free->node); } /* Insert the iova into domain rbtree by holding writer lock */ @@ -188,7 +185,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, { struct rb_node *prev, *curr = NULL; unsigned long flags; - unsigned long saved_pfn, new_pfn; + unsigned long new_pfn; unsigned long align_mask = ~0UL; if (size_aligned) @@ -196,7 +193,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, &limit_pfn); prev = curr; while (curr) { @@ -226,7 +222,7 @@ move_left: /* If we have 'prev', it's a valid place to start the insertion. */ iova_insert_rbtree(&iovad->rbroot, new, prev); - __cached_rbnode_insert_update(iovad, saved_pfn, new); + __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); |