diff options
Diffstat (limited to 'drivers/iommu/amd_iommu_v2.c')
-rw-r--r-- | drivers/iommu/amd_iommu_v2.c | 196 |
1 files changed, 85 insertions, 111 deletions
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 5208828792e6..499b4366a98d 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -45,17 +45,22 @@ struct pri_queue { struct pasid_state { struct list_head list; /* For global state-list */ atomic_t count; /* Reference count */ + unsigned mmu_notifier_count; /* Counting nested mmu_notifier + calls */ struct task_struct *task; /* Task bound to this PASID */ struct mm_struct *mm; /* mm_struct for the faults */ struct mmu_notifier mn; /* mmu_otifier handle */ struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ struct device_state *device_state; /* Link to our device_state */ int pasid; /* PASID index */ - spinlock_t lock; /* Protect pri_queues */ + spinlock_t lock; /* Protect pri_queues and + mmu_notifer_count */ wait_queue_head_t wq; /* To wait for count == 0 */ }; struct device_state { + struct list_head list; + u16 devid; atomic_t count; struct pci_dev *pdev; struct pasid_state **states; @@ -81,13 +86,9 @@ struct fault { u16 flags; }; -static struct device_state **state_table; +static LIST_HEAD(state_list); static spinlock_t state_lock; -/* List and lock for all pasid_states */ -static LIST_HEAD(pasid_state_list); -static DEFINE_SPINLOCK(ps_lock); - static struct workqueue_struct *iommu_wq; /* @@ -99,7 +100,6 @@ static u64 *empty_page_table; static void free_pasid_states(struct device_state *dev_state); static void unbind_pasid(struct device_state *dev_state, int pasid); -static int task_exit(struct notifier_block *nb, unsigned long e, void *data); static u16 device_id(struct pci_dev *pdev) { @@ -111,13 +111,25 @@ static u16 device_id(struct pci_dev *pdev) return devid; } +static struct device_state *__get_device_state(u16 devid) +{ + struct device_state *dev_state; + + list_for_each_entry(dev_state, &state_list, list) { + if (dev_state->devid == devid) + return dev_state; + } + + return NULL; +} + static struct device_state *get_device_state(u16 devid) { struct device_state *dev_state; unsigned long flags; spin_lock_irqsave(&state_lock, flags); - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state != NULL) atomic_inc(&dev_state->count); spin_unlock_irqrestore(&state_lock, flags); @@ -158,29 +170,6 @@ static void put_device_state_wait(struct device_state *dev_state) free_device_state(dev_state); } -static struct notifier_block profile_nb = { - .notifier_call = task_exit, -}; - -static void link_pasid_state(struct pasid_state *pasid_state) -{ - spin_lock(&ps_lock); - list_add_tail(&pasid_state->list, &pasid_state_list); - spin_unlock(&ps_lock); -} - -static void __unlink_pasid_state(struct pasid_state *pasid_state) -{ - list_del(&pasid_state->list); -} - -static void unlink_pasid_state(struct pasid_state *pasid_state) -{ - spin_lock(&ps_lock); - __unlink_pasid_state(pasid_state); - spin_unlock(&ps_lock); -} - /* Must be called under dev_state->lock */ static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, int pasid, bool alloc) @@ -337,7 +326,6 @@ static void unbind_pasid(struct device_state *dev_state, int pasid) if (pasid_state == NULL) return; - unlink_pasid_state(pasid_state); __unbind_pasid(pasid_state); put_pasid_state_wait(pasid_state); /* Reference taken in this function */ } @@ -379,7 +367,12 @@ static void free_pasid_states(struct device_state *dev_state) continue; put_pasid_state(pasid_state); - unbind_pasid(dev_state, i); + + /* + * This will call the mn_release function and + * unbind the PASID + */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); } if (dev_state->pasid_levels == 2) @@ -439,12 +432,19 @@ static void mn_invalidate_range_start(struct mmu_notifier *mn, { struct pasid_state *pasid_state; struct device_state *dev_state; + unsigned long flags; pasid_state = mn_to_state(mn); dev_state = pasid_state->device_state; - amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, - __pa(empty_page_table)); + spin_lock_irqsave(&pasid_state->lock, flags); + if (pasid_state->mmu_notifier_count == 0) { + amd_iommu_domain_set_gcr3(dev_state->domain, + pasid_state->pasid, + __pa(empty_page_table)); + } + pasid_state->mmu_notifier_count += 1; + spin_unlock_irqrestore(&pasid_state->lock, flags); } static void mn_invalidate_range_end(struct mmu_notifier *mn, @@ -453,15 +453,39 @@ static void mn_invalidate_range_end(struct mmu_notifier *mn, { struct pasid_state *pasid_state; struct device_state *dev_state; + unsigned long flags; pasid_state = mn_to_state(mn); dev_state = pasid_state->device_state; - amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid, - __pa(pasid_state->mm->pgd)); + spin_lock_irqsave(&pasid_state->lock, flags); + pasid_state->mmu_notifier_count -= 1; + if (pasid_state->mmu_notifier_count == 0) { + amd_iommu_domain_set_gcr3(dev_state->domain, + pasid_state->pasid, + __pa(pasid_state->mm->pgd)); + } + spin_unlock_irqrestore(&pasid_state->lock, flags); +} + +static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct pasid_state *pasid_state; + struct device_state *dev_state; + + might_sleep(); + + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + + if (pasid_state->device_state->inv_ctx_cb) + dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); + + unbind_pasid(dev_state, pasid_state->pasid); } static struct mmu_notifier_ops iommu_mn = { + .release = mn_release, .clear_flush_young = mn_clear_flush_young, .change_pte = mn_change_pte, .invalidate_page = mn_invalidate_page, @@ -504,8 +528,10 @@ static void do_fault(struct work_struct *work) write = !!(fault->flags & PPR_FAULT_WRITE); + down_read(&fault->state->mm->mmap_sem); npages = get_user_pages(fault->state->task, fault->state->mm, fault->address, 1, write, 0, &page, NULL); + up_read(&fault->state->mm->mmap_sem); if (npages == 1) { put_page(page); @@ -604,53 +630,6 @@ static struct notifier_block ppr_nb = { .notifier_call = ppr_notifier, }; -static int task_exit(struct notifier_block *nb, unsigned long e, void *data) -{ - struct pasid_state *pasid_state; - struct task_struct *task; - - task = data; - - /* - * Using this notifier is a hack - but there is no other choice - * at the moment. What I really want is a sleeping notifier that - * is called when an MM goes down. But such a notifier doesn't - * exist yet. The notifier needs to sleep because it has to make - * sure that the device does not use the PASID and the address - * space anymore before it is destroyed. This includes waiting - * for pending PRI requests to pass the workqueue. The - * MMU-Notifiers would be a good fit, but they use RCU and so - * they are not allowed to sleep. Lets see how we can solve this - * in a more intelligent way in the future. - */ -again: - spin_lock(&ps_lock); - list_for_each_entry(pasid_state, &pasid_state_list, list) { - struct device_state *dev_state; - int pasid; - - if (pasid_state->task != task) - continue; - - /* Drop Lock and unbind */ - spin_unlock(&ps_lock); - - dev_state = pasid_state->device_state; - pasid = pasid_state->pasid; - - if (pasid_state->device_state->inv_ctx_cb) - dev_state->inv_ctx_cb(dev_state->pdev, pasid); - - unbind_pasid(dev_state, pasid); - - /* Task may be in the list multiple times */ - goto again; - } - spin_unlock(&ps_lock); - - return NOTIFY_OK; -} - int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, struct task_struct *task) { @@ -703,8 +682,6 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, if (ret) goto out_clear_state; - link_pasid_state(pasid_state); - return 0; out_clear_state: @@ -725,6 +702,7 @@ EXPORT_SYMBOL(amd_iommu_bind_pasid); void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) { + struct pasid_state *pasid_state; struct device_state *dev_state; u16 devid; @@ -741,7 +719,17 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) if (pasid < 0 || pasid >= dev_state->max_pasids) goto out; - unbind_pasid(dev_state, pasid); + pasid_state = get_pasid_state(dev_state, pasid); + if (pasid_state == NULL) + goto out; + /* + * Drop reference taken here. We are safe because we still hold + * the reference taken in the amd_iommu_bind_pasid function. + */ + put_pasid_state(pasid_state); + + /* This will call the mn_release function and unbind the PASID */ + mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); out: put_device_state(dev_state); @@ -771,7 +759,8 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_init(&dev_state->lock); init_waitqueue_head(&dev_state->wq); - dev_state->pdev = pdev; + dev_state->pdev = pdev; + dev_state->devid = devid; tmp = pasids; for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) @@ -801,13 +790,13 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_irqsave(&state_lock, flags); - if (state_table[devid] != NULL) { + if (__get_device_state(devid) != NULL) { spin_unlock_irqrestore(&state_lock, flags); ret = -EBUSY; goto out_free_domain; } - state_table[devid] = dev_state; + list_add_tail(&dev_state->list, &state_list); spin_unlock_irqrestore(&state_lock, flags); @@ -839,13 +828,13 @@ void amd_iommu_free_device(struct pci_dev *pdev) spin_lock_irqsave(&state_lock, flags); - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state == NULL) { spin_unlock_irqrestore(&state_lock, flags); return; } - state_table[devid] = NULL; + list_del(&dev_state->list); spin_unlock_irqrestore(&state_lock, flags); @@ -872,7 +861,7 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state == NULL) goto out_unlock; @@ -903,7 +892,7 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = state_table[devid]; + dev_state = __get_device_state(devid); if (dev_state == NULL) goto out_unlock; @@ -920,7 +909,6 @@ EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); static int __init amd_iommu_v2_init(void) { - size_t state_table_size; int ret; pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n"); @@ -936,16 +924,10 @@ static int __init amd_iommu_v2_init(void) spin_lock_init(&state_lock); - state_table_size = MAX_DEVICES * sizeof(struct device_state *); - state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(state_table_size)); - if (state_table == NULL) - return -ENOMEM; - ret = -ENOMEM; iommu_wq = create_workqueue("amd_iommu_v2"); if (iommu_wq == NULL) - goto out_free; + goto out; ret = -ENOMEM; empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL); @@ -953,29 +935,24 @@ static int __init amd_iommu_v2_init(void) goto out_destroy_wq; amd_iommu_register_ppr_notifier(&ppr_nb); - profile_event_register(PROFILE_TASK_EXIT, &profile_nb); return 0; out_destroy_wq: destroy_workqueue(iommu_wq); -out_free: - free_pages((unsigned long)state_table, get_order(state_table_size)); - +out: return ret; } static void __exit amd_iommu_v2_exit(void) { struct device_state *dev_state; - size_t state_table_size; int i; if (!amd_iommu_v2_supported()) return; - profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb); amd_iommu_unregister_ppr_notifier(&ppr_nb); flush_workqueue(iommu_wq); @@ -998,9 +975,6 @@ static void __exit amd_iommu_v2_exit(void) destroy_workqueue(iommu_wq); - state_table_size = MAX_DEVICES * sizeof(struct device_state *); - free_pages((unsigned long)state_table, get_order(state_table_size)); - free_page((unsigned long)empty_page_table); } |