diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-12 15:01:59 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-12 15:01:59 +0200 |
commit | 260d16580db018e3faeb1992c70c13bf00e726b8 (patch) | |
tree | de12ab8c1c47c7070b7a14e6f31a67bc90a74455 | |
parent | Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/l... (diff) | |
parent | s390/dasd: blk-mq conversion (diff) | |
download | linux-260d16580db018e3faeb1992c70c13bf00e726b8.tar.xz linux-260d16580db018e3faeb1992c70c13bf00e726b8.zip |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull more s390 updates from Martin Schwidefsky:
"The second patch set for the 4.14 merge window:
- Convert the dasd device driver to the blk-mq interface.
- Provide three zcrypt interfaces for vfio_ap. These will be required
for KVM guest access to the crypto cards attached via the AP bus.
- A couple of memory management bug fixes."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
s390/dasd: blk-mq conversion
s390/mm: use a single lock for the fields in mm_context_t
s390/mm: fix race on mm->context.flush_mm
s390/mm: fix local TLB flushing vs. detach of an mm address space
s390/zcrypt: externalize AP queue interrupt control
s390/zcrypt: externalize AP config info query
s390/zcrypt: externalize test AP queue
s390/mm: use VM_BUG_ON in crst_table_[upgrade|downgrade]
-rw-r--r-- | arch/s390/include/asm/ap.h | 126 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu.h | 7 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu_context.h | 10 | ||||
-rw-r--r-- | arch/s390/include/asm/tlbflush.h | 30 | ||||
-rw-r--r-- | arch/s390/mm/gmap.c | 8 | ||||
-rw-r--r-- | arch/s390/mm/pgalloc.c | 20 | ||||
-rw-r--r-- | drivers/s390/block/dasd.c | 331 | ||||
-rw-r--r-- | drivers/s390/block/dasd_devmap.c | 8 | ||||
-rw-r--r-- | drivers/s390/block/dasd_int.h | 19 | ||||
-rw-r--r-- | drivers/s390/crypto/ap_asm.h | 9 | ||||
-rw-r--r-- | drivers/s390/crypto/ap_bus.c | 49 | ||||
-rw-r--r-- | drivers/s390/crypto/ap_bus.h | 47 | ||||
-rw-r--r-- | drivers/s390/crypto/ap_queue.c | 26 |
13 files changed, 416 insertions, 274 deletions
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h new file mode 100644 index 000000000000..c02f4aba88a6 --- /dev/null +++ b/arch/s390/include/asm/ap.h @@ -0,0 +1,126 @@ +/* + * Adjunct processor (AP) interfaces + * + * Copyright IBM Corp. 2017 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Harald Freudenberger <freude@de.ibm.com> + */ + +#ifndef _ASM_S390_AP_H_ +#define _ASM_S390_AP_H_ + +/** + * The ap_qid_t identifier of an ap queue. + * If the AP facilities test (APFT) facility is available, + * card and queue index are 8 bit values, otherwise + * card index is 6 bit and queue index a 4 bit value. + */ +typedef unsigned int ap_qid_t; + +#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255)) +#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63) +#define AP_QID_QUEUE(_qid) ((_qid) & 255) + +/** + * struct ap_queue_status - Holds the AP queue status. + * @queue_empty: Shows if queue is empty + * @replies_waiting: Waiting replies + * @queue_full: Is 1 if the queue is full + * @irq_enabled: Shows if interrupts are enabled for the AP + * @response_code: Holds the 8 bit response code + * + * The ap queue status word is returned by all three AP functions + * (PQAP, NQAP and DQAP). There's a set of flags in the first + * byte, followed by a 1 byte response code. + */ +struct ap_queue_status { + unsigned int queue_empty : 1; + unsigned int replies_waiting : 1; + unsigned int queue_full : 1; + unsigned int _pad1 : 4; + unsigned int irq_enabled : 1; + unsigned int response_code : 8; + unsigned int _pad2 : 16; +}; + +/** + * ap_test_queue(): Test adjunct processor queue. + * @qid: The AP queue number + * @tbit: Test facilities bit + * @info: Pointer to queue descriptor + * + * Returns AP queue status structure. + */ +struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info); + +struct ap_config_info { + unsigned int apsc : 1; /* S bit */ + unsigned int apxa : 1; /* N bit */ + unsigned int qact : 1; /* C bit */ + unsigned int rc8a : 1; /* R bit */ + unsigned char _reserved1 : 4; + unsigned char _reserved2[3]; + unsigned char Na; /* max # of APs - 1 */ + unsigned char Nd; /* max # of Domains - 1 */ + unsigned char _reserved3[10]; + unsigned int apm[8]; /* AP ID mask */ + unsigned int aqm[8]; /* AP queue mask */ + unsigned int adm[8]; /* AP domain mask */ + unsigned char _reserved4[16]; +} __aligned(8); + +/* + * ap_query_configuration(): Fetch cryptographic config info + * + * Returns the ap configuration info fetched via PQAP(QCI). + * On success 0 is returned, on failure a negative errno + * is returned, e.g. if the PQAP(QCI) instruction is not + * available, the return value will be -EOPNOTSUPP. + */ +int ap_query_configuration(struct ap_config_info *info); + +/* + * struct ap_qirq_ctrl - convenient struct for easy invocation + * of the ap_queue_irq_ctrl() function. This struct is passed + * as GR1 parameter to the PQAP(AQIC) instruction. For details + * please see the AR documentation. + */ +struct ap_qirq_ctrl { + unsigned int _res1 : 8; + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int _res2 : 4; + unsigned int gisc : 3; /* guest isc field */ + unsigned int _res3 : 6; + unsigned int gf : 2; /* gisa format */ + unsigned int _res4 : 1; + unsigned int gisa : 27; /* gisa origin */ + unsigned int _res5 : 1; + unsigned int isc : 3; /* irq sub class */ +}; + +/** + * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * @qid: The AP queue number + * @qirqctrl: struct ap_qirq_ctrl, see above + * @ind: The notification indicator byte + * + * Returns AP queue status. + * + * Control interruption on the given AP queue. + * Just a simple wrapper function for the low level PQAP(AQIC) + * instruction available for other kernel modules. + */ +struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind); + +#endif /* _ASM_S390_AP_H_ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bd6f30304518..3f46a6577b8d 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,12 +5,11 @@ #include <linux/errno.h> typedef struct { + spinlock_t lock; cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; - spinlock_t pgtable_lock; struct list_head pgtable_list; - spinlock_t gmap_lock; struct list_head gmap_list; unsigned long gmap_asce; unsigned long asce; @@ -27,10 +26,8 @@ typedef struct { } mm_context_t; #define INIT_MM_CONTEXT(name) \ - .context.pgtable_lock = \ - __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \ + .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ - .context.gmap_lock = __SPIN_LOCK_UNLOCKED(name.context.gmap_lock), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), static inline int tprot(unsigned long addr) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 72e9ca83a668..3c9abedc323c 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -17,9 +17,8 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - spin_lock_init(&mm->context.pgtable_lock); + spin_lock_init(&mm->context.lock); INIT_LIST_HEAD(&mm->context.pgtable_list); - spin_lock_init(&mm->context.gmap_lock); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); @@ -103,7 +102,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); @@ -121,9 +119,8 @@ static inline void finish_arch_post_lock_switch(void) preempt_disable(); while (atomic_read(&mm->context.flush_count)) cpu_relax(); - - if (mm->context.flush_mm) - __tlb_flush_mm(mm); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + __tlb_flush_mm_lazy(mm); preempt_enable(); } set_fs(current->thread.mm_segment); @@ -136,6 +133,7 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 4d759f8f4bc7..b08d5bc2666e 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -48,23 +48,6 @@ static inline void __tlb_flush_global(void) * Flush TLB entries for a specific mm on all CPUs (in case gmap is used * this implicates multiple ASCEs!). */ -static inline void __tlb_flush_full(struct mm_struct *mm) -{ - preempt_disable(); - atomic_inc(&mm->context.flush_count); - if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - /* Local TLB flush */ - __tlb_flush_local(); - } else { - /* Global TLB flush */ - __tlb_flush_global(); - /* Reset TLB flush mask */ - cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); - } - atomic_dec(&mm->context.flush_count); - preempt_enable(); -} - static inline void __tlb_flush_mm(struct mm_struct *mm) { unsigned long gmap_asce; @@ -76,16 +59,18 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) */ preempt_disable(); atomic_inc(&mm->context.flush_count); + /* Reset TLB flush mask */ + cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); + barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); } else { - __tlb_flush_full(mm); + /* Global TLB flush */ + __tlb_flush_global(); } - /* Reset TLB flush mask */ - cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); atomic_dec(&mm->context.flush_count); preempt_enable(); } @@ -99,7 +84,6 @@ static inline void __tlb_flush_kernel(void) } #else #define __tlb_flush_global() __tlb_flush_local() -#define __tlb_flush_full(mm) __tlb_flush_local() /* * Flush TLB entries for a specific ASCE on all CPUs. @@ -117,10 +101,12 @@ static inline void __tlb_flush_kernel(void) static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { + spin_lock(&mm->context.lock); if (mm->context.flush_mm) { - __tlb_flush_mm(mm); mm->context.flush_mm = 0; + __tlb_flush_mm(mm); } + spin_unlock(&mm->context.lock); } /* diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 9e1494e3d849..2f66290c9b92 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -100,14 +100,14 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) if (!gmap) return NULL; gmap->mm = mm; - spin_lock(&mm->context.gmap_lock); + spin_lock(&mm->context.lock); list_add_rcu(&gmap->list, &mm->context.gmap_list); if (list_is_singular(&mm->context.gmap_list)) gmap_asce = gmap->asce; else gmap_asce = -1UL; WRITE_ONCE(mm->context.gmap_asce, gmap_asce); - spin_unlock(&mm->context.gmap_lock); + spin_unlock(&mm->context.lock); return gmap; } EXPORT_SYMBOL_GPL(gmap_create); @@ -248,7 +248,7 @@ void gmap_remove(struct gmap *gmap) spin_unlock(&gmap->shadow_lock); } /* Remove gmap from the pre-mm list */ - spin_lock(&gmap->mm->context.gmap_lock); + spin_lock(&gmap->mm->context.lock); list_del_rcu(&gmap->list); if (list_empty(&gmap->mm->context.gmap_list)) gmap_asce = 0; @@ -258,7 +258,7 @@ void gmap_remove(struct gmap *gmap) else gmap_asce = -1UL; WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce); - spin_unlock(&gmap->mm->context.gmap_lock); + spin_unlock(&gmap->mm->context.lock); synchronize_rcu(); /* Put reference */ gmap_put(gmap); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index c5b74dd61197..05f1f27e6708 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -83,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) int rc, notify; /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ - BUG_ON(mm->context.asce_limit < _REGION2_SIZE); + VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE); if (end >= TASK_SIZE_MAX) return -ENOMEM; rc = 0; @@ -124,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm) pgd_t *pgd; /* downgrade should only happen from 3 to 2 levels (compat only) */ - BUG_ON(mm->context.asce_limit != _REGION2_SIZE); + VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE); if (current->active_mm == mm) { clear_user_asce(); @@ -188,7 +188,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) /* Try to get a fragment of a 4K page as a 2K page table */ if (!mm_alloc_pgste(mm)) { table = NULL; - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); @@ -203,7 +203,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) list_del(&page->lru); } } - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); if (table) return table; } @@ -227,9 +227,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm) /* Return the first 2K fragment of the page */ atomic_set(&page->_mapcount, 1); clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); } return table; } @@ -243,13 +243,13 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) if (!mm_alloc_pgste(mm)) { /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_mapcount, 1U << bit); if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); if (mask != 0) return; } @@ -275,13 +275,13 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, return; } bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); if (mask & 3) list_add_tail(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); table = (unsigned long *) (__pa(table) | (1U << bit)); tlb_remove_table(tlb, table); } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 9c97ad1ee121..ea19b4ff87a2 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -62,7 +62,6 @@ MODULE_LICENSE("GPL"); static int dasd_alloc_queue(struct dasd_block *); static void dasd_setup_queue(struct dasd_block *); static void dasd_free_queue(struct dasd_block *); -static void dasd_flush_request_queue(struct dasd_block *); static int dasd_flush_block_queue(struct dasd_block *); static void dasd_device_tasklet(struct dasd_device *); static void dasd_block_tasklet(struct dasd_block *); @@ -158,7 +157,6 @@ struct dasd_block *dasd_alloc_block(void) /* open_count = 0 means device online but not in use */ atomic_set(&block->open_count, -1); - spin_lock_init(&block->request_queue_lock); atomic_set(&block->tasklet_scheduled, 0); tasklet_init(&block->tasklet, (void (*)(unsigned long)) dasd_block_tasklet, @@ -391,7 +389,6 @@ static int dasd_state_ready_to_basic(struct dasd_device *device) device->state = DASD_STATE_READY; return rc; } - dasd_flush_request_queue(block); dasd_destroy_partitions(block); block->blocks = 0; block->bp_block = 0; @@ -1645,8 +1642,10 @@ void dasd_generic_handle_state_change(struct dasd_device *device) dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING); dasd_schedule_device_bh(device); - if (device->block) + if (device->block) { dasd_schedule_block_bh(device->block); + blk_mq_run_hw_queues(device->block->request_queue, true); + } } EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); @@ -2638,6 +2637,7 @@ static void dasd_block_timeout(unsigned long ptr) dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING); spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags); dasd_schedule_block_bh(block); + blk_mq_run_hw_queues(block->request_queue, true); } /* @@ -2677,115 +2677,11 @@ static void __dasd_process_erp(struct dasd_device *device, erp_fn(cqr); } -/* - * Fetch requests from the block device queue. - */ -static void __dasd_process_request_queue(struct dasd_block *block) -{ - struct request_queue *queue; - struct request *req; - struct dasd_ccw_req *cqr; - struct dasd_device *basedev; - unsigned long flags; - queue = block->request_queue; - basedev = block->base; - /* No queue ? Then there is nothing to do. */ - if (queue == NULL) - return; - - /* - * We requeue request from the block device queue to the ccw - * queue only in two states. In state DASD_STATE_READY the - * partition detection is done and we need to requeue requests - * for that. State DASD_STATE_ONLINE is normal block device - * operation. - */ - if (basedev->state < DASD_STATE_READY) { - while ((req = blk_fetch_request(block->request_queue))) - __blk_end_request_all(req, BLK_STS_IOERR); - return; - } - - /* - * if device is stopped do not fetch new requests - * except failfast is active which will let requests fail - * immediately in __dasd_block_start_head() - */ - if (basedev->stopped && !(basedev->features & DASD_FEATURE_FAILFAST)) - return; - - /* Now we try to fetch requests from the request queue */ - while ((req = blk_peek_request(queue))) { - if (basedev->features & DASD_FEATURE_READONLY && - rq_data_dir(req) == WRITE) { - DBF_DEV_EVENT(DBF_ERR, basedev, - "Rejecting write request %p", - req); - blk_start_request(req); - __blk_end_request_all(req, BLK_STS_IOERR); - continue; - } - if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) && - (basedev->features & DASD_FEATURE_FAILFAST || - blk_noretry_request(req))) { - DBF_DEV_EVENT(DBF_ERR, basedev, - "Rejecting failfast request %p", - req); - blk_start_request(req); - __blk_end_request_all(req, BLK_STS_TIMEOUT); - continue; - } - cqr = basedev->discipline->build_cp(basedev, block, req); - if (IS_ERR(cqr)) { - if (PTR_ERR(cqr) == -EBUSY) - break; /* normal end condition */ - if (PTR_ERR(cqr) == -ENOMEM) - break; /* terminate request queue loop */ - if (PTR_ERR(cqr) == -EAGAIN) { - /* - * The current request cannot be build right - * now, we have to try later. If this request - * is the head-of-queue we stop the device - * for 1/2 second. - */ - if (!list_empty(&block->ccw_queue)) - break; - spin_lock_irqsave( - get_ccwdev_lock(basedev->cdev), flags); - dasd_device_set_stop_bits(basedev, - DASD_STOPPED_PENDING); - spin_unlock_irqrestore( - get_ccwdev_lock(basedev->cdev), flags); - dasd_block_set_timer(block, HZ/2); - break; - } - DBF_DEV_EVENT(DBF_ERR, basedev, - "CCW creation failed (rc=%ld) " - "on request %p", - PTR_ERR(cqr), req); - blk_start_request(req); - __blk_end_request_all(req, BLK_STS_IOERR); - continue; - } - /* - * Note: callback is set to dasd_return_cqr_cb in - * __dasd_block_start_head to cover erp requests as well - */ - cqr->callback_data = (void *) req; - cqr->status = DASD_CQR_FILLED; - req->completion_data = cqr; - blk_start_request(req); - list_add_tail(&cqr->blocklist, &block->ccw_queue); - INIT_LIST_HEAD(&cqr->devlist); - dasd_profile_start(block, cqr, req); - } -} - static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) { struct request *req; - int status; blk_status_t error = BLK_STS_OK; + int status; req = (struct request *) cqr->callback_data; dasd_profile_end(cqr->block, cqr, req); @@ -2809,7 +2705,19 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) break; } } - __blk_end_request_all(req, error); + + /* + * We need to take care for ETIMEDOUT errors here since the + * complete callback does not get called in this case. + * Take care of all errors here and avoid additional code to + * transfer the error value to the complete callback. + */ + if (error) { + blk_mq_end_request(req, error); + blk_mq_run_hw_queues(req->q, true); + } else { + blk_mq_complete_request(req); + } } /* @@ -2938,27 +2846,30 @@ static void dasd_block_tasklet(struct dasd_block *block) struct list_head final_queue; struct list_head *l, *n; struct dasd_ccw_req *cqr; + struct dasd_queue *dq; atomic_set(&block->tasklet_scheduled, 0); INIT_LIST_HEAD(&final_queue); - spin_lock(&block->queue_lock); + spin_lock_irq(&block->queue_lock); /* Finish off requests on ccw queue */ __dasd_process_block_ccw_queue(block, &final_queue); - spin_unlock(&block->queue_lock); + spin_unlock_irq(&block->queue_lock); + /* Now call the callback function of requests with final status */ - spin_lock_irq(&block->request_queue_lock); list_for_each_safe(l, n, &final_queue) { cqr = list_entry(l, struct dasd_ccw_req, blocklist); + dq = cqr->dq; + spin_lock_irq(&dq->lock); list_del_init(&cqr->blocklist); __dasd_cleanup_cqr(cqr); + spin_unlock_irq(&dq->lock); } - spin_lock(&block->queue_lock); - /* Get new request from the block device request queue */ - __dasd_process_request_queue(block); + + spin_lock_irq(&block->queue_lock); /* Now check if the head of the ccw queue needs to be started. */ __dasd_block_start_head(block); - spin_unlock(&block->queue_lock); - spin_unlock_irq(&block->request_queue_lock); + spin_unlock_irq(&block->queue_lock); + if (waitqueue_active(&shutdown_waitq)) wake_up(&shutdown_waitq); dasd_put_device(block->base); @@ -2977,14 +2888,13 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr) { struct dasd_block *block = cqr->block; struct request *req; - unsigned long flags; if (!block) return -EINVAL; - spin_lock_irqsave(&block->request_queue_lock, flags); + spin_lock_irq(&cqr->dq->lock); req = (struct request *) cqr->callback_data; - blk_requeue_request(block->request_queue, req); - spin_unlock_irqrestore(&block->request_queue_lock, flags); + blk_mq_requeue_request(req, false); + spin_unlock_irq(&cqr->dq->lock); return 0; } @@ -2999,6 +2909,7 @@ static int dasd_flush_block_queue(struct dasd_block *block) struct dasd_ccw_req *cqr, *n; int rc, i; struct list_head flush_queue; + unsigned long flags; INIT_LIST_HEAD(&flush_queue); spin_lock_bh(&block->queue_lock); @@ -3037,11 +2948,11 @@ restart_cb: goto restart_cb; } /* call the callback function */ - spin_lock_irq(&block->request_queue_lock); + spin_lock_irqsave(&cqr->dq->lock, flags); cqr->endclk = get_tod_clock(); list_del_init(&cqr->blocklist); __dasd_cleanup_cqr(cqr); - spin_unlock_irq(&block->request_queue_lock); + spin_unlock_irqrestore(&cqr->dq->lock, flags); } return rc; } @@ -3069,42 +2980,114 @@ EXPORT_SYMBOL(dasd_schedule_block_bh); /* * Dasd request queue function. Called from ll_rw_blk.c */ -static void do_dasd_request(struct request_queue *queue) +static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *qd) { - struct dasd_block *block; + struct dasd_block *block = hctx->queue->queuedata; + struct dasd_queue *dq = hctx->driver_data; + struct request *req = qd->rq; + struct dasd_device *basedev; + struct dasd_ccw_req *cqr; + blk_status_t rc = BLK_STS_OK; + + basedev = block->base; + spin_lock_irq(&dq->lock); + if (basedev->state < DASD_STATE_READY) { + DBF_DEV_EVENT(DBF_ERR, basedev, + "device not ready for request %p", req); + rc = BLK_STS_IOERR; + goto out; + } + + /* + * if device is stopped do not fetch new requests + * except failfast is active which will let requests fail + * immediately in __dasd_block_start_head() + */ + if (basedev->stopped && !(basedev->features & DASD_FEATURE_FAILFAST)) { + DBF_DEV_EVENT(DBF_ERR, basedev, + "device stopped request %p", req); + rc = BLK_STS_RESOURCE; + goto out; + } + + if (basedev->features & DASD_FEATURE_READONLY && + rq_data_dir(req) == WRITE) { + DBF_DEV_EVENT(DBF_ERR, basedev, + "Rejecting write request %p", req); + rc = BLK_STS_IOERR; + goto out; + } - block = queue->queuedata; + if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) && + (basedev->features & DASD_FEATURE_FAILFAST || + blk_noretry_request(req))) { + DBF_DEV_EVENT(DBF_ERR, basedev, + "Rejecting failfast request %p", req); + rc = BLK_STS_IOERR; + goto out; + } + + cqr = basedev->discipline->build_cp(basedev, block, req); + if (IS_ERR(cqr)) { + if (PTR_ERR(cqr) == -EBUSY || + PTR_ERR(cqr) == -ENOMEM || + PTR_ERR(cqr) == -EAGAIN) { + rc = BLK_STS_RESOURCE; + goto out; + } + DBF_DEV_EVENT(DBF_ERR, basedev, + "CCW creation failed (rc=%ld) on request %p", + PTR_ERR(cqr), req); + rc = BLK_STS_IOERR; + goto out; + } + /* + * Note: callback is set to dasd_return_cqr_cb in + * __dasd_block_start_head to cover erp requests as well + */ + cqr->callback_data = req; + cqr->status = DASD_CQR_FILLED; + cqr->dq = dq; + req->completion_data = cqr; + blk_mq_start_request(req); spin_lock(&block->queue_lock); - /* Get new request from the block device request queue */ - __dasd_process_request_queue(block); - /* Now check if the head of the ccw queue needs to be started. */ - __dasd_block_start_head(block); + list_add_tail(&cqr->blocklist, &block->ccw_queue); + INIT_LIST_HEAD(&cqr->devlist); + dasd_profile_start(block, cqr, req); + dasd_schedule_block_bh(block); spin_unlock(&block->queue_lock); + +out: + spin_unlock_irq(&dq->lock); + return rc; } /* * Block timeout callback, called from the block layer * - * request_queue lock is held on entry. - * * Return values: * BLK_EH_RESET_TIMER if the request should be left running * BLK_EH_NOT_HANDLED if the request is handled or terminated * by the driver. */ -enum blk_eh_timer_return dasd_times_out(struct request *req) +enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) { struct dasd_ccw_req *cqr = req->completion_data; struct dasd_block *block = req->q->queuedata; struct dasd_device *device; + unsigned long flags; int rc = 0; if (!cqr) return BLK_EH_NOT_HANDLED; + spin_lock_irqsave(&cqr->dq->lock, flags); device = cqr->startdev ? cqr->startdev : block->base; - if (!device->blk_timeout) + if (!device->blk_timeout) { + spin_unlock_irqrestore(&cqr->dq->lock, flags); return BLK_EH_RESET_TIMER; + } DBF_DEV_EVENT(DBF_WARNING, device, " dasd_times_out cqr %p status %x", cqr, cqr->status); @@ -3154,19 +3137,64 @@ enum blk_eh_timer_return dasd_times_out(struct request *req) } dasd_schedule_block_bh(block); spin_unlock(&block->queue_lock); + spin_unlock_irqrestore(&cqr->dq->lock, flags); return rc ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; } +static int dasd_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int idx) +{ + struct dasd_queue *dq = kzalloc(sizeof(*dq), GFP_KERNEL); + + if (!dq) + return -ENOMEM; + + spin_lock_init(&dq->lock); + hctx->driver_data = dq; + + return 0; +} + +static void dasd_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx) +{ + kfree(hctx->driver_data); + hctx->driver_data = NULL; +} + +static void dasd_request_done(struct request *req) +{ + blk_mq_end_request(req, 0); + blk_mq_run_hw_queues(req->q, true); +} + +static struct blk_mq_ops dasd_mq_ops = { + .queue_rq = do_dasd_request, + .complete = dasd_request_done, + .timeout = dasd_times_out, + .init_hctx = dasd_init_hctx, + .exit_hctx = dasd_exit_hctx, +}; + /* * Allocate and initialize request queue and default I/O scheduler. */ static int dasd_alloc_queue(struct dasd_block *block) { - block->request_queue = blk_init_queue(do_dasd_request, - &block->request_queue_lock); - if (block->request_queue == NULL) - return -ENOMEM; + int rc; + + block->tag_set.ops = &dasd_mq_ops; + block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; + block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; + block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + + rc = blk_mq_alloc_tag_set(&block->tag_set); + if (rc) + return rc; + + block->request_queue = blk_mq_init_queue(&block->tag_set); + if (IS_ERR(block->request_queue)) + return PTR_ERR(block->request_queue); block->request_queue->queuedata = block; @@ -3229,26 +3257,11 @@ static void dasd_free_queue(struct dasd_block *block) { if (block->request_queue) { blk_cleanup_queue(block->request_queue); + blk_mq_free_tag_set(&block->tag_set); block->request_queue = NULL; } } -/* - * Flush request on the request queue. - */ -static void dasd_flush_request_queue(struct dasd_block *block) -{ - struct request *req; - - if (!block->request_queue) - return; - - spin_lock_irq(&block->request_queue_lock); - while ((req = blk_fetch_request(block->request_queue))) - __blk_end_request_all(req, BLK_STS_IOERR); - spin_unlock_irq(&block->request_queue_lock); -} - static int dasd_open(struct block_device *bdev, fmode_t mode) { struct dasd_device *base; @@ -3744,8 +3757,10 @@ int dasd_generic_path_operational(struct dasd_device *device) return 1; } dasd_schedule_device_bh(device); - if (device->block) + if (device->block) { dasd_schedule_block_bh(device->block); + blk_mq_run_hw_queues(device->block->request_queue, true); + } if (!device->stopped) wake_up(&generic_waitq); @@ -4008,8 +4023,10 @@ int dasd_generic_restore_device(struct ccw_device *cdev) */ device->stopped |= DASD_UNRESUMED_PM; - if (device->block) + if (device->block) { dasd_schedule_block_bh(device->block); + blk_mq_run_hw_queues(device->block->request_queue, true); + } clear_bit(DASD_FLAG_SUSPENDED, &device->flags); dasd_put_device(device); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index e38042ce94e6..c95a4784c191 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1326,7 +1326,7 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr, { struct dasd_device *device; struct request_queue *q; - unsigned long val, flags; + unsigned long val; device = dasd_device_from_cdev(to_ccwdev(dev)); if (IS_ERR(device) || !device->block) @@ -1342,16 +1342,10 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr, dasd_put_device(device); return -ENODEV; } - spin_lock_irqsave(&device->block->request_queue_lock, flags); - if (!val) - blk_queue_rq_timed_out(q, NULL); - else - blk_queue_rq_timed_out(q, dasd_times_out); device->blk_timeout = val; blk_queue_rq_timeout(q, device->blk_timeout * HZ); - spin_unlock_irqrestore(&device->block->request_queue_lock, flags); dasd_put_device(device); return count; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index f9e25fc03d6b..db470bd10175 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -56,6 +56,7 @@ #include <asm/dasd.h> #include <asm/idals.h> #include <linux/bitops.h> +#include <linux/blk-mq.h> /* DASD discipline magic */ #define DASD_ECKD_MAGIC 0xC5C3D2C4 @@ -185,6 +186,7 @@ struct dasd_ccw_req { char status; /* status of this request */ short retries; /* A retry counter */ unsigned long flags; /* flags of this request */ + struct dasd_queue *dq; /* ... and how */ unsigned long starttime; /* jiffies time of request start */ @@ -248,6 +250,16 @@ struct dasd_ccw_req { #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ +/* + * There is no reliable way to determine the number of available CPUs on + * LPAR but there is no big performance difference between 1 and the + * maximum CPU number. + * 64 is a good trade off performance wise. + */ +#define DASD_NR_HW_QUEUES 64 +#define DASD_MAX_LCU_DEV 256 +#define DASD_REQ_PER_DEV 4 + /* Signature for error recovery functions. */ typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *); @@ -539,6 +551,7 @@ struct dasd_block { struct gendisk *gdp; struct request_queue *request_queue; spinlock_t request_queue_lock; + struct blk_mq_tag_set tag_set; struct block_device *bdev; atomic_t open_count; @@ -563,6 +576,10 @@ struct dasd_attention_data { __u8 lpum; }; +struct dasd_queue { + spinlock_t lock; +}; + /* reasons why device (ccw_device_start) was stopped */ #define DASD_STOPPED_NOT_ACC 1 /* not accessible */ #define DASD_STOPPED_QUIESCE 2 /* Quiesced */ @@ -731,7 +748,7 @@ void dasd_free_device(struct dasd_device *); struct dasd_block *dasd_alloc_block(void); void dasd_free_block(struct dasd_block *); -enum blk_eh_timer_return dasd_times_out(struct request *req); +enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved); void dasd_enable_device(struct dasd_device *); void dasd_set_target_state(struct dasd_device *, int); diff --git a/drivers/s390/crypto/ap_asm.h b/drivers/s390/crypto/ap_asm.h index 287b4ad0999e..cd350345b3d2 100644 --- a/drivers/s390/crypto/ap_asm.h +++ b/drivers/s390/crypto/ap_asm.h @@ -69,16 +69,19 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) } /** - * ap_aqic(): Enable interruption for a specific AP. + * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) * @ind: The notification indicator byte * * Returns AP queue status. */ -static inline struct ap_queue_status ap_aqic(ap_qid_t qid, void *ind) +static inline struct ap_queue_status ap_aqic(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) { register unsigned long reg0 asm ("0") = qid | (3UL << 24); - register unsigned long reg1_in asm ("1") = (8UL << 44) | AP_ISC; + register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; register struct ap_queue_status reg1_out asm ("1"); register void *reg2 asm ("2") = ind; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 6dee598979e7..5f0be2040272 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -166,26 +166,51 @@ static int ap_configuration_available(void) } /** + * ap_apft_available(): Test if AP facilities test (APFT) + * facility is available. + * + * Returns 1 if APFT is is available. + */ +static int ap_apft_available(void) +{ + return test_facility(15); +} + +/** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number + * @tbit: Test facilities bit * @info: Pointer to queue descriptor * * Returns AP queue status structure. */ -static inline struct ap_queue_status -ap_test_queue(ap_qid_t qid, unsigned long *info) +struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info) { - if (test_facility(15)) - qid |= 1UL << 23; /* set APFT T bit*/ + if (tbit) + qid |= 1UL << 23; /* set T bit*/ return ap_tapq(qid, info); } +EXPORT_SYMBOL(ap_test_queue); -static inline int ap_query_configuration(void) +/* + * ap_query_configuration(): Fetch cryptographic config info + * + * Returns the ap configuration info fetched via PQAP(QCI). + * On success 0 is returned, on failure a negative errno + * is returned, e.g. if the PQAP(QCI) instruction is not + * available, the return value will be -EOPNOTSUPP. + */ +int ap_query_configuration(struct ap_config_info *info) { - if (!ap_configuration) + if (!ap_configuration_available()) return -EOPNOTSUPP; - return ap_qci(ap_configuration); + if (!info) + return -EINVAL; + return ap_qci(info); } +EXPORT_SYMBOL(ap_query_configuration); /** * ap_init_configuration(): Allocate and query configuration array. @@ -198,7 +223,7 @@ static void ap_init_configuration(void) ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL); if (!ap_configuration) return; - if (ap_query_configuration() != 0) { + if (ap_query_configuration(ap_configuration) != 0) { kfree(ap_configuration); ap_configuration = NULL; return; @@ -261,7 +286,7 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type, if (!ap_test_config_card_id(AP_QID_CARD(qid))) return -ENODEV; - status = ap_test_queue(qid, &info); + status = ap_test_queue(qid, ap_apft_available(), &info); switch (status.response_code) { case AP_RESPONSE_NORMAL: *queue_depth = (int)(info & 0xff); @@ -940,7 +965,9 @@ static int ap_select_domain(void) for (j = 0; j < AP_DEVICES; j++) { if (!ap_test_config_card_id(j)) continue; - status = ap_test_queue(AP_MKQID(j, i), NULL); + status = ap_test_queue(AP_MKQID(j, i), + ap_apft_available(), + NULL); if (status.response_code != AP_RESPONSE_NORMAL) continue; count++; @@ -993,7 +1020,7 @@ static void ap_scan_bus(struct work_struct *unused) AP_DBF(DBF_DEBUG, "ap_scan_bus running\n"); - ap_query_configuration(); + ap_query_configuration(ap_configuration); if (ap_select_domain() != 0) goto out; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 4dc7c88fb054..754cf2223cfb 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -28,6 +28,7 @@ #include <linux/device.h> #include <linux/types.h> +#include <asm/ap.h> #define AP_DEVICES 64 /* Number of AP devices. */ #define AP_DOMAINS 256 /* Number of AP domains. */ @@ -40,41 +41,6 @@ extern int ap_domain_index; extern spinlock_t ap_list_lock; extern struct list_head ap_card_list; -/** - * The ap_qid_t identifier of an ap queue. It contains a - * 6 bit card index and a 4 bit queue index (domain). - */ -typedef unsigned int ap_qid_t; - -#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255)) -#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63) -#define AP_QID_QUEUE(_qid) ((_qid) & 255) - -/** - * structy ap_queue_status - Holds the AP queue status. - * @queue_empty: Shows if queue is empty - * @replies_waiting: Waiting replies - * @queue_full: Is 1 if the queue is full - * @pad: A 4 bit pad - * @int_enabled: Shows if interrupts are enabled for the AP - * @response_code: Holds the 8 bit response code - * @pad2: A 16 bit pad - * - * The ap queue status word is returned by all three AP functions - * (PQAP, NQAP and DQAP). There's a set of flags in the first - * byte, followed by a 1 byte response code. - */ -struct ap_queue_status { - unsigned int queue_empty : 1; - unsigned int replies_waiting : 1; - unsigned int queue_full : 1; - unsigned int pad1 : 4; - unsigned int int_enabled : 1; - unsigned int response_code : 8; - unsigned int pad2 : 16; -} __packed; - - static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) { return (*ptr & (0x80000000u >> nr)) != 0; @@ -238,17 +204,6 @@ struct ap_message { struct ap_message *); }; -struct ap_config_info { - unsigned int special_command:1; - unsigned int ap_extended:1; - unsigned char reserved1:6; - unsigned char reserved2[15]; - unsigned int apm[8]; /* AP ID mask */ - unsigned int aqm[8]; /* AP queue mask */ - unsigned int adm[8]; /* AP domain mask */ - unsigned char reserved4[16]; -} __packed; - /** * ap_init_message() - Initialize ap_message. * Initialize a message before using. Otherwise this might result in diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 0f1a5d02acb0..56b96edffd5b 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -16,6 +16,25 @@ #include "ap_asm.h" /** + * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) + * @ind: The notification indicator byte + * + * Returns AP queue status. + * + * Control interruption on the given AP queue. + * Just a simple wrapper function for the low level PQAP(AQIC) + * instruction available for other kernel modules. + */ +struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) +{ + return ap_aqic(qid, qirqctrl, ind); +} +EXPORT_SYMBOL(ap_queue_irq_ctrl); + +/** * ap_queue_enable_interruption(): Enable interruption on an AP queue. * @qid: The AP queue number * @ind: the notification indicator byte @@ -27,8 +46,11 @@ static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind) { struct ap_queue_status status; + struct ap_qirq_ctrl qirqctrl = { 0 }; - status = ap_aqic(aq->qid, ind); + qirqctrl.ir = 1; + qirqctrl.isc = AP_ISC; + status = ap_aqic(aq->qid, qirqctrl, ind); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_OTHERWISE_CHANGED: @@ -362,7 +384,7 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq) /* Get the status with TAPQ */ status = ap_tapq(aq->qid, NULL); - if (status.int_enabled == 1) { + if (status.irq_enabled == 1) { /* Irqs are now enabled */ aq->interrupt = AP_INTR_ENABLED; aq->state = (aq->queue_count > 0) ? |