From 8301c719a2bd131436438e49130ee381d30933f5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 10 Jun 2020 18:41:35 -0700 Subject: nilfs2: fix null pointer dereference at nilfs_segctor_do_construct() After commit c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping has no dirty pages"), the following null pointer dereference has been reported on nilfs2: BUG: kernel NULL pointer dereference, address: 00000000000000a8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI ... RIP: 0010:percpu_counter_add_batch+0xa/0x60 ... Call Trace: __test_set_page_writeback+0x2d3/0x330 nilfs_segctor_do_construct+0x10d3/0x2110 [nilfs2] nilfs_segctor_construct+0x168/0x260 [nilfs2] nilfs_segctor_thread+0x127/0x3b0 [nilfs2] kthread+0xf8/0x130 ... This crash turned out to be caused by set_page_writeback() call for segment summary buffers at nilfs_segctor_prepare_write(). set_page_writeback() can call inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK) where inode_to_wb(inode) is NULL if the inode of underlying block device does not have an associated wb. This fixes the issue by calling inode_attach_wb() in advance to ensure to associate the bdev inode with its wb. Fixes: c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping has no dirty pages") Reported-by: Walton Hoops Reported-by: Tomas Hlavaty Reported-by: ARAI Shun-ichi Reported-by: Hideki EIRAKU Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Tested-by: Ryusuke Konishi Cc: [5.4+] Link: http://lkml.kernel.org/r/20200608.011819.1399059588922299158.konishi.ryusuke@gmail.com Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 445eef41bfaf..91b58c897f92 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2780,6 +2780,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; + inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); + err = nilfs_segctor_start_thread(nilfs->ns_writer); if (err) { kfree(nilfs->ns_writer); -- cgit v1.2.3 From cc989e78472e29c69b196d6985e718c8834027b7 Mon Sep 17 00:00:00 2001 From: Keyur Patel Date: Wed, 10 Jun 2020 18:41:47 -0700 Subject: ocfs2: fix spelling mistake and grammar ./ocfs2/mmap.c:65: bebongs ==> belonging Signed-off-by: Keyur Patel Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Link: http://lkml.kernel.org/r/20200608014818.102358-1-iamkeyur96@gmail.com Signed-off-by: Linus Torvalds --- fs/ocfs2/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3a44e461828a..25cabbfe87fc 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -62,7 +62,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, last_index = (size - 1) >> PAGE_SHIFT; /* - * There are cases that lead to the page no longer bebongs to the + * There are cases that lead to the page no longer belonging to the * mapping. * 1) pagecache truncates locally due to memory pressure. * 2) pagecache truncates when another is taking EX lock against -- cgit v1.2.3 From 9bf5b9eb232b34738800868e30bea3bad4a6a1ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 18:41:59 -0700 Subject: kernel: move use_mm/unuse_mm to kthread.c Patch series "improve use_mm / unuse_mm", v2. This series improves the use_mm / unuse_mm interface by better documenting the assumptions, and my taking the set_fs manipulations spread over the callers into the core API. This patch (of 3): Use the proper API instead. Link: http://lkml.kernel.org/r/20200404094101.672954-1-hch@lst.de These helpers are only for use with kernel threads, and I will tie them more into the kthread infrastructure going forward. Also move the prototypes to kthread.h - mmu_context.h was a little weird to start with as it otherwise contains very low-level MM bits. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Tested-by: Jens Axboe Reviewed-by: Jens Axboe Acked-by: Felix Kuehling Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Jason Wang Cc: "Michael S. Tsirkin" Cc: Zhenyu Wang Cc: Zhi Wang Cc: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20200404094101.672954-1-hch@lst.de Link: http://lkml.kernel.org/r/20200416053158.586887-1-hch@lst.de Link: http://lkml.kernel.org/r/20200404094101.672954-5-hch@lst.de Signed-off-by: Linus Torvalds --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 - drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/legacy/inode.c | 2 +- drivers/vhost/vhost.c | 1 - fs/aio.c | 1 - fs/io-wq.c | 1 - fs/io_uring.c | 1 - include/linux/kthread.h | 5 ++ include/linux/mmu_context.h | 5 -- kernel/kthread.c | 56 +++++++++++++++++++ mm/Makefile | 2 +- mm/mmu_context.c | 64 ---------------------- 18 files changed, 66 insertions(+), 85 deletions(-) delete mode 100644 mm/mmu_context.c (limited to 'fs') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 53b4126373a5..b94bbb8e7bb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 6529caca88fe..35d4a5ab0228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..bf927f432506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,7 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_10_1_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c6944739183a..744366c7ee85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 2f4bdc80a6b2..feab4cc6e836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gfx_v8_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..c7fd0c47b254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,8 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_9_0_offset.h" diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index eee530453aa6..ad8a9df49f29 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 494f853f2206..7ae54b7b637b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 3afddd3bea6e..20fba95ed0a6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 596132a96cd5..ffc7cc31d7eb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/aio.c b/fs/aio.c index 7e079137fdcf..7ecddc2f38db 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/io-wq.c b/fs/io-wq.c index 4023c9846860..5f590bf27bff 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/io_uring.c b/fs/io_uring.c index 9fb0dc6033ba..9842443dde20 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..c2d40c9672d6 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -5,6 +5,8 @@ #include #include +struct mm_struct; + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -198,6 +200,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +void use_mm(struct mm_struct *mm); +void unuse_mm(struct mm_struct *mm); + struct cgroup_subsys_state; #ifdef CONFIG_BLK_CGROUP diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index d9a543a9e1cc..c51a84132d7c 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -4,11 +4,6 @@ #include -struct mm_struct; - -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); - /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..ce4610316377 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1,13 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* Kernel thread helper functions. * Copyright (C) 2004 IBM Corporation, Rusty Russell. + * Copyright (C) 2009 Red Hat, Inc. * * Creation is done via kthreadd, so that we get a clean environment * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). */ #include +#include +#include #include +#include #include #include #include @@ -25,6 +29,7 @@ #include #include + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; @@ -1203,6 +1208,57 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); +/* + * use_mm + * Makes the calling kernel thread take on the specified + * mm context. + * (Note: this routine is intended to be called only + * from a kernel thread context) + */ +void use_mm(struct mm_struct *mm) +{ + struct mm_struct *active_mm; + struct task_struct *tsk = current; + + task_lock(tsk); + active_mm = tsk->active_mm; + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); + task_unlock(tsk); +#ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +#endif + + if (active_mm != mm) + mmdrop(active_mm); +} +EXPORT_SYMBOL_GPL(use_mm); + +/* + * unuse_mm + * Reverses the effect of use_mm, i.e. releases the + * specified mm context which was earlier taken on + * by the calling kernel thread + * (Note: this routine is intended to be called only + * from a kernel thread context) + */ +void unuse_mm(struct mm_struct *mm) +{ + struct task_struct *tsk = current; + + task_lock(tsk); + sync_mm_rss(mm); + tsk->mm = NULL; + /* active_mm is still 'mm' */ + enter_lazy_tlb(mm, tsk); + task_unlock(tsk); +} +EXPORT_SYMBOL_GPL(unuse_mm); + #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread diff --git a/mm/Makefile b/mm/Makefile index 662fd1504646..cc8f897dfac0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,7 +41,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ - mm_init.o mmu_context.o percpu.o slab_common.o \ + mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ debug.o gup.o $(mmu-y) diff --git a/mm/mmu_context.c b/mm/mmu_context.c deleted file mode 100644 index 3e612ae748e9..000000000000 --- a/mm/mmu_context.c +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2009 Red Hat, Inc. - * - * See ../COPYING for licensing terms. - */ - -#include -#include -#include -#include -#include -#include - -#include - -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void use_mm(struct mm_struct *mm) -{ - struct mm_struct *active_mm; - struct task_struct *tsk = current; - - task_lock(tsk); - active_mm = tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); - tsk->active_mm = mm; - } - tsk->mm = mm; - switch_mm(active_mm, mm, tsk); - task_unlock(tsk); -#ifdef finish_arch_post_lock_switch - finish_arch_post_lock_switch(); -#endif - - if (active_mm != mm) - mmdrop(active_mm); -} -EXPORT_SYMBOL_GPL(use_mm); - -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void unuse_mm(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - sync_mm_rss(mm); - tsk->mm = NULL; - /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); -} -EXPORT_SYMBOL_GPL(unuse_mm); -- cgit v1.2.3 From f5678e7f2ac31c270334b936352f0ef2fe7dd2b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 18:42:06 -0700 Subject: kernel: better document the use_mm/unuse_mm API contract Switch the function documentation to kerneldoc comments, and add WARN_ON_ONCE asserts that the calling thread is a kernel thread and does not have ->mm set (or has ->mm set in the case of unuse_mm). Also give the functions a kthread_ prefix to better document the use case. [hch@lst.de: fix a comment typo, cover the newly merged use_mm/unuse_mm caller in vfio] Link: http://lkml.kernel.org/r/20200416053158.586887-3-hch@lst.de [sfr@canb.auug.org.au: powerpc/vas: fix up for {un}use_mm() rename] Link: http://lkml.kernel.org/r/20200422163935.5aa93ba5@canb.auug.org.au Signed-off-by: Christoph Hellwig Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Tested-by: Jens Axboe Reviewed-by: Jens Axboe Acked-by: Felix Kuehling Acked-by: Greg Kroah-Hartman [usb] Acked-by: Haren Myneni Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Jason Wang Cc: "Michael S. Tsirkin" Cc: Zhenyu Wang Cc: Zhi Wang Link: http://lkml.kernel.org/r/20200404094101.672954-6-hch@lst.de Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powernv/vas-fault.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++-- drivers/usb/gadget/function/f_fs.c | 4 ++-- drivers/usb/gadget/legacy/inode.c | 4 ++-- drivers/vfio/vfio_iommu_type1.c | 4 ++-- drivers/vhost/vhost.c | 4 ++-- fs/io-wq.c | 6 +++--- fs/io_uring.c | 4 ++-- include/linux/kthread.h | 4 ++-- kernel/kthread.c | 33 +++++++++++++++--------------- mm/oom_kill.c | 6 +++--- mm/vmacache.c | 4 ++-- 12 files changed, 40 insertions(+), 41 deletions(-) (limited to 'fs') diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 25db70be4c9c..266a6ca5e15e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window, return; } - use_mm(window->mm); + kthread_use_mm(window->mm); rc = copy_to_user(csb_addr, &csb, sizeof(csb)); /* * User space polls on csb.flags (first byte). So add barrier @@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window, smp_mb(); rc = copy_to_user(csb_addr, &csb, sizeof(u8)); } - unuse_mm(window->mm); + kthread_unuse_mm(window->mm); put_task_struct(tsk); /* Success */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b94bbb8e7bb4..142746836838 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -197,9 +197,9 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ - use_mm(mmptr); \ + kthread_use_mm(mmptr); \ valid = !get_user((dst), (wptr)); \ - unuse_mm(mmptr); \ + kthread_unuse_mm(mmptr); \ } \ pagefault_enable(); \ } \ diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7ae54b7b637b..f80b2747d7c5 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -827,9 +827,9 @@ static void ffs_user_copy_worker(struct work_struct *work) mm_segment_t oldfs = get_fs(); set_fs(USER_DS); - use_mm(io_data->mm); + kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); - unuse_mm(io_data->mm); + kthread_unuse_mm(io_data->mm); set_fs(oldfs); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 20fba95ed0a6..9ee0bfe7bcda 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -462,9 +462,9 @@ static void ep_user_copy_worker(struct work_struct *work) struct kiocb *iocb = priv->iocb; size_t ret; - use_mm(mm); + kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); - unuse_mm(mm); + kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d5c08a750441..5e556ac9102a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2817,7 +2817,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, return -EPERM; if (kthread) - use_mm(mm); + kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2844,7 +2844,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) - unuse_mm(mm); + kthread_unuse_mm(mm); out: mmput(mm); return *copied ? 0 : -EFAULT; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ffc7cc31d7eb..1ad3d10c121a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -332,7 +332,7 @@ static int vhost_worker(void *data) mm_segment_t oldfs = get_fs(); set_fs(USER_DS); - use_mm(dev->mm); + kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ @@ -360,7 +360,7 @@ static int vhost_worker(void *data) schedule(); } } - unuse_mm(dev->mm); + kthread_unuse_mm(dev->mm); set_fs(oldfs); return 0; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 5f590bf27bff..748621f7391e 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -170,7 +170,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) } __set_current_state(TASK_RUNNING); set_fs(KERNEL_DS); - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -417,7 +417,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) { if (worker->mm) { - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -426,7 +426,7 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) return; } if (mmget_not_zero(work->mm)) { - use_mm(work->mm); + kthread_use_mm(work->mm); if (!worker->mm) set_fs(USER_DS); worker->mm = work->mm; diff --git a/fs/io_uring.c b/fs/io_uring.c index 9842443dde20..ec4e9d36210b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5866,7 +5866,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_mm && !current->mm) { if (unlikely(!mmget_not_zero(ctx->sqo_mm))) return -EFAULT; - use_mm(ctx->sqo_mm); + kthread_use_mm(ctx->sqo_mm); } sqe_flags = READ_ONCE(sqe->flags); @@ -5980,7 +5980,7 @@ static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) struct mm_struct *mm = current->mm; if (mm) { - unuse_mm(mm); + kthread_unuse_mm(mm); mmput(mm); } } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c2d40c9672d6..12258ea077cf 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -200,8 +200,8 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); +void kthread_use_mm(struct mm_struct *mm); +void kthread_unuse_mm(struct mm_struct *mm); struct cgroup_subsys_state; diff --git a/kernel/kthread.c b/kernel/kthread.c index ce4610316377..8ed4b4fbec7c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1208,18 +1208,18 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * (Note: this routine is intended to be called only - * from a kernel thread context) +/** + * kthread_use_mm - make the calling kthread operate on an address space + * @mm: address space to operate on */ -void use_mm(struct mm_struct *mm) +void kthread_use_mm(struct mm_struct *mm) { struct mm_struct *active_mm; struct task_struct *tsk = current; + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(tsk->mm); + task_lock(tsk); active_mm = tsk->active_mm; if (active_mm != mm) { @@ -1236,20 +1236,19 @@ void use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); } -EXPORT_SYMBOL_GPL(use_mm); +EXPORT_SYMBOL_GPL(kthread_use_mm); -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) +/** + * kthread_unuse_mm - reverse the effect of kthread_use_mm() + * @mm: address space to operate on */ -void unuse_mm(struct mm_struct *mm) +void kthread_unuse_mm(struct mm_struct *mm) { struct task_struct *tsk = current; + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(!tsk->mm); + task_lock(tsk); sync_mm_rss(mm); tsk->mm = NULL; @@ -1257,7 +1256,7 @@ void unuse_mm(struct mm_struct *mm) enter_lazy_tlb(mm, tsk); task_unlock(tsk); } -EXPORT_SYMBOL_GPL(unuse_mm); +EXPORT_SYMBOL_GPL(kthread_unuse_mm); #ifdef CONFIG_BLK_CGROUP /** diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b4e9491cb320..6e94962893ee 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -126,7 +126,7 @@ static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc) /* * The process p may have detached its own ->mm while exiting or through - * use_mm(), but one or more of its subthreads may still have a valid + * kthread_use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ @@ -919,8 +919,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) continue; } /* - * No use_mm() user needs to read from the userspace so we are - * ok to reap it. + * No kthead_use_mm() user needs to read from the userspace so + * we are ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; diff --git a/mm/vmacache.c b/mm/vmacache.c index d9092814c772..01a6e6688ec1 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -24,8 +24,8 @@ * task's vmacache pertains to a different mm (ie, its own). There is * nothing we can do here. * - * Also handle the case where a kernel thread has adopted this mm via use_mm(). - * That kernel thread's vmacache is not applicable to this mm. + * Also handle the case where a kernel thread has adopted this mm via + * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. */ static inline bool vmacache_valid_mm(struct mm_struct *mm) { -- cgit v1.2.3 From 37c54f9bd48663f7657a9178fe08c47e4f5b537b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jun 2020 18:42:10 -0700 Subject: kernel: set USER_DS in kthread_use_mm Some architectures like arm64 and s390 require USER_DS to be set for kernel threads to access user address space, which is the whole purpose of kthread_use_mm, but other like x86 don't. That has lead to a huge mess where some callers are fixed up once they are tested on said architectures, while others linger around and yet other like io_uring try to do "clever" optimizations for what usually is just a trivial asignment to a member in the thread_struct for most architectures. Make kthread_use_mm set USER_DS, and kthread_unuse_mm restore to the previous value instead. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Tested-by: Jens Axboe Reviewed-by: Jens Axboe Acked-by: Michael S. Tsirkin Cc: Alex Deucher Cc: Al Viro Cc: Felipe Balbi Cc: Felix Kuehling Cc: Jason Wang Cc: Zhenyu Wang Cc: Zhi Wang Cc: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20200404094101.672954-7-hch@lst.de Signed-off-by: Linus Torvalds --- drivers/usb/gadget/function/f_fs.c | 4 ---- drivers/vhost/vhost.c | 3 --- fs/io-wq.c | 8 ++------ fs/io_uring.c | 4 ---- kernel/kthread.c | 6 ++++++ 5 files changed, 8 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index f80b2747d7c5..490d353d5fde 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -824,13 +824,9 @@ static void ffs_user_copy_worker(struct work_struct *work) bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { - mm_segment_t oldfs = get_fs(); - - set_fs(USER_DS); kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); kthread_unuse_mm(io_data->mm); - set_fs(oldfs); } io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1ad3d10c121a..421710c53f6a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -329,9 +329,7 @@ static int vhost_worker(void *data) struct vhost_dev *dev = data; struct vhost_work *work, *work_next; struct llist_node *node; - mm_segment_t oldfs = get_fs(); - set_fs(USER_DS); kthread_use_mm(dev->mm); for (;;) { @@ -361,7 +359,6 @@ static int vhost_worker(void *data) } } kthread_unuse_mm(dev->mm); - set_fs(oldfs); return 0; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 748621f7391e..a5e90ac39e4d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -169,7 +169,6 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) dropped_lock = true; } __set_current_state(TASK_RUNNING); - set_fs(KERNEL_DS); kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; @@ -421,14 +420,11 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) { - set_fs(KERNEL_DS); + if (!work->mm) return; - } + if (mmget_not_zero(work->mm)) { kthread_use_mm(work->mm); - if (!worker->mm) - set_fs(USER_DS); worker->mm = work->mm; /* hang on to this mm */ work->mm = NULL; diff --git a/fs/io_uring.c b/fs/io_uring.c index ec4e9d36210b..26f7bc941d01 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5989,15 +5989,12 @@ static int io_sq_thread(void *data) { struct io_ring_ctx *ctx = data; const struct cred *old_cred; - mm_segment_t old_fs; DEFINE_WAIT(wait); unsigned long timeout; int ret = 0; complete(&ctx->sq_thread_comp); - old_fs = get_fs(); - set_fs(USER_DS); old_cred = override_creds(ctx->creds); timeout = jiffies + ctx->sq_thread_idle; @@ -6102,7 +6099,6 @@ static int io_sq_thread(void *data) if (current->task_works) task_work_run(); - set_fs(old_fs); io_sq_thread_drop_mm(ctx); revert_creds(old_cred); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8ed4b4fbec7c..86357cd38eb2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -52,6 +52,7 @@ struct kthread { unsigned long flags; unsigned int cpu; void *data; + mm_segment_t oldfs; struct completion parked; struct completion exited; #ifdef CONFIG_BLK_CGROUP @@ -1235,6 +1236,9 @@ void kthread_use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); + + to_kthread(tsk)->oldfs = get_fs(); + set_fs(USER_DS); } EXPORT_SYMBOL_GPL(kthread_use_mm); @@ -1249,6 +1253,8 @@ void kthread_unuse_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(!tsk->mm); + set_fs(to_kthread(tsk)->oldfs); + task_lock(tsk); sync_mm_rss(mm); tsk->mm = NULL; -- cgit v1.2.3