diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 99 |
1 files changed, 86 insertions, 13 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 757301cf4d80..38681ad44c76 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -471,6 +471,20 @@ void free_task(struct task_struct *tsk) } EXPORT_SYMBOL(free_task); +static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) +{ + struct file *exe_file; + + exe_file = get_mm_exe_file(oldmm); + RCU_INIT_POINTER(mm->exe_file, exe_file); + /* + * We depend on the oldmm having properly denied write access to the + * exe_file already. + */ + if (exe_file && deny_write_access(exe_file)) + pr_warn_once("deny_write_access() failed in %s\n", __func__); +} + #ifdef CONFIG_MMU static __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) @@ -494,7 +508,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING); /* No ordering required: file already has been exposed. */ - RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + dup_mm_exe_file(mm, oldmm); mm->total_vm = oldmm->total_vm; mm->data_vm = oldmm->data_vm; @@ -557,12 +571,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); file = tmp->vm_file; if (file) { - struct inode *inode = file_inode(file); struct address_space *mapping = file->f_mapping; get_file(file); - if (tmp->vm_flags & VM_DENYWRITE) - put_write_access(inode); i_mmap_lock_write(mapping); if (tmp->vm_flags & VM_SHARED) mapping_allow_writable(mapping); @@ -640,7 +651,7 @@ static inline void mm_free_pgd(struct mm_struct *mm) static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { mmap_write_lock(oldmm); - RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); + dup_mm_exe_file(mm, oldmm); mmap_write_unlock(oldmm); return 0; } @@ -1052,6 +1063,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm->pmd_huge_pte = NULL; #endif mm_init_uprobes_state(mm); + hugetlb_count_init(mm); if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; @@ -1150,11 +1162,11 @@ void mmput_async(struct mm_struct *mm) * * Main users are mmput() and sys_execve(). Callers prevent concurrent * invocations: in mmput() nobody alive left, in execve task is single - * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the - * mm->exe_file, but does so without using set_mm_exe_file() in order - * to avoid the need for any locks. + * threaded. + * + * Can only fail if new_exe_file != NULL. */ -void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) { struct file *old_exe_file; @@ -1165,11 +1177,73 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) */ old_exe_file = rcu_dereference_raw(mm->exe_file); - if (new_exe_file) + if (new_exe_file) { + /* + * We expect the caller (i.e., sys_execve) to already denied + * write access, so this is unlikely to fail. + */ + if (unlikely(deny_write_access(new_exe_file))) + return -EACCES; get_file(new_exe_file); + } rcu_assign_pointer(mm->exe_file, new_exe_file); - if (old_exe_file) + if (old_exe_file) { + allow_write_access(old_exe_file); fput(old_exe_file); + } + return 0; +} + +/** + * replace_mm_exe_file - replace a reference to the mm's executable file + * + * This changes mm's executable file (shown as symlink /proc/[pid]/exe), + * dealing with concurrent invocation and without grabbing the mmap lock in + * write mode. + * + * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE). + */ +int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + struct vm_area_struct *vma; + struct file *old_exe_file; + int ret = 0; + + /* Forbid mm->exe_file change if old file still mapped. */ + old_exe_file = get_mm_exe_file(mm); + if (old_exe_file) { + mmap_read_lock(mm); + for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (path_equal(&vma->vm_file->f_path, + &old_exe_file->f_path)) + ret = -EBUSY; + } + mmap_read_unlock(mm); + fput(old_exe_file); + if (ret) + return ret; + } + + /* set the new file, lockless */ + ret = deny_write_access(new_exe_file); + if (ret) + return -EACCES; + get_file(new_exe_file); + + old_exe_file = xchg(&mm->exe_file, new_exe_file); + if (old_exe_file) { + /* + * Don't race with dup_mmap() getting the file and disallowing + * write access while someone might open the file writable. + */ + mmap_read_lock(mm); + allow_write_access(old_exe_file); + fput(old_exe_file); + mmap_read_unlock(mm); + } + return 0; } /** @@ -1189,7 +1263,6 @@ struct file *get_mm_exe_file(struct mm_struct *mm) rcu_read_unlock(); return exe_file; } -EXPORT_SYMBOL(get_mm_exe_file); /** * get_task_exe_file - acquire a reference to the task's executable file @@ -1212,7 +1285,6 @@ struct file *get_task_exe_file(struct task_struct *task) task_unlock(task); return exe_file; } -EXPORT_SYMBOL(get_task_exe_file); /** * get_task_mm - acquire a reference to the task's mm @@ -2085,6 +2157,7 @@ static __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_BPF_SYSCALL RCU_INIT_POINTER(p->bpf_storage, NULL); + p->bpf_ctx = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ |